5e7dd86a by 乔峰昇

add pipeline inference

1 parent 7c864e59
@@ -576,8 +576,8 @@ def run(
 def parse_opt():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='dataset.yaml path')
-    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model.pt path(s)')
+    parser.add_argument('--data', type=str, default=ROOT / 'data/VOC.yaml', help='dataset.yaml path')
+    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'runs/train/exp/weights/best.pt', help='model.pt path(s)')
     parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640, 640], help='image (h, w)')
     parser.add_argument('--batch-size', type=int, default=1, help='batch size')
     parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
......
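Not shown in this hunk: in upstream YOLOv5 these parsed defaults flow into run() via a small main(). A sketch of that pattern, in case it helps orient the change (the repo's actual entry point may differ):

```python
# Assumed upstream-YOLOv5 entry-point pattern; not part of this diff.
def main(opt):
    run(**vars(opt))  # unpack the argparse.Namespace into run()'s keyword args


if __name__ == '__main__':
    main(parse_opt())
```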
@@ -95,7 +95,13 @@ class Yolov5:
 if __name__ == "__main__":
     img = cv2.imread(
-        '/home/situ/qfs/invoice_tamper/09_project/project/yolov5_inference/data/images/crop_img/_1594890230.8032346page_10_img_0_hname.jpg')
+        '/home/situ/qfs/invoice_tamper/09_project/project/tamper_det/data/images/img_1.png')
     detector = Yolov5(config)
     result = detector.detect(img)
+    for i in result['result']:
+        # the trailing values of each detection dict are the box: left, top, width, height
+        position = list(i.values())[2:]
+        print(position)
+        cv2.rectangle(img, (position[0], position[1]), (position[0] + position[2], position[1] + position[3]), (0, 0, 255))
+    cv2.imshow('w', img)
+    cv2.waitKey(0)
     print(result)
......
 from easydict import EasyDict as edict
 config = edict(
     # weights='/home/situ/qfs/invoice_tamper/09_project/project/yolov5_inference/runs/exp2/weights/best.pt',  # model path or triton URL
     weights='runs/train/exp/weights/best.pt',  # model path or triton URL
     data='data/VOC.yaml',  # dataset.yaml path
     imgsz=(640, 640),  # inference size (height, width)
-    conf_thres=0.5,  # confidence threshold
+    conf_thres=0.2,  # confidence threshold
     iou_thres=0.45,  # NMS IOU threshold
     max_det=1000,  # maximum detections per image
     device=''  # cuda device, i.e. 0 or 0,1,2,3 or cpu
......
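For context, a minimal sketch of how this config object is consumed, based on the inference.py demo earlier in this diff (the image path is illustrative; the result-dict keys follow the pipeline code in this commit):

```python
import cv2

from inference import Yolov5
from models.yolov5_config import config

detector = Yolov5(config)
img = cv2.imread('data/images/img_1.png')  # illustrative path
result = detector.detect(img)
for det in result['result']:
    # key names as used by pipeline.py below
    x, y = int(det['left']), int(det['top'])
    w, h = int(det['width']), int(det['height'])
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
```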
+import time
+
+import cv2
+
+from bank_ocr_inference import bill_ocr, extract_bank_info
+from inference import Yolov5
+from models.yolov5_config import config
+
+
+def enlarge_position(box):
+    # pad the box by a third of its height vertically and an eighth of its
+    # width horizontally, clamping the top-left corner at the image border
+    x1, y1, x2, y2 = box
+    w, h = abs(x2 - x1), abs(y2 - y1)
+    y1, y2 = max(y1 - h // 3, 0), y2 + h // 3
+    x1, x2 = max(x1 - w // 8, 0), x2 + w // 8
+    return [x1, y1, x2, y2]
+
+
+def tamper_detect(image):
+    st = time.time()
+    ocr_results = bill_ocr(image)
+    et1 = time.time()
+    info_results = extract_bank_info(ocr_results)
+    et2 = time.time()
+    print(info_results)
+    tamper_results = []
+    if len(info_results) != 0:
+        for info_result in info_results:
+            # top-left and bottom-right corners of the OCR text box
+            box = [info_result[1][0], info_result[1][1], info_result[1][4], info_result[1][5]]
+            x1, y1, x2, y2 = enlarge_position(box)
+            # x1, y1, x2, y2 = box
+            info_image = image[y1:y2, x1:x2, :]
+            cv2.imshow('info_image', info_image)
+            results = detector.detect(info_image)
+            print(results)
+            if len(results['result']) != 0:
+                for res in results['result']:
+                    left = int(res['left'])
+                    top = int(res['top'])
+                    width = int(res['width'])
+                    height = int(res['height'])
+                    # shift crop-relative detections back into full-image coordinates
+                    absolute_position = [x1 + left, y1 + top, x1 + left + width, y1 + top + height]
+                    tamper_results.append(absolute_position)
+    print(tamper_results)
+    et3 = time.time()
+    print(f'all:{et3 - st} ocr:{et1 - st} extract:{et2 - et1} yolo:{et3 - et2}')
+    for i in tamper_results:
+        cv2.rectangle(image, tuple(i[:2]), tuple(i[2:]), (0, 0, 255), 2)
+    cv2.imshow('info', image)
+    cv2.waitKey(0)
+
+
+if __name__ == '__main__':
+    detector = Yolov5(config)
+    image = cv2.imread(
+        "/home/situ/下载/_1597378020.731796page_33_img_0.jpg")
+    tamper_detect(image)
......
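A worked example of enlarge_position's padding arithmetic (assumes the new file above is saved as pipeline.py; the numbers are illustrative):

```python
from pipeline import enlarge_position  # module name assumed

box = [100, 40, 260, 80]  # w = 160, h = 40
# vertical pad: 40 // 3 = 13; horizontal pad: 160 // 8 = 20; clamped at 0
assert enlarge_position(box) == [80, 27, 280, 93]
```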
@@ -10,9 +10,9 @@ def get_source_image_det(crop_position, predict_positions):
     result = []
     x1, y1, x2, y2 = crop_position
     for p in predict_positions:
-        px1, py1, px2, py2,score = p
+        px1, py1, px2, py2, score = p
         w, h = px2 - px1, py2 - py1
-        result.append([x1 + px1, y1 + py1, x1 + px1 + w, y1 + py1 + h,score])
+        result.append([x1 + px1, y1 + py1, x1 + px1 + w, y1 + py1 + h, score])
     return result
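get_source_image_det shifts crop-relative detections back into source-image coordinates by adding the crop's top-left corner. A worked example (the module name is hypothetical; the arithmetic is from the code above):

```python
from utils_det import get_source_image_det  # module name assumed

crop_position = [100, 200, 400, 500]         # crop's box in the source image
predict_positions = [[10, 20, 50, 60, 0.9]]  # detection inside the crop, plus score
# only the origin shifts; the box's width/height and score are preserved
assert get_source_image_det(crop_position, predict_positions) == [[110, 220, 150, 260, 0.9]]
```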
@@ -22,9 +22,9 @@ def decode_label(image, label_path):
     result = []
     for d in data:
         d = [float(i) for i in d.strip().split(' ')]
-        cls, cx, cy, cw, ch,score = d
+        cls, cx, cy, cw, ch, score = d
         cx, cy, cw, ch = cx * w, cy * h, cw * w, ch * h
-        result.append([int(cx - cw // 2), int(cy - ch // 2), int(cx + cw // 2), int(cy + ch // 2),score])
+        result.append([int(cx - cw // 2), int(cy - ch // 2), int(cx + cw // 2), int(cy + ch // 2), score])
     return result
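decode_label converts YOLO's normalized (cx, cy, w, h) into absolute corner coordinates. For a 640×480 crop, the arithmetic works out as follows (plain arithmetic, values illustrative):

```python
w, h = 640, 480
cx, cy, cw, ch = 0.5 * w, 0.5 * h, 0.25 * w, 0.25 * h  # 320, 240, 160, 120
box = [int(cx - cw // 2), int(cy - ch // 2), int(cx + cw // 2), int(cy + ch // 2)]
assert box == [240, 180, 400, 300]
```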
@@ -38,28 +38,28 @@ if __name__ == '__main__':
     data = pd.read_csv(crop_csv_path)
     img_name = data.loc[:, 'img_name'].tolist()
     crop_position1 = data.loc[:, 'name_crop_coord'].tolist()
-    crop_position2 = data.loc[:,'number_crop_coord'].tolist()
-    cc='/data/situ_invoice_bill_data/new_data/qfs_bank_bill_data/gongshang/tampered/images/val/ps3'
+    crop_position2 = data.loc[:, 'number_crop_coord'].tolist()
+    cc = '/data/situ_invoice_bill_data/new_data/qfs_bank_bill_data/gongshang/tampered/images/val/ps3'
     for im in os.listdir(cc):
         print(im)
-        img = cv2.imread(os.path.join(cc,im))
-        img_=img.copy()
+        img = cv2.imread(os.path.join(cc, im))
+        img_ = img.copy()
         id = img_name.index(im)
-        name_crop_position=[int(i) for i in crop_position1[id].split(',')]
-        number_crop_position=[int(i) for i in crop_position2[id].split(',')]
-        nx1,ny1,nx2,ny2=name_crop_position
-        nux1,nuy1,nux2,nuy2=number_crop_position
-        if im[:-4]+'_hname.txt' in predict_labels:
+        name_crop_position = [int(i) for i in crop_position1[id].split(',')]
+        number_crop_position = [int(i) for i in crop_position2[id].split(',')]
+        nx1, ny1, nx2, ny2 = name_crop_position
+        nux1, nuy1, nux2, nuy2 = number_crop_position
+        if im[:-4] + '_hname.txt' in predict_labels:
             h, w, c = img[ny1:ny2, nx1:nx2, :].shape
-            data = open(os.path.join(predict_label_path,im[:-4]+'_hname.txt')).readlines()
+            data = open(os.path.join(predict_label_path, im[:-4] + '_hname.txt')).readlines()
             for d in data:
-                cls,cx,cy,cw,ch,score = [float(i) for i in d.strip().split(' ')]
-                cx,cy,cw,ch=int(cx*w),int(cy*h),int(cw*w),int(ch*h)
-                cx1,cy1=cx-cw//2,cy-ch//2
-                x1,y1,x2,y2=nx1+cx1,ny1+cy1,nx1+cx1+cw,ny1+cy1+ch
-                cv2.rectangle(img,(x1,y1),(x2,y2),(0,0,255),2)
-                cv2.putText(img,f'tampered:{score}',(x1,y1-5),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,255),1)
+                cls, cx, cy, cw, ch, score = [float(i) for i in d.strip().split(' ')]
+                cx, cy, cw, ch = int(cx * w), int(cy * h), int(cw * w), int(ch * h)
+                cx1, cy1 = cx - cw // 2, cy - ch // 2
+                x1, y1, x2, y2 = nx1 + cx1, ny1 + cy1, nx1 + cx1 + cw, ny1 + cy1 + ch
+                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
+                cv2.putText(img, f'tampered:{score}', (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
         if im[:-4] + '_hnumber.txt' in predict_labels:
             h, w, c = img[nuy1:nuy2, nux1:nux2, :].shape
             data = open(os.path.join(predict_label_path, im[:-4] + '_hnumber.txt')).readlines()  # read the matching number-crop predictions
@@ -70,5 +70,5 @@ if __name__ == '__main__':
                 x1, y1, x2, y2 = nux1 + cx1, nuy1 + cy1, nux1 + cx1 + cw, nuy1 + cy1 + ch
                 cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
                 cv2.putText(img, f'tampered:{score}', (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
-        result = np.vstack((img_,img))
-        cv2.imwrite(f'z/{im}',result)
+        result = np.vstack((img_, img))
+        cv2.imwrite(f'z/{im}', result)
......
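The final hunk writes a stacked before/after image: the untouched copy img_ on top, the annotated img below. A minimal standalone sketch of that pattern (paths are illustrative):

```python
import cv2
import numpy as np

img = cv2.imread('sample.jpg')    # illustrative input
img_ = img.copy()                 # clean copy kept for comparison
cv2.rectangle(img, (50, 50), (150, 120), (0, 0, 255), 2)
stacked = np.vstack((img_, img))  # same width required; original above, annotated below
cv2.imwrite('z/sample.jpg', stacked)  # returns False if the 'z/' directory does not exist
```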