slice part 2

周伟奇
Showing 3 changed files with 223 additions and 48 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/celery_compare/tasks.py
--- a/src/apps/doc/consts.py
View file @4d3ca7e
+++ b/src/apps/doc/consts.py
View file @4d3ca7e
@@ -1542,9 +1542,12 @@ BC_FIELD_ORDER_2 = (('BankName', '发卡行名称'),

 IMG_PATH_KEY = 'uniq_img_path_key'
 IMG_PATH_KEY_2 = 'uniq_img_path_key_2'
+SECTION_IMG_PATH_KEY = 'uniq_section_img_path_key'
+ALL_POSITION_KEY = 'uniq_all_position_key'
 POSITION_KEY = 'uniq_position_key'
-SECTION_KEY = 'uniq_section_key'
 ANGLE_KEY = 'uniq_angle_key'
+FIELD_POSITION_KEY = 'position'
+FIELD_QUAD_KEY = 'quad'

 INFO_SOURCE = ['POS', 'CMS']

--- a/src/apps/doc/management/commands/ocr_process.py
View file @4d3ca7e
+++ b/src/apps/doc/management/commands/ocr_process.py
View file @4d3ca7e
@@ -373,7 +373,7 @@ class Command(BaseCommand, LoggerMixin):
            license_summary.setdefault(classify, []).extend(license_data)
        res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))

-    def license2_process(self, ocr_res_2, license_summary, pid, classify, res_list, pno, ino, part_idx, img_path, do_dda, dda_id_bc_mapping, section_img_path=None):
+    def license2_process(self, ocr_res_2, license_summary, pid, classify, res_list, pno, ino, part_idx, img_path, do_dda, dda_id_bc_mapping, file_data):
        if ocr_res_2.get('ErrorCode') in consts.SUCCESS_CODE_SET:
            res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))
            if pid == consts.BC_PID:
@@ -389,19 +389,51 @@ class Command(BaseCommand, LoggerMixin):
                        dda_id_bc_mapping.setdefault(consts.BC_FIELD, []).append((bc_no, img_path))
            else:
                # 营业执照等
-                for result_dict in ocr_res_2.get('ResultList', []):
-                    position = result_dict.get('position', {})
-                    angle = result_dict.get('angle', 0)
+                pre, suf = os.path.splitext(img_path)
+                src_section_img_path = img_path if file_data is None else '{0}_{1}{2}'.format(pre, part_idx, suf)
+
+                is_save = False
+                for res_idx, result_dict in enumerate(ocr_res_2.get('ResultList', [])):
+                    image_data = result_dict.get('image_data', '')
+                    if len(image_data) > 0:
+                        position = {}
+                        angle = 0
+                        section_img_path = '{0}_{1}_{2}{3}'.format(pre, part_idx, res_idx, suf)
+                        try:
+                            with open(section_img_path, "wb") as fh:
+                                fh.write(base64.b64decode(image_data.encode()))
+                        except Exception as e:
+                            self.online_log.warn(
+                                '{0} [section img save failed] [img_path={1}]'
+                                ' [part_idx={2}] [res_idx={3}]'.format(self.log_base, img_path, part_idx, res_idx))
+                    else:
+                        is_save = True
+                        section_img_path = src_section_img_path
+                        position = result_dict.get('position', {})
+                        angle = result_dict.get('angle', 0)
                    res_dict = {}
                    position_dict = {}
                    for field_dict in result_dict.get('FieldList', []):
                        res_dict[field_dict.get('chn_key', '')] = field_dict.get('value', '')
-                        position_dict[field_dict.get('chn_key', '')] = field_dict.get('position', [])
-                    position_dict[consts.SECTION_KEY] = position
+                        position_dict[field_dict.get('chn_key', '')] = {
+                            consts.FIELD_POSITION_KEY: field_dict.get('position', {}),
+                            consts.FIELD_QUAD_KEY: field_dict.get('quad', []),
+                        }
+                    position_dict[consts.POSITION_KEY] = position
                    position_dict[consts.ANGLE_KEY] = angle
-                    res_dict[consts.IMG_PATH_KEY] = section_img_path if isinstance(section_img_path, str) else img_path
-                    res_dict[consts.POSITION_KEY] = position_dict
+                    res_dict[consts.IMG_PATH_KEY] = img_path
+                    res_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path
+                    res_dict[consts.ALL_POSITION_KEY] = position_dict
                    license_summary.setdefault(classify, []).append(res_dict)
+
+                if is_save and file_data is not None:
+                    try:
+                        with open(src_section_img_path, "wb") as fh:
+                            fh.write(base64.b64decode(file_data.encode()))
+                    except Exception as e:
+                        self.online_log.warn(
+                            '{0} [section img save failed] [img_path={1}]'
+                            ' [part_idx={2}]'.format(self.log_base, img_path, part_idx))
        else:
            res_list.append((pno, ino, part_idx, consts.RES_FAILED_2))

@@ -980,22 +1012,9 @@ class Command(BaseCommand, LoggerMixin):
                                                                name = '无'
                                                        ocr_2_res['Name'] = name

-                                                    section_img_path = None
-                                                    try:
-                                                        if ocr_2_res.get('ErrorCode') in consts.SUCCESS_CODE_SET:
-                                                            pre, suf = os.path.splitext(img_path)
-                                                            section_img_path = '{0}_{1}{2}'.format(pre, part_idx, suf)
-                                                            with open(section_img_path, "wb") as fh:
-                                                                fh.write(base64.b64decode(file_data.encode()))
-                                                    except Exception as e:
-                                                        self.online_log.warn(
-                                                            '{0} [section img save failed] [img_path={1}]'
-                                                            ' [part_idx={2}]'.format(self.log_base, img_path, part_idx))
-
                                                    self.license2_process(ocr_2_res, license_summary, pid, classify,
                                                                          res_list, pno, ino, part_idx, img_path,
-                                                                          do_dda, dda_id_bc_mapping,
-                                                                          section_img_path=section_img_path)
+                                                                          do_dda, dda_id_bc_mapping, file_data=ocr_data.get('section_img'))
                                                    break
                                            else:
                                                res_list.append((pno, ino, part_idx, consts.RES_FAILED_2))
--- a/src/celery_compare/tasks.py
View file @4d3ca7e
+++ b/src/celery_compare/tasks.py
View file @4d3ca7e
 import json
+import os
+import cv2
 import time
 import logging
 import traceback
+import numpy as np
 from datetime import datetime, timedelta
 from collections import OrderedDict
 from . import app
@@ -40,6 +43,79 @@ empty_error_type = 1000
 des_key = conf.CMS_DES_KEY


+def rotate_bound(image, angle):
+    """Rotate ``image`` by ``angle`` degrees on a canvas enlarged to fit it.
+
+    ``angle`` is forwarded unchanged to ``cv2.getRotationMatrix2D``; per the
+    OpenCV documentation a positive value there means counter-clockwise.
+    The output canvas is the bounding box of the rotated frame.
+
+    :param image: array whose first two ``shape`` entries are (height, width).
+    :param angle: rotation angle in degrees.
+    :return: the result of ``cv2.warpAffine`` with the adjusted matrix.
+    """
+    height, width = image.shape[:2]
+    center_x = width // 2
+    center_y = height // 2
+
+    # Rotation about the centre with scale 1.0; the magnitudes of the first
+    # row of the matrix are |cos| and |sin| of the angle.
+    matrix = cv2.getRotationMatrix2D((center_x, center_y), angle, 1.0)
+    abs_cos = np.abs(matrix[0, 0])
+    abs_sin = np.abs(matrix[0, 1])
+
+    # Size of the bounding box that contains the rotated frame.
+    new_width = int((height * abs_sin) + (width * abs_cos))
+    new_height = int((height * abs_cos) + (width * abs_sin))
+
+    # Shift the transform so the old centre maps to the new canvas centre.
+    matrix[0, 2] += (new_width / 2) - center_x
+    matrix[1, 2] += (new_height / 2) - center_y
+
+    return cv2.warpAffine(image, matrix, (new_width, new_height))
+
+
+def build_coordinates(section_position_dict):
+    """Turn a ``top``/``left``/``height``/``width`` mapping into slice bounds.
+
+    :param section_position_dict: mapping with the four rectangle keys; any
+        other type is rejected.
+    :return: ``(True, (h_min, h_max, w_min, w_max))`` with integer bounds when
+        the rectangle has a positive area, otherwise ``(False, ())``.
+
+    Missing or ``None`` entries count as 0. Values that cannot be read as a
+    finite number make the rectangle invalid instead of raising.
+    """
+    if not isinstance(section_position_dict, dict):
+        return False, ()
+
+    values = []
+    for key in ('top', 'left', 'height', 'width'):
+        raw = section_position_dict.get(key)
+        try:
+            # Callers use the bounds as slice indices, so they must be ints.
+            values.append(0 if raw is None else int(round(float(raw))))
+        except (TypeError, ValueError, OverflowError):
+            return False, ()
+    top, left, height, width = values
+
+    # A negative start would be interpreted as "count from the end" when
+    # slicing, so clamp the origin to 0 while keeping the far edge.
+    h_min, h_max = max(top, 0), top + height
+    w_min, w_max = max(left, 0), left + width
+    if h_max > h_min and w_max > w_min:
+        return True, (h_min, h_max, w_min, w_max)
+    return False, ()
+
+
+def field_build_coordinates(field_position_info):
+    """Derive slice bounds for one field from its quad or rectangle data.
+
+    :param field_position_info: mapping that may hold an 8-number quad under
+        ``consts.FIELD_QUAD_KEY`` and a rectangle mapping under
+        ``consts.FIELD_POSITION_KEY``; any other type is rejected.
+    :return: ``(True, (h_min, h_max, w_min, w_max))`` or ``(False, ())``.
+
+    The quad is tried first: even indexes are read as horizontal values and
+    odd indexes as vertical ones, and their bounding box is used. If the quad
+    is absent, malformed or has no area, the rectangle is handed to
+    ``build_coordinates``.
+    """
+    if not isinstance(field_position_info, dict):
+        # Avoid an AttributeError on ``.get`` for unexpected stored shapes.
+        return False, ()
+
+    field_quad_list = field_position_info.get(consts.FIELD_QUAD_KEY, [])
+    if isinstance(field_quad_list, list) and len(field_quad_list) == 8:
+        try:
+            # The bounds are used as slice indices, so force integers.
+            points = [int(round(float(v))) for v in field_quad_list]
+        except (TypeError, ValueError, OverflowError):
+            points = None
+        if points is not None:
+            w_list, h_list = points[0::2], points[1::2]
+            # Clamp the start: a negative slice start counts from the end.
+            h_min, h_max = max(min(h_list), 0), max(h_list)
+            w_min, w_max = max(min(w_list), 0), max(w_list)
+            if h_max > h_min and w_max > w_min:
+                return True, (h_min, h_max, w_min, w_max)
+
+    # Rectangle fallback shares the section-level conversion.
+    return build_coordinates(field_position_info.get(consts.FIELD_POSITION_KEY, {}))
+
+
+def img_process(section_img_path, section_position, section_angle):
+    """Load a section image, crop it to the section rectangle and rotate it.
+
+    :param section_img_path: path passed to ``cv2.imread``.
+    :param section_position: rectangle mapping understood by
+        ``build_coordinates``; when it is not valid the image is not cropped.
+    :param section_angle: rotation in degrees; only applied when it is a
+        non-zero int or float.
+    :return: the processed image array.
+    :raises ValueError: if the image cannot be read or the crop is empty.
+    """
+    image = cv2.imread(section_img_path)
+    if image is None:
+        # imread signals failure by returning None rather than raising.
+        raise ValueError('image load failed: {0}'.format(section_img_path))
+
+    is_valid, coord_tuple = build_coordinates(section_position)
+    if is_valid:
+        h_min, h_max, w_min, w_max = coord_tuple
+        image = image[h_min:h_max, w_min:w_max, :]
+        if image.size == 0:
+            # The rectangle lies outside the image; nothing useful to return.
+            raise ValueError('empty section crop: {0}'.format(section_img_path))
+
+    if isinstance(section_angle, (int, float)) and section_angle != 0:
+        return rotate_bound(image, section_angle)
+    return image
+
+
 class FakePOS:

    def __init__(self,
@@ -289,6 +365,8 @@ def ca_compare_license(license_en, ocr_res_dict, field_list):

    is_find = False
    result_field_list = []
+    section_img_info = dict()
+    field_img_path_dict = dict()
    ocr_res_str = ocr_res_dict.get(ocr_field)
    if ocr_res_str is not None:
        ocr_res_list = json.loads(ocr_res_str)
@@ -323,6 +401,8 @@ def ca_compare_license(license_en, ocr_res_dict, field_list):
                    break

                is_find = True
+                section_img_info[consts.SECTION_IMG_PATH_KEY] = ocr_res_list[res_idx].get(consts.SECTION_IMG_PATH_KEY, '')
+                section_img_info[consts.ALL_POSITION_KEY] = ocr_res_list[res_idx].get(consts.ALL_POSITION_KEY, {})

                # 过期期限特殊处理
                if special_expiry_date and name == 'idExpiryDate' and result == consts.RESULT_N:
@@ -341,13 +421,48 @@ def ca_compare_license(license_en, ocr_res_dict, field_list):
                else:
                    img_path = ocr_res_list[res_idx].get(consts.IMG_PATH_KEY, '') if result == consts.RESULT_N else empty_str
                error_type = empty_error_type if result == consts.RESULT_Y else ErrorType.OCR.value
-                result_field_list.append((name, value, result, ocr_str, position_img_path, img_path, error_type))
+                result_field_list.append((name, value, result, ocr_str, img_path, error_type))

    if not is_find:
        for name, value in field_list:
-            result_field_list.append((name, value, consts.RESULT_N, empty_str, empty_str, empty_str, ErrorType.NF.value))
-
-    return result_field_list
+            result_field_list.append((name, value, consts.RESULT_N, empty_str, empty_str, ErrorType.NF.value))
+
+    if is_find:
+        section_img_path = section_img_info.get(consts.SECTION_IMG_PATH_KEY, '')
+        if os.path.exists(section_img_path):
+            failed_field = []
+            base_img_path = empty_str
+            for name, _, result, _, img_path, _ in result_field_list:
+                if result == consts.RESULT_N:
+                    failed_field.append(name)
+                    if base_img_path == empty_str:
+                        base_img_path = img_path
+            if len(failed_field) > 0:
+                info = section_img_info.get(consts.ALL_POSITION_KEY, {})
+                section_position = info.get(consts.POSITION_KEY, {})
+                section_angle = info.get(consts.ANGLE_KEY, 0)
+                try:
+                    last_img = img_process(section_img_path, section_position, section_angle)
+                except Exception as e:
+                    for field in failed_field:
+                        field_img_path_dict[field] = base_img_path
+                else:
+                    pre, suf = os.path.splitext(section_img_path)
+                    for field in failed_field:
+                        try:
+                            res_field = compare_logic[field][0]
+                            is_valid, coord_tuple = field_build_coordinates(info.get(res_field, {}))
+                            if is_valid:
+                                save_path = '{0}_{1}{2}'.format(pre, field, suf)
+                                field_img = last_img[coord_tuple[0]:coord_tuple[1], coord_tuple[2]:coord_tuple[3], :]
+                                cv2.imwrite(save_path, field_img)
+                                field_img_path_dict[field] = save_path
+                            else:
+                                field_img_path_dict[field] = base_img_path
+                        except Exception as e:
+                            field_img_path_dict[field] = base_img_path
+
+    return result_field_list, field_img_path_dict


 def ca_compare_process(compare_info, ocr_res_dict):
@@ -363,8 +478,8 @@ def ca_compare_process(compare_info, ocr_res_dict):
            for idx, license_list in info_value.items():
                for license_dict in license_list:
                    for license_en, field_list in license_dict.items():
-                        result_field_list = ca_compare_license(license_en, ocr_res_dict, field_list)
-                        for name, value, result, ocr_str, position_img_path, img_path, error_type in result_field_list:
+                        result_field_list, field_img_path_dict = ca_compare_license(license_en, ocr_res_dict, field_list)
+                        for name, value, result, ocr_str, img_path, error_type in result_field_list:
                            total_fields += 1
                            if result == consts.RESULT_N:
                                failed_count += 1
@@ -377,15 +492,15 @@ def ca_compare_process(compare_info, ocr_res_dict):
                                    consts.HEAD_LIST[4]: value,
                                    consts.HEAD_LIST[5]: ocr_str,
                                    consts.HEAD_LIST[6]: result,
-                                    consts.HEAD_LIST[7]: position_img_path,
+                                    consts.HEAD_LIST[7]: field_img_path_dict.get(name, empty_str),
                                    consts.HEAD_LIST[8]: img_path,
                                    consts.HEAD_LIST[9]: error_type,
                                }
                            )
        else:
            for license_en, field_list in info_value.items():
-                result_field_list = ca_compare_license(license_en, ocr_res_dict, field_list)
-                for name, value, result, ocr_str, position_img_path, img_path, error_type in result_field_list:
+                result_field_list, field_img_path_dict = ca_compare_license(license_en, ocr_res_dict, field_list)
+                for name, value, result, ocr_str, img_path, error_type in result_field_list:
                    total_fields += 1
                    if result == consts.RESULT_N:
                        failed_count += 1
@@ -398,7 +513,7 @@ def ca_compare_process(compare_info, ocr_res_dict):
                            consts.HEAD_LIST[4]: value,
                            consts.HEAD_LIST[5]: ocr_str,
                            consts.HEAD_LIST[6]: result,
-                            consts.HEAD_LIST[7]: position_img_path,
+                            consts.HEAD_LIST[7]: field_img_path_dict.get(name, empty_str),
                            consts.HEAD_LIST[8]: img_path,
                            consts.HEAD_LIST[9]: error_type,
                        }
@@ -1073,6 +1188,8 @@ def se_compare_license(license_en, ocr_res_dict, field_list):
    is_find = False
    no_ocr_result = False
    result_field_list = []
+    section_img_info = dict()
+    field_img_path_dict = dict()
    ocr_res_str = ocr_res_dict.get(ocr_field)
    if ocr_res_str is not None:
        ocr_res_list = json.loads(ocr_res_str)
@@ -1104,6 +1221,8 @@ def se_compare_license(license_en, ocr_res_dict, field_list):
                    break

                is_find = True
+                section_img_info[consts.SECTION_IMG_PATH_KEY] = ocr_res_list[res_idx].get(consts.SECTION_IMG_PATH_KEY, '')
+                section_img_info[consts.ALL_POSITION_KEY] = ocr_res_list[res_idx].get(consts.ALL_POSITION_KEY, {})

                # 过期期限特殊处理
                if special_expiry_date and name == 'idExpiryDate' and result == consts.RESULT_N:
@@ -1116,15 +1235,50 @@ def se_compare_license(license_en, ocr_res_dict, field_list):

                img_path = ocr_res_list[res_idx].get(consts.IMG_PATH_KEY, '') if result == consts.RESULT_N else empty_str
                error_type = empty_error_type if result == consts.RESULT_Y else ErrorType.OCR.value
-                result_field_list.append((name, value, result, ocr_str, position_img_path, img_path, error_type))
+                result_field_list.append((name, value, result, ocr_str, img_path, error_type))
    else:
        no_ocr_result = True

    if not is_find:
        for name, value in field_list:
-            result_field_list.append((name, value, consts.RESULT_N, empty_str, empty_str, empty_str, ErrorType.NF.value))
-
-    return result_field_list, no_ocr_result
+            result_field_list.append((name, value, consts.RESULT_N, empty_str, empty_str, ErrorType.NF.value))
+
+    if is_find:
+        section_img_path = section_img_info.get(consts.SECTION_IMG_PATH_KEY, '')
+        if os.path.exists(section_img_path):
+            failed_field = []
+            base_img_path = empty_str
+            for name, _, result, _, img_path, _ in result_field_list:
+                if result == consts.RESULT_N:
+                    failed_field.append(name)
+                    if base_img_path == empty_str:
+                        base_img_path = img_path
+            if len(failed_field) > 0:
+                info = section_img_info.get(consts.ALL_POSITION_KEY, {})
+                section_position = info.get(consts.POSITION_KEY, {})
+                section_angle = info.get(consts.ANGLE_KEY, 0)
+                try:
+                    last_img = img_process(section_img_path, section_position, section_angle)
+                except Exception as e:
+                    for field in failed_field:
+                        field_img_path_dict[field] = base_img_path
+                else:
+                    pre, suf = os.path.splitext(section_img_path)
+                    for field in failed_field:
+                        try:
+                            res_field = compare_logic[field][0]
+                            is_valid, coord_tuple = field_build_coordinates(info.get(res_field, {}))
+                            if is_valid:
+                                save_path = '{0}_{1}{2}'.format(pre, field, suf)
+                                field_img = last_img[coord_tuple[0]:coord_tuple[1], coord_tuple[2]:coord_tuple[3], :]
+                                cv2.imwrite(save_path, field_img)
+                                field_img_path_dict[field] = save_path
+                            else:
+                                field_img_path_dict[field] = base_img_path
+                        except Exception as e:
+                            field_img_path_dict[field] = base_img_path
+
+    return result_field_list, no_ocr_result, field_img_path_dict


 def se_mvc34_compare(license_en, ocr_res_dict, field_list):
@@ -1184,14 +1338,13 @@ def se_mvc34_compare(license_en, ocr_res_dict, field_list):
                    result = getattr(cp, compare_logic[name][1])(value, ocr_str, **compare_logic[name][2])
                img_path = ocr_res.get(consts.IMG_PATH_KEY, '') if result == consts.RESULT_N else empty_str
                error_type = empty_error_type if result == consts.RESULT_Y else ErrorType.OCR.value
-                position_img_path = empty_str
-                result_field_list.append((name, value, result, ocr_str, position_img_path, img_path, error_type))
+                result_field_list.append((name, value, result, ocr_str, img_path, error_type))

    if not is_find:
        for name, value in field_list:
-            result_field_list.append((name, value, consts.RESULT_N, empty_str, empty_str, empty_str, ErrorType.NF.value))
+            result_field_list.append((name, value, consts.RESULT_N, empty_str, empty_str, ErrorType.NF.value))

-    return result_field_list
+    return result_field_list, dict()


 def se_compare_process(compare_info, ocr_res_dict):
@@ -1211,8 +1364,8 @@ def se_compare_process(compare_info, ocr_res_dict):
                for license_dict in license_list:
                    for license_en, field_list in license_dict.items():
                        failure_field = []
-                        result_field_list, no_ocr_result = se_compare_license(license_en, ocr_res_dict, field_list)
-                        for name, value, result, ocr_str, position_img_path, img_path, error_type in result_field_list:
+                        result_field_list, no_ocr_result, field_img_path_dict = se_compare_license(license_en, ocr_res_dict, field_list)
+                        for name, value, result, ocr_str, img_path, error_type in result_field_list:
                            if license_en not in consts.SKIP_CARD or not no_ocr_result:
                                total_fields += 1
                                if result == consts.RESULT_N:
@@ -1228,7 +1381,7 @@ def se_compare_process(compare_info, ocr_res_dict):
                                    consts.HEAD_LIST[4]: value,
                                    consts.HEAD_LIST[5]: ocr_str,
                                    consts.HEAD_LIST[6]: result,
-                                    consts.HEAD_LIST[7]: position_img_path,
+                                    consts.HEAD_LIST[7]: field_img_path_dict.get(name, empty_str),
                                    consts.HEAD_LIST[8]: img_path,
                                    consts.HEAD_LIST[9]: error_type,
                                }
@@ -1239,10 +1392,10 @@ def se_compare_process(compare_info, ocr_res_dict):
            for license_en, field_list in info_value.items():
                failure_field = []
                if license_en == consts.MVC34_EN:
-                    result_field_list = se_mvc34_compare(license_en, ocr_res_dict, field_list)
+                    result_field_list, field_img_path_dict = se_mvc34_compare(license_en, ocr_res_dict, field_list)
                else:
-                    result_field_list, _ = se_compare_license(license_en, ocr_res_dict, field_list)
-                for name, value, result, ocr_str, position_img_path, img_path, error_type in result_field_list:
+                    result_field_list, _, field_img_path_dict = se_compare_license(license_en, ocr_res_dict, field_list)
+                for name, value, result, ocr_str, img_path, error_type in result_field_list:
                    total_fields += 1
                    if result == consts.RESULT_N:
                        failed_count += 1
@@ -1257,7 +1410,7 @@ def se_compare_process(compare_info, ocr_res_dict):
                            consts.HEAD_LIST[4]: value,
                            consts.HEAD_LIST[5]: ocr_str,
                            consts.HEAD_LIST[6]: result,
-                            consts.HEAD_LIST[7]: position_img_path,
+                            consts.HEAD_LIST[7]: field_img_path_dict.get(name, empty_str),
                            consts.HEAD_LIST[8]: img_path,
                            consts.HEAD_LIST[9]: error_type,
                        }