add slice

周伟奇
Showing 3 changed files with 120 additions and 11 deletions
src/apps/doc/management/commands/ocr_process.py
src/celery_compare/tasks.py
src/common/electronic_hil_contract/get_char.py
--- a/src/apps/doc/management/commands/ocr_process.py
View file @3d620b3
+++ b/src/apps/doc/management/commands/ocr_process.py
View file @3d620b3
@@ -250,6 +250,7 @@ class Command(BaseCommand, LoggerMixin):
            page_num_only = page_num
        rebuild_page_info = []
        text_key = 'words'
+        position_key = 'position'
        for key, value in contract_dict.get('page_info', {}).items():
            if value is None:
                rebuild_page_info.append((key, ))
@@ -279,11 +280,17 @@ class Command(BaseCommand, LoggerMixin):
        contract_result.setdefault(classify, dict()).setdefault(page_num_only, []).append(rebuild_page_info)
-        page_compare_dict = {}
+        page_compare_dict = {
+            consts.IMG_PATH_KEY: img_path,
+            consts.ALL_POSITION_KEY: {},
+        }
        for key, value in contract_dict.get('page_info', {}).items():
            if not isinstance(value, dict):
                continue
            elif text_key in value:
+                position_list = value.get(position_key, [])
+                page_compare_dict[consts.ALL_POSITION_KEY][key] = position_list if isinstance(position_list, list) else []
                if value[text_key] is None:
                    page_compare_dict[key] = ''
                elif isinstance(value[text_key], str):
@@ -292,16 +299,47 @@ class Command(BaseCommand, LoggerMixin):
                    page_compare_dict[key] = value[text_key]
            else:
                page_compare_dict[key] = {}
+                page_compare_dict[consts.ALL_POSITION_KEY][key] = {}
                for sub_key, sub_value in value.items():
+                    position_list = sub_value.get(position_key, [])
+                    page_compare_dict[consts.ALL_POSITION_KEY][key][sub_key] = position_list if isinstance(
+                        position_list, list) else []
                    if sub_value[text_key] is None:
                        page_compare_dict[key][sub_key] = ''
                    elif isinstance(sub_value[text_key], str):
                        page_compare_dict[key][sub_key] = sub_value[text_key]
-        page_compare_dict[consts.IMG_PATH_KEY] = img_path
        contract_result_compare.setdefault(classify, dict())[consts.ASP_KEY] = contract_dict.get(consts.ASP_KEY, False)
+        # Each stored "position" value is a list of the form [xmin, ymin, xmax, ymax].
        contract_result_compare.setdefault(classify, dict())[page_num_only] = page_compare_dict
+    @staticmethod
+    def rebuild_position(src_position):
+        # Input:  'position': {'left': 470, 'top': 671, 'right': 542, 'bottom': 694}
+        #         where 'width' = 'right' - 'left' and 'height' = 'bottom' - 'top'
+        # Output: 'position': {'left': 470, 'top': 671, 'width': 72, 'height': 23}
+        try:
+            left = src_position.get('left', 0)
+            top = src_position.get('top', 0)
+            right = src_position.get('right', 0)
+            bottom = src_position.get('bottom', 0)
+            width = right - left
+            height = bottom - top
+            return {
+                'left': left,
+                'top': top,
+                'width': width,
+                'height': height,
+            }
+        except Exception as e:
+            return {
+                'left': 0,
+                'top': 0,
+                'width': 0,
+                'height': 0,
+            }
    def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, do_dda,
                         dda_id_bc_mapping):
        # 类别：'0'身份证， '1'居住证
@@ -329,6 +367,7 @@ class Command(BaseCommand, LoggerMixin):
        # 保单
        if classify == consts.INSURANCE_CLASSIFY:
            product_result = ['', '', '']
+            product_result_position = [dict(), dict(), dict()]
            min_char_count_1 = 1000
            min_char_count_2 = 1000
            for product in license_data.get('result', {}).get('productList', []):
@@ -338,10 +377,16 @@ class Command(BaseCommand, LoggerMixin):
                        min_char_count_1 = len(name)
                        product_result[0] = product.get('coverage', {}).get('words', '')
                        product_result[2] = product.get('deductible_franchise', {}).get('words', '')
+                        product_result_position[0] = self.rebuild_position(product.get('coverage', {}).get(
+                            'position', {}))
+                        product_result_position[2] = self.rebuild_position(product.get('deductible_franchise', {}).get(
+                            'position', {}))
                elif name.find('第三者责任') != -1:
                    if len(name) < min_char_count_2:
                        min_char_count_2 = len(name)
                        product_result[1] = product.get('coverage', {}).get('words', '')
+                        product_result_position[1] = self.rebuild_position(product.get('coverage', {}).get(
+                            'position', {}))
            special_str = license_data.get('result', {}).get('1stBeneficiary', {}).get('words', '')
            special = '无'
@@ -362,11 +407,29 @@ class Command(BaseCommand, LoggerMixin):
                consts.IMG_PATH_KEY: img_path,
                consts.SECTION_IMG_PATH_KEY: section_img_path,
            }
-            # 'position': {'left': 470, 'top': 671, 'right': 542, 'bottom': 694}
-            # position_dict = {
+            position_dict = {
-            #     '被保险人姓名': {consts.FIELD_POSITION_KEY: {}}
+                '被保险人姓名': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
-            # }
+                    'insured', {}).get('name', {}).get('position', {}))},
-            # insurance_ocr_result[consts.ALL_POSITION_KEY] = position_dict
+                '被保险人证件号码': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
+                    'insured', {}).get('certiCode', {}).get('position', {}))},
+                '车架号': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
+                    'vehicle', {}).get('VIN', {}).get('position', {}))},
+                '机动车损失保险金额': {consts.FIELD_POSITION_KEY: product_result_position[0]},
+                '机动车第三者责任保险金额': {consts.FIELD_POSITION_KEY: product_result_position[1]},
+                '机动车损失保险绝对免赔率/绝对免赔额': {consts.FIELD_POSITION_KEY: product_result_position[2]},
+                '保险费合计': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
+                    'premiumSum', {}).get('position', {}))},
+                '保险起始日期': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
+                    'startDate', {}).get('position', {}))},
+                '保险截止日期': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
+                    'endDate', {}).get('position', {}))},
+                '保单章': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
+                    'seal', {}).get('position', {}))},
+                '特别约定第一受益人': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
+                    '1stBeneficiary', {}).get('position', {}))},
+            }
+            insurance_ocr_result[consts.ALL_POSITION_KEY] = position_dict
            license_summary.setdefault(classify, []).append(insurance_ocr_result)
        # DDA
        elif classify == consts.DDA_CLASSIFY:
@@ -873,11 +936,24 @@ class Command(BaseCommand, LoggerMixin):
                        res[key] = page_info_dict.get(str(pno), {}).get(key1, '')
                        res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get(
                            consts.IMG_PATH_KEY, '')
+                        res.setdefault(consts.ALL_POSITION_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get(
+                            consts.ALL_POSITION_KEY, {}).get(key1, [])
                    else:
                        res[key] = page_info_dict.get(str(pno), {}).get(key1, {}).get(key2, '')
                        res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get(
                            consts.IMG_PATH_KEY, '')
+                        res.setdefault(consts.ALL_POSITION_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get(
+                            consts.ALL_POSITION_KEY, {}).get(key1, {}).get(key2, [])
+                # res = {
+                #     'key': 'list or str',
+                #     'uniq_img_path_key': {
+                #         'key': 'str',
+                #     },
+                #     'uniq_all_position_key': {
+                #         'key': 'list'
+                #     }
+                # }
                license_summary[classify] = [res]
            else:
                res = {}
--- a/src/celery_compare/tasks.py
View file @3d620b3
+++ b/src/celery_compare/tasks.py
View file @3d620b3
@@ -2405,11 +2405,13 @@ def se_contract_compare(license_en, ocr_res_dict, strip_list, is_gsyh):
    result_field_list = []
    field_img_path_dict = dict()
+    ocr_res = dict()
    if ocr_res_str is not None:
        ocr_res_list = json.loads(ocr_res_str)
        ocr_res = ocr_res_list.pop()
        for name, value in strip_list:
+            # 购置税校验
            if name == consts.SE_AFC_CON_FIELD[21]:
                if len(value) == 3:
                    reason = []
@@ -2471,6 +2473,29 @@ def se_contract_compare(license_en, ocr_res_dict, strip_list, is_gsyh):
            result_field_list.append((name, value, consts.RESULT_N, empty_str, empty_str, ErrorType.NF.value,
                                      '{0}未找到'.format(license_en)))
+    if ocr_res_str is not None:
+        img_map = {}
+        for name, _, result, _, img_path, _, _ in result_field_list:
+            if result == consts.RESULT_N:
+                img_map.setdefault(img_path, []).append(name)
+        for path, field_list in img_map.items():
+            if os.path.exists(path):
+                pre, suf = os.path.splitext(path)
+                last_img = cv2.imread(path)
+                for field_idx, field in enumerate(field_list):
+                    try:
+                        save_path = '{0}_{1}{2}'.format(pre, str(field_idx), suf)
+                        section_position_list = ocr_res.get(consts.ALL_POSITION_KEY, {}).get(field, [])
+                        if isinstance(section_position_list, list) and len(section_position_list) == 4:
+                            field_img = last_img[section_position_list[1]: section_position_list[3],
+                                                 section_position_list[0]: section_position_list[2], :]
+                            cv2.imwrite(save_path, field_img)
+                            field_img_path_dict[field] = save_path
+                        else:
+                            field_img_path_dict[field] = path
+                    except Exception as e:
+                        field_img_path_dict[field] = path
    return result_field_list, field_img_path_dict
--- a/src/common/electronic_hil_contract/get_char.py
View file @3d620b3
+++ b/src/common/electronic_hil_contract/get_char.py
View file @3d620b3
@@ -788,6 +788,7 @@ class Finder:
        items = []
        start = False
        page = None
+        greater_equal_v35 = False
        for pno in self.pdf_info:
            condition = False
            for block in self.pdf_info[f'{pno}']['blocks']:
@@ -796,6 +797,8 @@ class Finder:
                for line in block['lines']:
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text']
+                        if text == '租赁利率':
+                            greater_equal_v35 = True
                        if '总计' in text:
                            start = True
                        if '注：出租人向承租人购买租赁车辆的对价' in text:
@@ -804,9 +807,14 @@ class Finder:
                        if start == True:
                            items.append(text)
        lines = [['项目', '购买价格', '实际融资金额']]
-        for i in range(len(items) // 3):
+        if greater_equal_v35:
-            line = [items[2 + i * 3 + 0], items[2 + i * 3 + 1], items[2 + i * 3 + 2]]
+            for i in range(len(items) // 4):
-            lines.append(line)
+                line = [items[2 + i * 4 + 0], items[2 + i * 4 + 1], items[2 + i * 4 + 2]]
+                lines.append(line)
+        else:
+            for i in range(len(items) // 3):
+                line = [items[2 + i * 3 + 0], items[2 + i * 3 + 1], items[2 + i * 3 + 2]]
+                lines.append(line)
        if len(items) > 0:
            lines.append([items[0], '', items[1]])