Add field positions and section image paths to OCR results

周伟奇
Showing 1 changed file with 168 additions and 55 deletions
src/apps/doc/management/commands/ocr_process.py
--- a/src/apps/doc/management/commands/ocr_process.py
View file @77676d5
+++ b/src/apps/doc/management/commands/ocr_process.py
View file @77676d5
@@ -298,14 +298,26 @@ class Command(BaseCommand, LoggerMixin):
    #             rebuild_data_dict[detail_dict.get('chinese_key', '')] = detail_dict.get('words', '')
    #         return [rebuild_data_dict]

-
-
-    def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, do_dda, dda_id_bc_mapping):
+    def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, do_dda,
+                         dda_id_bc_mapping):
        # 类别：'0'身份证， '1'居住证
        license_data = ocr_data.get('data')
        if not license_data:
            res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))
            return
+        pre, suf = os.path.splitext(img_path)
+        base64_img = license_data.pop('base64_img', '')
+        is_save = True if len(base64_img) > 0 else False
+        section_img_path = '{0}_{1}{2}'.format(pre, part_idx, suf) if is_save else img_path
+        if is_save:
+            try:
+                with open(section_img_path, "wb") as fh:
+                    fh.write(base64.b64decode(base64_img.encode()))
+            except Exception as e:
+                self.online_log.warn(
+                    '{0} [section img save failed] [img_path={1}]'
+                    ' [part_idx={2}]'.format(self.log_base, img_path, part_idx))
+
        # 保单
        if classify == consts.INSURANCE_CLASSIFY:
            product_result = ['', '', '']
@@ -333,7 +345,13 @@ class Command(BaseCommand, LoggerMixin):
                '保险截止日期': license_data.get('result', {}).get('endDate', {}).get('words', ''),
                '保单章': license_data.get('result', {}).get('seal', {}).get('words', ''),
                '特别约定第一受益人': special,
+                consts.IMG_PATH_KEY: img_path,
+                consts.SECTION_IMG_PATH_KEY: section_img_path,
            }
+            # position_dict = {
+            #     '': {consts.FIELD_POSITION_KEY: {}}
+            # }
+            # insurance_ocr_result[consts.ALL_POSITION_KEY] = position_dict
            license_summary.setdefault(classify, []).append(insurance_ocr_result)
        # DDA
        elif classify == consts.DDA_CLASSIFY:
@@ -341,37 +359,103 @@ class Command(BaseCommand, LoggerMixin):
            if pro < consts.DDA_PRO_MIN:
                res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))
                return
-            dda_ocr_result = {key: value.get('words', '') for key, value in license_data.get('result', {}).items()}
+            dda_ocr_result = {}
+            position_dict = {}
+            for key, value in license_data.get('result', {}).items():
+                dda_ocr_result[key] = value.get('words', '')
+                position_dict[key] = {
+                    consts.FIELD_POSITION_KEY: value.get('position', {})
+                }
            dda_ocr_result[consts.DDA_IMG_PATH] = img_path
            dda_ocr_result[consts.DDA_PRO] = pro
            dda_ocr_result[consts.IMG_PATH_KEY] = img_path
+            dda_ocr_result[consts.SECTION_IMG_PATH_KEY] = section_img_path
+            dda_ocr_result[consts.ALL_POSITION_KEY] = position_dict
            license_summary.setdefault(classify, []).append(dda_ocr_result)
        # 抵押登记豁免函
        elif classify == consts.HMH_CLASSIFY:
-            hmh_ocr_result = {key: value.get('words', '') for key, value in license_data.get('words_result', {}).items()}
+            hmh_ocr_result = {}
+            position_dict = {}
+            for key, value in license_data.get('words_result', {}).items():
+                hmh_ocr_result[key] = value.get('words', '')
+                location_list = value.get('location', [-1, -1, -1, -1])
+                if len(location_list) == 4:
+                    position_dict[key] = {
+                        consts.FIELD_POSITION_KEY: {
+                            'top': location_list[1],
+                            'left': location_list[0],
+                            'height': location_list[-1] - location_list[1],
+                            'width': location_list[2] - location_list[0]
+                        }
+                    }
+            hmh_ocr_result[consts.IMG_PATH_KEY] = img_path
+            hmh_ocr_result[consts.SECTION_IMG_PATH_KEY] = section_img_path
+            hmh_ocr_result[consts.ALL_POSITION_KEY] = position_dict
            license_summary.setdefault(classify, []).append(hmh_ocr_result)
        # 二手车交易凭证
        elif classify == consts.JYPZ_CLASSIFY:
-            jypz_ocr_result = {key: value.get('words', '') for key, value in license_data.get('result', {}).items()}
+            jypz_ocr_result = {}
+            position_dict = {}
+            for key, value in license_data.get('result', {}).items():
+                jypz_ocr_result[key] = value.get('words', '')
+                position_dict[key] = {
+                    consts.FIELD_POSITION_KEY: value.get('position', {})
+                }
+            jypz_ocr_result[consts.IMG_PATH_KEY] = img_path
+            jypz_ocr_result[consts.SECTION_IMG_PATH_KEY] = section_img_path
+            jypz_ocr_result[consts.ALL_POSITION_KEY] = position_dict
            license_summary.setdefault(classify, []).append(jypz_ocr_result)
        # 车辆登记证 3/4页结果整合
        elif classify == consts.MVC_CLASSIFY:
            rebuild_data_dict = {}
+            position_dict = {}
            rebuild_data_dict[consts.IMG_PATH_KEY] = img_path
+            rebuild_data_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path
            mvc_page = license_data.pop('page', 'VehicleRCI')
            mvc_res = license_data.pop('results', {})
            if mvc_page == 'VehicleRegArea':
                rebuild_data_dict['机动车登记证书编号'] = mvc_res.get('机动车登记证书编号', {}).get('words', '')
+                code_position_list = mvc_res.get('机动车登记证书编号', {}).get('position', [0, 0, 0, 0])
+                if len(code_position_list) == 4:
+                    position_dict['机动车登记证书编号'] = {
+                        consts.FIELD_POSITION_KEY: {
+                            'top': code_position_list[1],
+                            'left': code_position_list[0],
+                            'height': code_position_list[-1],
+                            'width': code_position_list[2],
+                        }
+                    }
                for register_info in mvc_res.get('登记信息', []):
                    register_info.pop('register_type', None)
                    register_info.pop('register_type_name', None)
                    for cn_key, detail_dict in register_info.items():
                        rebuild_data_dict.setdefault(cn_key, []).append(
                            detail_dict.get('words', ''))
+                        tmp_position_list = detail_dict.get('position', [0, 0, 0, 0])
+                        if len(tmp_position_list) == 4:
+                            position_dict[cn_key] = {
+                                consts.FIELD_POSITION_KEY: {
+                                    'top': tmp_position_list[1],
+                                    'left': tmp_position_list[0],
+                                    'height': tmp_position_list[-1],
+                                    'width': tmp_position_list[2],
+                                }
+                            }
            else:
                for cn_key, detail_dict in mvc_res.items():
                    rebuild_data_dict[cn_key] = detail_dict.get('words', '')
+                    position_list = detail_dict.get('position', [0, 0, 0, 0])
+                    if len(position_list) == 4:
+                        position_dict[cn_key] = {
+                            consts.FIELD_POSITION_KEY: {
+                                'top': position_list[1],
+                                'left': position_list[0],
+                                'height': position_list[-1],
+                                'width': position_list[2],
+                            }
+                        }
            del mvc_res
+            rebuild_data_dict[consts.ALL_POSITION_KEY] = position_dict
            license_summary.setdefault(classify, []).append(rebuild_data_dict)


@@ -395,6 +479,7 @@ class Command(BaseCommand, LoggerMixin):
        # 身份证真伪
        elif classify == consts.IC_CLASSIFY:
            id_card_dict = {}
+            position_dict = {}
            card_type = license_data.get('type', '')
            is_ic = card_type.startswith('身份证')
            is_info_side = card_type.endswith('信息面')
@@ -405,78 +490,106 @@ class Command(BaseCommand, LoggerMixin):
                field_map = consts.RP_MAP_0 if is_info_side else consts.RP_MAP_1
            for write_field, search_field in field_map:
                id_card_dict[write_field] = license_data.get('words_result', {}).get(search_field, {}).get('words', '')
+                location_list = license_data.get('words_result', {}).get(search_field, {}).get(
+                    'location', [-1, -1, -1, -1])
+                if len(location_list) == 4:
+                    position_dict[write_field] = {
+                        consts.FIELD_POSITION_KEY: {
+                            'top': location_list[1],
+                            'left': location_list[0],
+                            'height': location_list[-1] - location_list[1],
+                            'width': location_list[2] - location_list[0]
+                        }
+                    }
            if not is_info_side:
                start_time = license_data.get('words_result', {}).get('签发日期', {}).get('words', '')
                end_time = license_data.get('words_result', {}).get('失效日期', {}).get('words', '')
                id_card_dict['有效期限'] = '{0}-{1}'.format(start_time, end_time)
+                end_time_location_list = license_data.get('words_result', {}).get('失效日期', {}).get(
+                    'location', [-1, -1, -1, -1])
+                if len(end_time_location_list) == 4:
+                    position_dict['有效期限'] = {
+                        consts.FIELD_POSITION_KEY: {
+                            'top': end_time_location_list[1],
+                            'left': end_time_location_list[0],
+                            'height': end_time_location_list[-1] - end_time_location_list[1],
+                            'width': end_time_location_list[2] - end_time_location_list[0]
+                        }
+                    }

-
+            id_card_dict[consts.ALL_POSITION_KEY] = position_dict
+            id_card_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path
            if not is_info_side:
                id_card_dict[consts.IMG_PATH_KEY_2] = img_path
            else:
                id_card_dict[consts.IMG_PATH_KEY] = img_path
-            if is_ic:
-                try:
-                    base64_img = license_data.pop('base64_img')
-                except Exception as e:
-                    pass
+            if is_ic and is_save:
+                card_type = -1
+                json_data_4 = {
+                    'mode': 1,
+                    'user_info': {
+                        'image_content': base64_img,
+                    },
+                    'options': {
+                        'distinguish_type': 1,
+                        'auto_rotate': True,
+                    },
+                }
+                for times in range(consts.RETRY_TIMES):
+                    try:
+                        start_time = time.time()
+                        ocr_4_response = requests.post(self.ocr_url_4, json=json_data_4)
+                        if ocr_4_response.status_code != 200:
+                            raise OCR4Exception('ocr_4 status code: {0}'.format(ocr_4_response.status_code))
+                    except Exception as e:
+                        self.online_log.warn(
+                            '{0} [ocr_4 failed] [times={1}] [img_path={2}] [error={3}]'.format(
+                                self.log_base, times, img_path, traceback.format_exc()))
+                    else:
+                        ocr_4_res = ocr_4_response.json()
+                        end_time = time.time()
+                        speed_time = int(end_time - start_time)
+
+                        if ocr_4_res.get('code') == 0 and ocr_4_res.get('result', {}).get('rtn') == 0:
+                            card_type = ocr_4_res.get('result', {}).get(
+                                'idcard_distinguish_result', {}).get('result', -1)
+
+                        self.online_log.info(
+                            '{0} [ocr_4 success] [img_path={1}] [speed_time={2}]'.format(
+                                self.log_base, img_path, speed_time))
+                        break
                else:
-                    card_type = -1
-                    json_data_4 = {
-                        'mode': 1,
-                        'user_info': {
-                            'image_content': base64_img,
-                        },
-                        'options': {
-                            'distinguish_type': 1,
-                            'auto_rotate': True,
-                        },
-                    }
-                    for times in range(consts.RETRY_TIMES):
-                        try:
-                            start_time = time.time()
-                            ocr_4_response = requests.post(self.ocr_url_4, json=json_data_4)
-                            if ocr_4_response.status_code != 200:
-                                raise OCR4Exception('ocr_4 status code: {0}'.format(ocr_4_response.status_code))
-                        except Exception as e:
-                            self.online_log.warn(
-                                '{0} [ocr_4 failed] [times={1}] [img_path={2}] [error={3}]'.format(
-                                    self.log_base, times, img_path, traceback.format_exc()))
-                        else:
-                            ocr_4_res = ocr_4_response.json()
-                            end_time = time.time()
-                            speed_time = int(end_time - start_time)
+                    self.online_log.warn(
+                        '{0} [ocr_4 failed] [img_path={1}]'.format(self.log_base, img_path))

-                            if ocr_4_res.get('code') == 0 and ocr_4_res.get('result', {}).get('rtn') == 0:
-                                card_type = ocr_4_res.get('result', {}).get(
-                                    'idcard_distinguish_result', {}).get('result', -1)
+                id_card_dict[consts.IC_TURE_OR_FALSE] = consts.IC_RES_MAPPING.get(card_type)

-                            self.online_log.info(
-                                '{0} [ocr_4 success] [img_path={1}] [speed_time={2}]'.format(
-                                    self.log_base, img_path, speed_time))
-                            break
-                    else:
-                        self.online_log.warn(
-                            '{0} [ocr_4 failed] [img_path={1}]'.format(self.log_base, img_path))
-
-                    id_card_dict[consts.IC_TURE_OR_FALSE] = consts.IC_RES_MAPPING.get(card_type)
-                finally:
-                    if do_dda and isinstance(id_card_dict.get(consts.IC_KEY_FIELD[0]), str) and isinstance(id_card_dict.get(consts.IC_KEY_FIELD[1]), str):
-                        ic_name = id_card_dict[consts.IC_KEY_FIELD[0]].strip()
-                        ic_id = id_card_dict[consts.IC_KEY_FIELD[1]].strip()
-                        if len(ic_name) > 0 and len(ic_id) > 0:
-                            dda_id_bc_mapping.setdefault(consts.IC_FIELD, []).append((ic_name, ic_id, img_path))
+                if do_dda and isinstance(id_card_dict.get(consts.IC_KEY_FIELD[0]), str) and \
+                        isinstance(id_card_dict.get(consts.IC_KEY_FIELD[1]), str):
+                    ic_name = id_card_dict.get(consts.IC_KEY_FIELD[0], '').strip()
+                    ic_id = id_card_dict.get(consts.IC_KEY_FIELD[1], '').strip()
+                    if len(ic_name) > 0 and len(ic_id) > 0:
+                        dda_id_bc_mapping.setdefault(consts.IC_FIELD, []).append((ic_name, ic_id, img_path))
            license_summary.setdefault(classify, []).append(id_card_dict)
+        # 购车发票 & 二手车发票
        elif classify == consts.MVI_CLASSIFY or classify == consts.UCI_CLASSIFY:
            rebuild_data_dict = {}
+            position_dict = {}
            mvi_res = license_data.pop('result', {})
            for en_key, detail_dict in mvi_res.items():
                rebuild_data_dict[detail_dict.get('chinese_key', '')] = detail_dict.get('words', '')
+                position_dict[detail_dict.get('chinese_key', '')] = {
+                    consts.FIELD_POSITION_KEY: detail_dict.get('position', {})
+                }
+            rebuild_data_dict[consts.IMG_PATH_KEY] = img_path
+            rebuild_data_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path
+            rebuild_data_dict[consts.ALL_POSITION_KEY] = position_dict
            license_summary.setdefault(classify, []).append(rebuild_data_dict)
        # 其他
        else:
            for res_dict in license_data:
                res_dict[consts.IMG_PATH_KEY] = img_path
+                res_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path
            license_summary.setdefault(classify, []).extend(license_data)
        res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))