DDA part 1

周伟奇
Showing 2 changed files with 111 additions and 11 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
--- a/src/apps/doc/consts.py
View file @31656b3
+++ b/src/apps/doc/consts.py
View file @31656b3
@@ -520,7 +520,7 @@ OTHER_TUPLE = (None, None, None, None, None, None, None, None, None, None, None,
 #     "35":"针式打印-部分格线-竖版-邮储银行",
 #     "36":"针式打印-部分格线-竖版-邮储银行-绿卡",

-#     "38":"普通打印-无格线-农业银行-整数-特殊",
+#     "50":"普通打印-无格线-农业银行-整数-特殊",

 CLASSIFY_LIST = [
    ('其他', OTHER_TUPLE),
@@ -563,6 +563,18 @@ CLASSIFY_LIST = [
    ('针式打印-部分格线-竖版-邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
    ('针式打印-部分格线-竖版-邮储银行-绿卡', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
    ('其他', OTHER_TUPLE),
+    ('其他', OTHER_TUPLE),
+    ('其他', OTHER_TUPLE),
+    ('其他', OTHER_TUPLE),
+    ('其他', OTHER_TUPLE),
+    ('其他', OTHER_TUPLE),
+    ('其他', OTHER_TUPLE),
+    ('其他', OTHER_TUPLE),
+    ('其他', OTHER_TUPLE),
+    ('其他', OTHER_TUPLE),
+    ('其他', OTHER_TUPLE),
+    ('其他', OTHER_TUPLE),
+    ('其他', OTHER_TUPLE),

    ('普通打印-无格线-农业银行-整数-特殊', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
 ]
@@ -608,6 +620,18 @@ CLASSIFY_HEADER_LIST = [
    ('序号', '交易日期', '交易渠道', '摘要', '交易金额', '账户余额', '对方账号/卡号/汇票号', '原子账号', '交易机构名称'),
    ('序号', '交易日期', '交易渠道', '摘要', '交易金额', '账户余额', '对方账号/卡号/汇票号', '原子账号', '交易机构名称'),
    OTHER_TUPLE,
+    OTHER_TUPLE,
+    OTHER_TUPLE,
+    OTHER_TUPLE,
+    OTHER_TUPLE,
+    OTHER_TUPLE,
+    OTHER_TUPLE,
+    OTHER_TUPLE,
+    OTHER_TUPLE,
+    OTHER_TUPLE,
+    OTHER_TUPLE,
+    OTHER_TUPLE,
+    OTHER_TUPLE,

    ('交易日期', '摘要/附言', '交易金额', '账户余额', '对方账号和户名'),
 ]
@@ -812,7 +836,7 @@ MVI_FIELD_ORDER = (('发票代码', '发票代码'),
                   ('主管税务机关及代码', '主管税务机关及代码'),
                   ('吨位', '吨位'),
                   ('限乘人数', '限乘人数'),)
-IC_PID = VAT_PID = VATS_PID = MVC_PID = MVI_PID = None
+IC_PID = VAT_PID = VATS_PID = MVC_PID = MVI_PID = RP_PID = None

 # 营业执照
 BL_CN_NAME = '营业执照'
@@ -916,6 +940,10 @@ BC_FIELD_ORDER = (('BankName', '发卡行名称'),
                  ('CardType', '银行卡类型'),
                  ('Name', '持卡人姓名'),)

+# DDA
+DDA_CN_NAME = 'DDA'
+DDA_CLASSIFY = 38  # classify id 38 was previously SPECIAL_NYZS_CLASSIFY, which this change moves to 50
+
 SUCCESS_CODE_SET = {'0', 0}

 FIELD_ORDER_MAP = {
@@ -941,7 +969,7 @@ MODEL_FIELD_VAT = 'vat_count'

 LICENSE_ORDER = ((MVI_CLASSIFY, (MVI_PID, MVI_CN_NAME, MVI_FIELD_ORDER, False, False, MODEL_FIELD_MVI)),
                 (IC_CLASSIFY, (IC_PID, IC_CN_NAME, None, True, False, MODEL_FIELD_IC)),
-                 (RP_CLASSIFY, (None, RP_CN_NAME, None, True, False, MODEL_FIELD_RP)),
+                 (RP_CLASSIFY, (RP_PID, RP_CN_NAME, None, True, False, MODEL_FIELD_RP)),
                 (BC_CLASSIFY, (BC_PID, BC_CN_NAME, BC_FIELD_ORDER, False, False, MODEL_FIELD_BC)),
                 (BL_CLASSIFY, (BL_PID, BL_CN_NAME, BL_FIELD_ORDER, False, False, MODEL_FIELD_BL)),
                 (UCI_CLASSIFY, (UCI_PID, UCI_CN_NAME, UCI_FIELD_ORDER, False, False, MODEL_FIELD_UCI)),
@@ -960,12 +988,12 @@ FOLDER_LICENSE_ORDER = ((MVI_CLASSIFY, (MVI_PID, MVI_CN_NAME, MVI_FIELD_ORDER, F
 LICENSE_CLASSIFY_MAPPING = dict(LICENSE_ORDER)

 OTHER_CLASSIFY_SET = {OTHER_CLASSIFY}
-LICENSE_CLASSIFY_SET_1 = {IC_CLASSIFY, VAT_CLASSIFY, MVC_CLASSIFY, MVI_CLASSIFY, UCI_CLASSIFY}
+LICENSE_CLASSIFY_SET_1 = {IC_CLASSIFY, VAT_CLASSIFY, MVC_CLASSIFY, MVI_CLASSIFY, UCI_CLASSIFY, DDA_CLASSIFY}
 LICENSE_CLASSIFY_SET_2 = {BL_CLASSIFY, EEP_CLASSIFY, DL_CLASSIFY, PP_CLASSIFY, BC_CLASSIFY}

 NYYH_CLASSIFY = {17, 18}
 NYZS_CLASSIFY = 18
-SPECIAL_NYZS_CLASSIFY = 38
+SPECIAL_NYZS_CLASSIFY = 50
 MS_CLASSIFY = 21
 MS_ERROR_COL = (5, 6)
 WECHART_CLASSIFY = 12
@@ -1163,5 +1191,24 @@ UC_ORDER = ('vinNo', 'manufactureDate', 'firstRegistrationDate')
 CO_ORDER = ('customerType', 'customerChineseName', 'legalRepName', 'idNum', 'businessLicenseNo', 'taxRegistrationCode',
            'incorporationDate', 'businessLicenseDueDate', 'capitalRegAmount')

+# --------------- DDA: saving matched images --------------------
+DDA_FIELD = 'DDA'  # key under which a DDA page's own image path is stored
+IC_FIELD = 'ID'  # bucket key for ID-card image paths
+BC_FIELD = 'BC'  # bucket key for bank-card image paths
+
+IC_KEY_FIELD = ('姓名', '公民身份号码')  # ID-card OCR keys whose stripped values index image paths
+BC_KEY_FIELD = 'CardNum'  # bank-card OCR key whose stripped value indexes image paths

+DDA_IC_NAME = 'customer_name'  # keys read from the DDA OCR `result` dict (each holds a `words` value)
+DDA_IC_ID = 'customer_id'
+DDA_BC_NAME = 'account_name'
+DDA_BC_ID = 'account_id'
+DDA_IMG_PATH = 'img_path'  # key holding the DDA page's image path in the per-page summary dict
+
+
+DDA_MAPPING = [  # (DDA result key, bucket to search); DDA_BC_NAME is deliberately or accidentally absent - TODO confirm
+    (DDA_IC_NAME, IC_FIELD),
+    (DDA_IC_ID, IC_FIELD),
+    (DDA_BC_ID, BC_FIELD),
+]

--- a/src/apps/doc/management/commands/ocr_process.py
View file @31656b3
+++ b/src/apps/doc/management/commands/ocr_process.py
View file @31656b3
@@ -189,13 +189,23 @@ class Command(BaseCommand, LoggerMixin):
        else:
            res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))

-    def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path):
+    def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, is_hil, hil_id_bc_mapping):
        # 类别：'0'身份证， '1'居住证
-        license_data = ocr_data.get('data', [])
+        license_data = ocr_data.get('data')
        if not license_data:
            res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))
            return
        res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))
+        if classify == consts.DDA_CLASSIFY:  # DDA处理
+            dda_ocr_result = {
+                consts.DDA_IC_NAME: license_data.get('result', {}).get(consts.DDA_IC_NAME, {}).get('words', ''),
+                consts.DDA_IC_ID: license_data.get('result', {}).get(consts.DDA_IC_ID, {}).get('words', ''),
+                consts.DDA_BC_NAME: license_data.get('result', {}).get(consts.DDA_BC_NAME, {}).get('words', ''),
+                consts.DDA_BC_ID: license_data.get('result', {}).get(consts.DDA_BC_ID, {}).get('words', ''),
+                consts.DDA_IMG_PATH: img_path
+            }
+            license_summary.setdefault(classify, []).append(dda_ocr_result)
+
        if classify == consts.MVC_CLASSIFY:  # 车辆登记证 3/4页结果整合
            for mvc_dict in license_data:
                try:
@@ -231,6 +241,8 @@ class Command(BaseCommand, LoggerMixin):
                                mvc_dict['解除抵押日期'].append(
                                    register_info.get('details', {}).get('date', {}).get('words', ''))
                        del mvc_res
+            license_summary.setdefault(classify, []).extend(license_data)
+
        if classify == consts.IC_CLASSIFY:  # 身份证真伪
            for id_card_dict in license_data:
                try:
@@ -277,9 +289,18 @@ class Command(BaseCommand, LoggerMixin):
                            '{0} [ocr_4 failed] [img_path={1}]'.format(self.log_base, img_path))

                    id_card_dict[consts.IC_TURE_OR_FALSE] = consts.IC_RES_MAPPING.get(card_type)
+                finally:
+                    if is_hil:
+                        for key in consts.IC_KEY_FIELD:
+                            if not isinstance(id_card_dict.get(key), str):
+                                break
+                            value = id_card_dict[key].strip()
+                            if len(value) > 0:
+                                hil_id_bc_mapping.setdefault(consts.IC_FIELD, dict()).setdefault(
+                                    value, set()).add(img_path)
            license_summary.setdefault(classify, []).extend(license_data)

-    def license2_process(self, ocr_res_2, license_summary, pid, classify, res_list, pno, ino, part_idx):
+    def license2_process(self, ocr_res_2, license_summary, pid, classify, res_list, pno, ino, part_idx, img_path, is_hil, hil_id_bc_mapping):
        if ocr_res_2.get('ErrorCode') in consts.SUCCESS_CODE_SET:
            res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))
            if pid == consts.BC_PID:
@@ -288,6 +309,11 @@ class Command(BaseCommand, LoggerMixin):
                # for en_key, chn_key in consts.BC_FIELD:
                #     res_dict[chn_key] = ocr_res_2.get(en_key, '')
                license_summary.setdefault(classify, []).append(ocr_res_2)
+                if is_hil and isinstance(ocr_res_2.get(consts.BC_KEY_FIELD), str):
+                    value = ocr_res_2[consts.BC_KEY_FIELD].strip()
+                    if len(value) > 0:
+                        hil_id_bc_mapping.setdefault(consts.BC_FIELD, dict()).setdefault(
+                            value, set()).add(img_path)
            else:
                # 营业执照等
                for result_dict in ocr_res_2.get('ResultList', []):
@@ -696,6 +722,8 @@ class Command(BaseCommand, LoggerMixin):
                business_type, doc_id_str = task_str.split(consts.SPLIT_STR)
                doc_id = int(doc_id_str)
                doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc
+                is_hil = business_type == consts.HIL_PREFIX  # DDA image matching below only runs for HIL docs
+                hil_id_bc_mapping = dict()  # {license field: {stripped OCR value: {img_path, ...}}}

                doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, doc_id_str)
                excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc_id_str))
@@ -742,8 +770,8 @@ class Command(BaseCommand, LoggerMixin):
                                            res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_OTHER))
                                            continue
                                        elif classify in consts.LICENSE_CLASSIFY_SET_1:  # 证件1
-                                            self.license1_process(ocr_data, license_summary, classify, res_list,
-                                                                  pno, ino, part_idx, img_path)
+                                            self.license1_process(ocr_data, license_summary, classify, res_list, pno,
+                                                                  ino, part_idx, img_path, is_hil, hil_id_bc_mapping)
                                        elif classify in consts.LICENSE_CLASSIFY_SET_2:  # 证件2
                                            pid, _, _, _, _, _ = consts.LICENSE_CLASSIFY_MAPPING.get(classify)
                                            file_data = ocr_data.get('section_img')
@@ -788,7 +816,9 @@ class Command(BaseCommand, LoggerMixin):
                                                                    card_name_res.get('data', {}).get('is_exists_name') == 0:
                                                                name = '无'
                                                        ocr_2_res['Name'] = name
-                                                    self.license2_process(ocr_2_res, license_summary, pid, classify, res_list, pno, ino, part_idx)
+                                                    self.license2_process(ocr_2_res, license_summary, pid, classify,
+                                                                          res_list, pno, ino, part_idx, img_path,
+                                                                          is_hil, hil_id_bc_mapping)
                                                    break
                                            else:
                                                res_list.append((pno, ino, part_idx, consts.RES_FAILED_2))
@@ -890,6 +920,29 @@ class Command(BaseCommand, LoggerMixin):
                                    os.remove(excel_path)
                        finally:
                            # TODO 识别结果存一张表，方便跑报表
+
+                            # DDA处理
+                            if is_hil:
+                                # Collect, per DDA page, the ID-card / bank-card images to save with it.
+                                hil_dda_save_img_list = []
+                                # Each entry is the dict appended per page by license1_process: iterate it directly.
+                                for dda_res in license_summary.get(consts.DDA_CLASSIFY, []):
+                                    save_img_dict = {
+                                        consts.DDA_FIELD: dda_res.get(consts.DDA_IMG_PATH)
+                                    }
+                                    for dda_field, license_field in consts.DDA_MAPPING:
+                                        target_str = dda_res.get(dda_field, '')
+                                        save_img_dict.setdefault(license_field, set()).update(
+                                            hil_id_bc_mapping.get(license_field, dict()).get(target_str, set()))
+                                    hil_dda_save_img_list.append(save_img_dict)
+                                self.online_log.info('{0} [DDA process] [DDA_info={1}]'.format(self.log_base, license_summary.get(consts.DDA_CLASSIFY, [])))
+                                self.online_log.info('{0} [DDA process] [ic&bc_info={1}]'.format(self.log_base, hil_id_bc_mapping))
+                                self.online_log.info('{0} [DDA process] [img_path={1}]'.format(self.log_base, hil_dda_save_img_list))
+                                # 保存图片
+                                # 数据库记录
+                                # report记录
+
+                            # CA比对
                            if doc.document_scheme == consts.DOC_SCHEME_LIST[0]:
                                try:
                                    # 更新OCR累计识别结果表