31656b38 by 周伟奇

DDA part 1

1 parent 779dbb1a
...@@ -520,7 +520,7 @@ OTHER_TUPLE = (None, None, None, None, None, None, None, None, None, None, None, ...@@ -520,7 +520,7 @@ OTHER_TUPLE = (None, None, None, None, None, None, None, None, None, None, None,
520 # "35":"针式打印-部分格线-竖版-邮储银行", 520 # "35":"针式打印-部分格线-竖版-邮储银行",
521 # "36":"针式打印-部分格线-竖版-邮储银行-绿卡", 521 # "36":"针式打印-部分格线-竖版-邮储银行-绿卡",
522 522
523 # "38":"普通打印-无格线-农业银行-整数-特殊", 523 # "50":"普通打印-无格线-农业银行-整数-特殊",
524 524
525 CLASSIFY_LIST = [ 525 CLASSIFY_LIST = [
526 ('其他', OTHER_TUPLE), 526 ('其他', OTHER_TUPLE),
...@@ -563,6 +563,18 @@ CLASSIFY_LIST = [ ...@@ -563,6 +563,18 @@ CLASSIFY_LIST = [
563 ('针式打印-部分格线-竖版-邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)), 563 ('针式打印-部分格线-竖版-邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
564 ('针式打印-部分格线-竖版-邮储银行-绿卡', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)), 564 ('针式打印-部分格线-竖版-邮储银行-绿卡', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
565 ('其他', OTHER_TUPLE), 565 ('其他', OTHER_TUPLE),
566 ('其他', OTHER_TUPLE),
567 ('其他', OTHER_TUPLE),
568 ('其他', OTHER_TUPLE),
569 ('其他', OTHER_TUPLE),
570 ('其他', OTHER_TUPLE),
571 ('其他', OTHER_TUPLE),
572 ('其他', OTHER_TUPLE),
573 ('其他', OTHER_TUPLE),
574 ('其他', OTHER_TUPLE),
575 ('其他', OTHER_TUPLE),
576 ('其他', OTHER_TUPLE),
577 ('其他', OTHER_TUPLE),
566 578
567 ('普通打印-无格线-农业银行-整数-特殊', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)), 579 ('普通打印-无格线-农业银行-整数-特殊', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
568 ] 580 ]
...@@ -608,6 +620,18 @@ CLASSIFY_HEADER_LIST = [ ...@@ -608,6 +620,18 @@ CLASSIFY_HEADER_LIST = [
608 ('序号', '交易日期', '交易渠道', '摘要', '交易金额', '账户余额', '对方账号/卡号/汇票号', '原子账号', '交易机构名称'), 620 ('序号', '交易日期', '交易渠道', '摘要', '交易金额', '账户余额', '对方账号/卡号/汇票号', '原子账号', '交易机构名称'),
609 ('序号', '交易日期', '交易渠道', '摘要', '交易金额', '账户余额', '对方账号/卡号/汇票号', '原子账号', '交易机构名称'), 621 ('序号', '交易日期', '交易渠道', '摘要', '交易金额', '账户余额', '对方账号/卡号/汇票号', '原子账号', '交易机构名称'),
610 OTHER_TUPLE, 622 OTHER_TUPLE,
623 OTHER_TUPLE,
624 OTHER_TUPLE,
625 OTHER_TUPLE,
626 OTHER_TUPLE,
627 OTHER_TUPLE,
628 OTHER_TUPLE,
629 OTHER_TUPLE,
630 OTHER_TUPLE,
631 OTHER_TUPLE,
632 OTHER_TUPLE,
633 OTHER_TUPLE,
634 OTHER_TUPLE,
611 635
612 ('交易日期', '摘要/附言', '交易金额', '账户余额', '对方账号和户名'), 636 ('交易日期', '摘要/附言', '交易金额', '账户余额', '对方账号和户名'),
613 ] 637 ]
...@@ -812,7 +836,7 @@ MVI_FIELD_ORDER = (('发票代码', '发票代码'), ...@@ -812,7 +836,7 @@ MVI_FIELD_ORDER = (('发票代码', '发票代码'),
812 ('主管税务机关及代码', '主管税务机关及代码'), 836 ('主管税务机关及代码', '主管税务机关及代码'),
813 ('吨位', '吨位'), 837 ('吨位', '吨位'),
814 ('限乘人数', '限乘人数'),) 838 ('限乘人数', '限乘人数'),)
815 IC_PID = VAT_PID = VATS_PID = MVC_PID = MVI_PID = None 839 IC_PID = VAT_PID = VATS_PID = MVC_PID = MVI_PID = RP_PID = None
816 840
817 # 营业执照 841 # 营业执照
818 BL_CN_NAME = '营业执照' 842 BL_CN_NAME = '营业执照'
...@@ -916,6 +940,10 @@ BC_FIELD_ORDER = (('BankName', '发卡行名称'), ...@@ -916,6 +940,10 @@ BC_FIELD_ORDER = (('BankName', '发卡行名称'),
916 ('CardType', '银行卡类型'), 940 ('CardType', '银行卡类型'),
917 ('Name', '持卡人姓名'),) 941 ('Name', '持卡人姓名'),)
918 942
943 # DDA
944 DDA_CN_NAME = 'DDA'
945 DDA_CLASSIFY = 38
946
919 SUCCESS_CODE_SET = {'0', 0} 947 SUCCESS_CODE_SET = {'0', 0}
920 948
921 FIELD_ORDER_MAP = { 949 FIELD_ORDER_MAP = {
...@@ -941,7 +969,7 @@ MODEL_FIELD_VAT = 'vat_count' ...@@ -941,7 +969,7 @@ MODEL_FIELD_VAT = 'vat_count'
941 969
942 LICENSE_ORDER = ((MVI_CLASSIFY, (MVI_PID, MVI_CN_NAME, MVI_FIELD_ORDER, False, False, MODEL_FIELD_MVI)), 970 LICENSE_ORDER = ((MVI_CLASSIFY, (MVI_PID, MVI_CN_NAME, MVI_FIELD_ORDER, False, False, MODEL_FIELD_MVI)),
943 (IC_CLASSIFY, (IC_PID, IC_CN_NAME, None, True, False, MODEL_FIELD_IC)), 971 (IC_CLASSIFY, (IC_PID, IC_CN_NAME, None, True, False, MODEL_FIELD_IC)),
944 (RP_CLASSIFY, (None, RP_CN_NAME, None, True, False, MODEL_FIELD_RP)), 972 (RP_CLASSIFY, (RP_PID, RP_CN_NAME, None, True, False, MODEL_FIELD_RP)),
945 (BC_CLASSIFY, (BC_PID, BC_CN_NAME, BC_FIELD_ORDER, False, False, MODEL_FIELD_BC)), 973 (BC_CLASSIFY, (BC_PID, BC_CN_NAME, BC_FIELD_ORDER, False, False, MODEL_FIELD_BC)),
946 (BL_CLASSIFY, (BL_PID, BL_CN_NAME, BL_FIELD_ORDER, False, False, MODEL_FIELD_BL)), 974 (BL_CLASSIFY, (BL_PID, BL_CN_NAME, BL_FIELD_ORDER, False, False, MODEL_FIELD_BL)),
947 (UCI_CLASSIFY, (UCI_PID, UCI_CN_NAME, UCI_FIELD_ORDER, False, False, MODEL_FIELD_UCI)), 975 (UCI_CLASSIFY, (UCI_PID, UCI_CN_NAME, UCI_FIELD_ORDER, False, False, MODEL_FIELD_UCI)),
...@@ -960,12 +988,12 @@ FOLDER_LICENSE_ORDER = ((MVI_CLASSIFY, (MVI_PID, MVI_CN_NAME, MVI_FIELD_ORDER, F ...@@ -960,12 +988,12 @@ FOLDER_LICENSE_ORDER = ((MVI_CLASSIFY, (MVI_PID, MVI_CN_NAME, MVI_FIELD_ORDER, F
960 LICENSE_CLASSIFY_MAPPING = dict(LICENSE_ORDER) 988 LICENSE_CLASSIFY_MAPPING = dict(LICENSE_ORDER)
961 989
962 OTHER_CLASSIFY_SET = {OTHER_CLASSIFY} 990 OTHER_CLASSIFY_SET = {OTHER_CLASSIFY}
963 LICENSE_CLASSIFY_SET_1 = {IC_CLASSIFY, VAT_CLASSIFY, MVC_CLASSIFY, MVI_CLASSIFY, UCI_CLASSIFY} 991 LICENSE_CLASSIFY_SET_1 = {IC_CLASSIFY, VAT_CLASSIFY, MVC_CLASSIFY, MVI_CLASSIFY, UCI_CLASSIFY, DDA_CLASSIFY}
964 LICENSE_CLASSIFY_SET_2 = {BL_CLASSIFY, EEP_CLASSIFY, DL_CLASSIFY, PP_CLASSIFY, BC_CLASSIFY} 992 LICENSE_CLASSIFY_SET_2 = {BL_CLASSIFY, EEP_CLASSIFY, DL_CLASSIFY, PP_CLASSIFY, BC_CLASSIFY}
965 993
966 NYYH_CLASSIFY = {17, 18} 994 NYYH_CLASSIFY = {17, 18}
967 NYZS_CLASSIFY = 18 995 NYZS_CLASSIFY = 18
968 SPECIAL_NYZS_CLASSIFY = 38 996 SPECIAL_NYZS_CLASSIFY = 50
969 MS_CLASSIFY = 21 997 MS_CLASSIFY = 21
970 MS_ERROR_COL = (5, 6) 998 MS_ERROR_COL = (5, 6)
971 WECHART_CLASSIFY = 12 999 WECHART_CLASSIFY = 12
...@@ -1163,5 +1191,24 @@ UC_ORDER = ('vinNo', 'manufactureDate', 'firstRegistrationDate') ...@@ -1163,5 +1191,24 @@ UC_ORDER = ('vinNo', 'manufactureDate', 'firstRegistrationDate')
1163 CO_ORDER = ('customerType', 'customerChineseName', 'legalRepName', 'idNum', 'businessLicenseNo', 'taxRegistrationCode', 1191 CO_ORDER = ('customerType', 'customerChineseName', 'legalRepName', 'idNum', 'businessLicenseNo', 'taxRegistrationCode',
1164 'incorporationDate', 'businessLicenseDueDate', 'capitalRegAmount') 1192 'incorporationDate', 'businessLicenseDueDate', 'capitalRegAmount')
1165 1193
1194 # --------------- DDA 保存图片 --------------------
1195 DDA_FIELD = 'DDA'
1196 IC_FIELD = 'ID'
1197 BC_FIELD = 'BC'
1198
1199 IC_KEY_FIELD = ('姓名', '公民身份号码')
1200 BC_KEY_FIELD = 'CardNum'
1166 1201
1202 DDA_IC_NAME = 'customer_name'
1203 DDA_IC_ID = 'customer_id'
1204 DDA_BC_NAME = 'account_name'
1205 DDA_BC_ID = 'account_id'
1206 DDA_IMG_PATH = 'img_path'
1207
1208
1209 DDA_MAPPING = [
1210 (DDA_IC_NAME, IC_FIELD),
1211 (DDA_IC_ID, IC_FIELD),
1212 (DDA_BC_ID, BC_FIELD),
1213 ]
1167 1214
......
...@@ -189,13 +189,23 @@ class Command(BaseCommand, LoggerMixin): ...@@ -189,13 +189,23 @@ class Command(BaseCommand, LoggerMixin):
189 else: 189 else:
190 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) 190 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))
191 191
192 def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path): 192 def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, is_hil, hil_id_bc_mapping):
193 # 类别:'0'身份证, '1'居住证 193 # 类别:'0'身份证, '1'居住证
194 license_data = ocr_data.get('data', []) 194 license_data = ocr_data.get('data')
195 if not license_data: 195 if not license_data:
196 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) 196 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))
197 return 197 return
198 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS)) 198 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))
199 if classify == consts.DDA_CLASSIFY: # DDA处理
200 dda_ocr_result = {
201 consts.DDA_IC_NAME: license_data.get('result', {}).get(consts.DDA_IC_NAME, {}).get('words', ''),
202 consts.DDA_IC_ID: license_data.get('result', {}).get(consts.DDA_IC_ID, {}).get('words', ''),
203 consts.DDA_BC_NAME: license_data.get('result', {}).get(consts.DDA_BC_NAME, {}).get('words', ''),
204 consts.DDA_BC_ID: license_data.get('result', {}).get(consts.DDA_BC_ID, {}).get('words', ''),
205 consts.DDA_IMG_PATH: img_path
206 }
207 license_summary.setdefault(classify, []).append(dda_ocr_result)
208
199 if classify == consts.MVC_CLASSIFY: # 车辆登记证 3/4页结果整合 209 if classify == consts.MVC_CLASSIFY: # 车辆登记证 3/4页结果整合
200 for mvc_dict in license_data: 210 for mvc_dict in license_data:
201 try: 211 try:
...@@ -231,6 +241,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -231,6 +241,8 @@ class Command(BaseCommand, LoggerMixin):
231 mvc_dict['解除抵押日期'].append( 241 mvc_dict['解除抵押日期'].append(
232 register_info.get('details', {}).get('date', {}).get('words', '')) 242 register_info.get('details', {}).get('date', {}).get('words', ''))
233 del mvc_res 243 del mvc_res
244 license_summary.setdefault(classify, []).extend(license_data)
245
234 if classify == consts.IC_CLASSIFY: # 身份证真伪 246 if classify == consts.IC_CLASSIFY: # 身份证真伪
235 for id_card_dict in license_data: 247 for id_card_dict in license_data:
236 try: 248 try:
...@@ -277,9 +289,18 @@ class Command(BaseCommand, LoggerMixin): ...@@ -277,9 +289,18 @@ class Command(BaseCommand, LoggerMixin):
277 '{0} [ocr_4 failed] [img_path={1}]'.format(self.log_base, img_path)) 289 '{0} [ocr_4 failed] [img_path={1}]'.format(self.log_base, img_path))
278 290
279 id_card_dict[consts.IC_TURE_OR_FALSE] = consts.IC_RES_MAPPING.get(card_type) 291 id_card_dict[consts.IC_TURE_OR_FALSE] = consts.IC_RES_MAPPING.get(card_type)
280 license_summary.setdefault(classify, []).extend(license_data) 292 finally:
281 293 if is_hil:
282 def license2_process(self, ocr_res_2, license_summary, pid, classify, res_list, pno, ino, part_idx): 294 for key in consts.IC_KEY_FIELD:
295 if not isinstance(id_card_dict.get(key), str):
296 break
297 value = id_card_dict[key].strip()
298 if len(value) > 0:
299 hil_id_bc_mapping.setdefault(consts.IC_FIELD, dict()).setdefault(
300 value, set()).add(img_path)
301 license_summary.setdefault(classify, []).extend(license_data)
302
303 def license2_process(self, ocr_res_2, license_summary, pid, classify, res_list, pno, ino, part_idx, img_path, is_hil, hil_id_bc_mapping):
283 if ocr_res_2.get('ErrorCode') in consts.SUCCESS_CODE_SET: 304 if ocr_res_2.get('ErrorCode') in consts.SUCCESS_CODE_SET:
284 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS)) 305 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))
285 if pid == consts.BC_PID: 306 if pid == consts.BC_PID:
...@@ -288,6 +309,11 @@ class Command(BaseCommand, LoggerMixin): ...@@ -288,6 +309,11 @@ class Command(BaseCommand, LoggerMixin):
288 # for en_key, chn_key in consts.BC_FIELD: 309 # for en_key, chn_key in consts.BC_FIELD:
289 # res_dict[chn_key] = ocr_res_2.get(en_key, '') 310 # res_dict[chn_key] = ocr_res_2.get(en_key, '')
290 license_summary.setdefault(classify, []).append(ocr_res_2) 311 license_summary.setdefault(classify, []).append(ocr_res_2)
312 if is_hil and isinstance(ocr_res_2.get(consts.BC_KEY_FIELD), str):
313 value = ocr_res_2[consts.BC_KEY_FIELD].strip()
314 if len(value) > 0:
315 hil_id_bc_mapping.setdefault(consts.BC_FIELD, dict()).setdefault(
316 value, set()).add(img_path)
291 else: 317 else:
292 # 营业执照等 318 # 营业执照等
293 for result_dict in ocr_res_2.get('ResultList', []): 319 for result_dict in ocr_res_2.get('ResultList', []):
...@@ -696,6 +722,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -696,6 +722,8 @@ class Command(BaseCommand, LoggerMixin):
696 business_type, doc_id_str = task_str.split(consts.SPLIT_STR) 722 business_type, doc_id_str = task_str.split(consts.SPLIT_STR)
697 doc_id = int(doc_id_str) 723 doc_id = int(doc_id_str)
698 doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc 724 doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc
725 is_hil = True if business_type == consts.HIL_PREFIX else False
726 hil_id_bc_mapping = dict()
699 727
700 doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, doc_id_str) 728 doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, doc_id_str)
701 excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc_id_str)) 729 excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc_id_str))
...@@ -742,8 +770,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -742,8 +770,8 @@ class Command(BaseCommand, LoggerMixin):
742 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_OTHER)) 770 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_OTHER))
743 continue 771 continue
744 elif classify in consts.LICENSE_CLASSIFY_SET_1: # 证件1 772 elif classify in consts.LICENSE_CLASSIFY_SET_1: # 证件1
745 self.license1_process(ocr_data, license_summary, classify, res_list, 773 self.license1_process(ocr_data, license_summary, classify, res_list, pno,
746 pno, ino, part_idx, img_path) 774 ino, part_idx, img_path, is_hil, hil_id_bc_mapping)
747 elif classify in consts.LICENSE_CLASSIFY_SET_2: # 证件2 775 elif classify in consts.LICENSE_CLASSIFY_SET_2: # 证件2
748 pid, _, _, _, _, _ = consts.LICENSE_CLASSIFY_MAPPING.get(classify) 776 pid, _, _, _, _, _ = consts.LICENSE_CLASSIFY_MAPPING.get(classify)
749 file_data = ocr_data.get('section_img') 777 file_data = ocr_data.get('section_img')
...@@ -788,7 +816,9 @@ class Command(BaseCommand, LoggerMixin): ...@@ -788,7 +816,9 @@ class Command(BaseCommand, LoggerMixin):
788 card_name_res.get('data', {}).get('is_exists_name') == 0: 816 card_name_res.get('data', {}).get('is_exists_name') == 0:
789 name = '无' 817 name = '无'
790 ocr_2_res['Name'] = name 818 ocr_2_res['Name'] = name
791 self.license2_process(ocr_2_res, license_summary, pid, classify, res_list, pno, ino, part_idx) 819 self.license2_process(ocr_2_res, license_summary, pid, classify,
820 res_list, pno, ino, part_idx, img_path,
821 is_hil, hil_id_bc_mapping)
792 break 822 break
793 else: 823 else:
794 res_list.append((pno, ino, part_idx, consts.RES_FAILED_2)) 824 res_list.append((pno, ino, part_idx, consts.RES_FAILED_2))
...@@ -890,6 +920,29 @@ class Command(BaseCommand, LoggerMixin): ...@@ -890,6 +920,29 @@ class Command(BaseCommand, LoggerMixin):
890 os.remove(excel_path) 920 os.remove(excel_path)
891 finally: 921 finally:
892 # TODO 识别结果存一张表,方便跑报表 922 # TODO 识别结果存一张表,方便跑报表
923
924 # DDA处理
925 if is_hil:
926 # 获取需要保存图片的集合
927 hil_dda_save_img_list = []
928 for dda_res_list in license_summary.get(consts.DDA_CLASSIFY, []):
929 for dda_idx, dda_res in enumerate(dda_res_list):
930 save_img_dict = {
931 consts.DDA_FIELD: dda_res.get(consts.DDA_IMG_PATH)
932 }
933 for dda_field, license_field in consts.DDA_MAPPING:
934 target_str = dda_res.get(dda_field, '')
935 save_img_dict.setdefault(license_field, set()).update(
936 hil_id_bc_mapping.get(license_field, dict()).get(target_str, set()))
937 hil_dda_save_img_list.append(save_img_dict)
938 self.online_log.info('{0} [DDA process] [DDA_info={1}]'.format(self.log_base, license_summary.get(consts.DDA_CLASSIFY, [])))
939 self.online_log.info('{0} [DDA process] [ic&bc_info={1}]'.format(self.log_base, hil_id_bc_mapping))
940 self.online_log.info('{0} [DDA process] [img_path={1}]'.format(self.log_base, hil_dda_save_img_list))
941 # 保存图片
942 # 数据库记录
943 # report记录
944
945 # CA比对
893 if doc.document_scheme == consts.DOC_SCHEME_LIST[0]: 946 if doc.document_scheme == consts.DOC_SCHEME_LIST[0]:
894 try: 947 try:
895 # 更新OCR累计识别结果表 948 # 更新OCR累计识别结果表
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!