3586d37a by 周伟奇

add ltgt

1 parent 634fd497
...@@ -832,6 +832,16 @@ MVC_SE_FIELD_ORDER_3_4 = ( ...@@ -832,6 +832,16 @@ MVC_SE_FIELD_ORDER_3_4 = (
832 832
833 ('解除抵押日期', '解除抵押日期'), 833 ('解除抵押日期', '解除抵押日期'),
834 ) 834 )
835
# LTGT column layouts for the vehicle registration certificate sheet.
# Every entry is a 2-tuple; the first element is the name used both as the
# lookup key in the OCR result and as the header written to the daily
# workbook.  The second element is presumably the display name used by other
# consumers -- confirm before relying on it.

# Supplied as the "src_field_order" element of the LTGT field map.
MVC_SE_FIELD_ORDER_1_2_LTGT = (
    ('1.机动车所有人/身份证名称/号码', '机动车所有人/身份证明名称/号码'),
    ('编号', '机动车登记证书编号'),
)

# Supplied as the "side_field_order" element of the LTGT field map.
MVC_SE_FIELD_ORDER_3_4_LTGT = (
    ('身份证名称/号码', '身份证明名称/号码'),
    ('机动车登记证书编号', '机动车登记证书编号'),
    ('抵押登记日期', '抵押登记日期'),
)
844
835 # 机动车销售统一发票 845 # 机动车销售统一发票
836 MVI_CN_NAME = '机动车销售统一发票' 846 MVI_CN_NAME = '机动车销售统一发票'
837 MVI_CLASSIFY = 29 847 MVI_CLASSIFY = 29
...@@ -1421,6 +1431,23 @@ SE_DDA_FIELD = ['applicationId(1)', 'applicationId(2)', 'bankName', 'companyName ...@@ -1421,6 +1431,23 @@ SE_DDA_FIELD = ['applicationId(1)', 'applicationId(2)', 'bankName', 'companyName
1421 1431
1422 ASP_KEY = 'is_asp' 1432 ASP_KEY = 'is_asp'
1423 1433
1434
# LTGT contract extraction map.
# output field -> (page number for non-ASP contracts,
#                  page number for ASP contracts,
#                  key inside that page,
#                  nested key inside the page value, or None when the page
#                  value itself is the answer)
AFC_CON_MAP_LTGT = {
    '合同编号': (1, 1, '合同编号', None),
    '借款人姓名': (2, 2, '借款人及抵押人', 'name'),
    '共借人姓名': (2, 2, '共同借款人及共同抵押人', 'name'),
    '保证人姓名1': (2, 2, '保证人1', 'name'),
    '保证人姓名2': (2, 2, '保证人2', 'name'),
}

# Column order for the LTGT contract sheet: one (field, field) pair per mapped
# field, in the map's declaration order (dicts preserve insertion order).
AFC_CON_FIELD_ORDER_LTGT = tuple((field, field) for field in AFC_CON_MAP_LTGT)
1450
1424 SE_AFC_CON_MAP = { 1451 SE_AFC_CON_MAP = {
1425 '合同编号-每页': (None, None, '合同编号', None), 1452 '合同编号-每页': (None, None, '合同编号', None),
1426 '所购车辆价格-小写-重要条款': (1, 1, '所购车辆价格', None), 1453 '所购车辆价格-小写-重要条款': (1, 1, '所购车辆价格', None),
......
...@@ -58,10 +58,11 @@ class Command(BaseCommand, LoggerMixin): ...@@ -58,10 +58,11 @@ class Command(BaseCommand, LoggerMixin):
58 130: '民事调解书' 58 130: '民事调解书'
59 } 59 }
60 self.sheet_content = { 60 self.sheet_content = {
61 128: ('执行裁定书', ('承办法院', '案号/标号', '被执行人', '债权金额', '诉讼时间')), 61 128: ['执行裁定书', ['承办法院', '案号/标号', '被执行人', '债权金额', '诉讼时间']],
62 129: ('民事判决书', ('承办法院', '案号/标号', '被告', '判决结果: 贷款本金', '判决结果: 罚息', '判决结果: 律师费', '判决结果: 案件受理费', '诉讼时间')), 62 129: ['民事判决书', ['承办法院', '案号/标号', '被告', '判决结果: 贷款本金', '判决结果: 罚息', '判决结果: 律师费', '判决结果: 案件受理费', '诉讼时间']],
63 130: ('民事调解书', ('承办法院', '案号/标号', '被告', '协议内容: 支付金额', '协议内容: 案件受理费', '诉讼时间')), 63 130: ['民事调解书', ['承办法院', '案号/标号', '被告', '协议内容: 支付金额', '协议内容: 案件受理费', '诉讼时间']],
64 } 64 }
65 self.FILE_KEY = 'file'
65 self.DATE_KEY = 'date' 66 self.DATE_KEY = 'date'
66 self.CLASSIFY_KEY = 'classify' 67 self.CLASSIFY_KEY = 'classify'
67 self.RESULT_KEY = 'result' 68 self.RESULT_KEY = 'result'
...@@ -84,6 +85,14 @@ class Command(BaseCommand, LoggerMixin): ...@@ -84,6 +85,14 @@ class Command(BaseCommand, LoggerMixin):
84 consts.IC_CLASSIFY: (consts.IC_CN_NAME, '有效期限', consts.IC_FIELD_ORDER_3, consts.IC_FIELD_ORDER_2), 85 consts.IC_CLASSIFY: (consts.IC_CN_NAME, '有效期限', consts.IC_FIELD_ORDER_3, consts.IC_FIELD_ORDER_2),
85 consts.MVC_CLASSIFY: (consts.MVC_CN_NAME, '机动车登记证书编号', consts.MVC_SE_FIELD_ORDER_3_4, consts.MVC_SE_FIELD_ORDER_1_2), 86 consts.MVC_CLASSIFY: (consts.MVC_CN_NAME, '机动车登记证书编号', consts.MVC_SE_FIELD_ORDER_3_4, consts.MVC_SE_FIELD_ORDER_1_2),
86 } 87 }
88 self.field_map_2 = {
89 # sheet_name, key_field, side_field_order, src_field_order
90 consts.CONTRACT_CLASSIFY: (consts.CONTRACT_CN_NAME, None, None, consts.AFC_CON_FIELD_ORDER_LTGT),
91 consts.VAT_CLASSIFY: (consts.VAT_CN_NAME, None, None, consts.VATS_FIELD_ORDER),
92 consts.IC_CLASSIFY: (consts.IC_CN_NAME, '有效期限', consts.IC_FIELD_ORDER_3, consts.IC_FIELD_ORDER_2),
93 consts.MVC_CLASSIFY: (
94 consts.MVC_CN_NAME, '机动车登记证书编号', consts.MVC_SE_FIELD_ORDER_3_4_LTGT, consts.MVC_SE_FIELD_ORDER_1_2_LTGT),
95 }
87 # ocr相关 96 # ocr相关
88 self.ocr_url = conf.OCR_URL_FOLDER 97 self.ocr_url = conf.OCR_URL_FOLDER
89 self.ocr_url_2 = conf.OCR2_URL_FOLDER 98 self.ocr_url_2 = conf.OCR2_URL_FOLDER
...@@ -95,7 +104,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -95,7 +104,7 @@ class Command(BaseCommand, LoggerMixin):
def signal_handler(self, sig, frame):
    """Clear ``self.switch`` so that file processing stops.

    ``sig`` and ``frame`` are not used here; presumably they are the
    standard arguments passed to a signal handler -- confirm at the
    registration site.
    """
    self.switch = False  # stop processing files
97 106
98 def contract_process(self, ocr_data, contract_result, classify): 107 def contract_process(self, ocr_data, contract_result, classify, rebuild_contract_result):
99 contract_dict = ocr_data.get('data') 108 contract_dict = ocr_data.get('data')
100 if not contract_dict or contract_dict.get('page_num') is None or contract_dict.get('page_info') is None: 109 if not contract_dict or contract_dict.get('page_num') is None or contract_dict.get('page_info') is None:
101 return 110 return
...@@ -135,6 +144,26 @@ class Command(BaseCommand, LoggerMixin): ...@@ -135,6 +144,26 @@ class Command(BaseCommand, LoggerMixin):
135 144
136 contract_result.setdefault(classify, dict()).setdefault(page_num_only, []).append(rebuild_page_info) 145 contract_result.setdefault(classify, dict()).setdefault(page_num_only, []).append(rebuild_page_info)
137 146
147 page_compare_dict = {}
148 for key, value in contract_dict.get('page_info', {}).items():
149 if not isinstance(value, dict):
150 continue
151 elif text_key in value:
152 if value[text_key] is None:
153 page_compare_dict[key] = ''
154 elif isinstance(value[text_key], str):
155 page_compare_dict[key] = value[text_key]
156 elif isinstance(value[text_key], list):
157 page_compare_dict[key] = value[text_key]
158 else:
159 page_compare_dict[key] = {}
160 for sub_key, sub_value in value.items():
161 if sub_value[text_key] is None:
162 page_compare_dict[key][sub_key] = ''
163 elif isinstance(sub_value[text_key], str):
164 page_compare_dict[key][sub_key] = sub_value[text_key]
165 rebuild_contract_result.setdefault(classify, dict())[page_num_only] = page_compare_dict
166
138 def license1_process(self, ocr_data, all_res, classify): 167 def license1_process(self, ocr_data, all_res, classify):
139 # 类别:'0'身份证, '1'居住证 168 # 类别:'0'身份证, '1'居住证
140 license_data = ocr_data.get('data') 169 license_data = ocr_data.get('data')
...@@ -253,7 +282,31 @@ class Command(BaseCommand, LoggerMixin): ...@@ -253,7 +282,31 @@ class Command(BaseCommand, LoggerMixin):
253 seperate_path_map[c] = os.path.join(seperate_dir, new_name) 282 seperate_path_map[c] = os.path.join(seperate_dir, new_name)
254 return img_save_path, excel_path, pdf_save_path, seperate_path_map 283 return img_save_path, excel_path, pdf_save_path, seperate_path_map
255 284
@staticmethod
def all_res_add_contract(all_res, rebuild_contract_result):
    """Flatten the per-page contract text into ``all_res``.

    For each classify in ``rebuild_contract_result`` one flat dict is built
    by looking up every field of ``consts.AFC_CON_MAP_LTGT`` in that
    classify's pages; ``all_res[classify]`` is then replaced with a
    one-element list holding that dict.

    Args:
        all_res: result dict, mutated in place.
        rebuild_contract_result: ``{classify: {page_no: {key: value}}}``.
            Pages are looked up with ``str(page_no)``; a value may be a
            str, a list or a nested dict.

    Returns:
        None.
    """
    for classify, page_info_dict in rebuild_contract_result.items():
        res = {}
        # ASP contracts are not distinguished on this path, so the non-ASP
        # page number of every map entry is the one that gets used.
        is_asp = False
        for key, (pno_not_asp, pno_asp, key1, key2) in consts.AFC_CON_MAP_LTGT.items():
            pno = pno_asp if is_asp else pno_not_asp
            if pno is None:
                # No page for this layout.  An int on the ASP side means the
                # field exists only for ASP; otherwise collect it per page.
                if isinstance(pno_asp, int):
                    continue
                end_idx = 9 if is_asp else 8
                for i in range(1, end_idx):
                    page = page_info_dict.get(str(i), {})
                    res.setdefault(key, list()).append(page.get(key1, ''))
                continue

            page = page_info_dict.get(str(pno), {})
            if key2 is None:
                res[key] = page.get(key1, '')
            else:
                # The page value may be a plain string or a list rather than
                # a nested dict.  Calling .get() on it would raise
                # AttributeError and abort the whole merge, so fall back to
                # an empty value instead.
                section = page.get(key1, {})
                res[key] = section.get(key2, '') if isinstance(section, dict) else ''
            res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page.get(consts.IMG_PATH_KEY, '')

        all_res[classify] = [res]
308
309 def res_process(self, all_res, excel_path, classify, contract_result, rebuild_contract_result):
257 try: 310 try:
258 wb = BSWorkbook(set(), set(), set(), set(), set()) 311 wb = BSWorkbook(set(), set(), set(), set(), set())
259 for c, res_list in all_res.items(): 312 for c, res_list in all_res.items():
...@@ -274,6 +327,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -274,6 +327,8 @@ class Command(BaseCommand, LoggerMixin):
274 wb.contract_rebuild(contract_result) 327 wb.contract_rebuild(contract_result)
275 wb.remove_base_sheet() 328 wb.remove_base_sheet()
276 wb.save(excel_path) 329 wb.save(excel_path)
330
331 self.all_res_add_contract(all_res, rebuild_contract_result)
277 except Exception as e: 332 except Exception as e:
278 self.folder_log.error('{0} [wb build error] [path={1}] [error={2}]'.format( 333 self.folder_log.error('{0} [wb build error] [path={1}] [error={2}]'.format(
279 self.log_base, excel_path, traceback.format_exc())) 334 self.log_base, excel_path, traceback.format_exc()))
...@@ -284,7 +339,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -284,7 +339,7 @@ class Command(BaseCommand, LoggerMixin):
284 sep = os.path.sep + (os.path.altsep or '') 339 sep = os.path.sep + (os.path.altsep or '')
285 return os.path.basename(path.rstrip(sep)) 340 return os.path.basename(path.rstrip(sep))
286 341
287 def ocr_process(self, img_path, classify, all_res, seperate_path_map, contract_result): 342 def ocr_process(self, img_path, classify, all_res, seperate_path_map, contract_result, rebuild_contract_result):
288 if os.path.exists(img_path): 343 if os.path.exists(img_path):
289 # TODO 图片验证 344 # TODO 图片验证
290 with open(img_path, 'rb') as f: 345 with open(img_path, 'rb') as f:
...@@ -332,7 +387,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -332,7 +387,7 @@ class Command(BaseCommand, LoggerMixin):
332 elif new_classify in consts.LICENSE_CLASSIFY_SET_2: 387 elif new_classify in consts.LICENSE_CLASSIFY_SET_2:
333 self.license2_process(ocr_data, all_res, new_classify, img_path) 388 self.license2_process(ocr_data, all_res, new_classify, img_path)
334 elif new_classify in consts.CONTRACT_SET: 389 elif new_classify in consts.CONTRACT_SET:
335 self.contract_process(ocr_data, contract_result, new_classify) 390 self.contract_process(ocr_data, contract_result, new_classify, rebuild_contract_result)
336 break 391 break
337 else: 392 else:
338 self.folder_log.warn('{0} [ocr failed] [img_path={1}]'.format(self.log_base, img_path)) 393 self.folder_log.warn('{0} [ocr failed] [img_path={1}]'.format(self.log_base, img_path))
...@@ -395,15 +450,15 @@ class Command(BaseCommand, LoggerMixin): ...@@ -395,15 +450,15 @@ class Command(BaseCommand, LoggerMixin):
def images_process(self, img_path_list, classify, excel_path, seperate_path_map):
    """Run OCR on every image, then hand the results to ``res_process``.

    Args:
        img_path_list: iterable of image paths, processed in order.
        classify: classify value forwarded to both processing steps.
        excel_path: workbook path forwarded to ``res_process``.
        seperate_path_map: forwarded unchanged to ``ocr_process``.

    Returns:
        The ``all_res`` dict that was passed to both processing steps.
    """
    # Fresh accumulators for this batch; they are passed to ocr_process
    # for every image and then to res_process.
    all_res, contract_result, rebuild_contract_result = {}, {}, {}
    for img_path in img_path_list:
        self.ocr_process(
            img_path, classify, all_res, seperate_path_map,
            contract_result, rebuild_contract_result,
        )
    # res_process runs unconditionally, even when all_res is still empty.
    self.res_process(
        all_res, excel_path, classify, contract_result, rebuild_contract_result,
    )
    return all_res
403 459
404 def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir_map): 460 def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir_map):
405 if os.path.exists(path): 461 if os.path.exists(path):
406 rebuild_res = None
407 img_save_path, excel_path, pdf_save_path, seperate_path_map = self.get_path( 462 img_save_path, excel_path, pdf_save_path, seperate_path_map = self.get_path(
408 name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir_map) 463 name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir_map)
409 pdf_handler = PDFHandler(path, img_save_path) 464 pdf_handler = PDFHandler(path, img_save_path)
...@@ -420,7 +475,9 @@ class Command(BaseCommand, LoggerMixin): ...@@ -420,7 +475,9 @@ class Command(BaseCommand, LoggerMixin):
420 else: 475 else:
421 ocr_result = afc_predict(pdf_handler.pdf_info) 476 ocr_result = afc_predict(pdf_handler.pdf_info)
422 contract_result = dict() 477 contract_result = dict()
478 rebuild_contract_result = dict()
423 page_res = {} 479 page_res = {}
480 all_res = dict()
424 for page_num, page_info in ocr_result.get('page_info', {}).items(): 481 for page_num, page_info in ocr_result.get('page_info', {}).items():
425 if isinstance(page_num, str) and page_num.startswith('page_'): 482 if isinstance(page_num, str) and page_num.startswith('page_'):
426 page_res[page_num] = { 483 page_res[page_num] = {
...@@ -435,9 +492,10 @@ class Command(BaseCommand, LoggerMixin): ...@@ -435,9 +492,10 @@ class Command(BaseCommand, LoggerMixin):
435 'classify': page_res[page_key].pop('classify', consts.OTHER_CLASSIFY), 492 'classify': page_res[page_key].pop('classify', consts.OTHER_CLASSIFY),
436 'data': page_res[page_key] 493 'data': page_res[page_key]
437 } 494 }
438 self.contract_process(ocr_data, contract_result, classify) 495 self.contract_process(ocr_data, contract_result, classify, rebuild_contract_result)
439 self.res_process({}, excel_path, classify, contract_result) 496 self.res_process(all_res, excel_path, classify, contract_result, rebuild_contract_result)
440 shutil.move(path, pdf_save_path) 497 shutil.move(path, pdf_save_path)
498 return all_res
441 else: 499 else:
442 try: 500 try:
443 self.folder_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path)) 501 self.folder_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path))
...@@ -465,7 +523,6 @@ class Command(BaseCommand, LoggerMixin): ...@@ -465,7 +523,6 @@ class Command(BaseCommand, LoggerMixin):
465 523
466 def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir_map): 524 def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir_map):
467 if os.path.exists(path): 525 if os.path.exists(path):
468 rebuild_res = None
469 try: 526 try:
470 img_save_path, excel_path, tiff_save_path, seperate_path_map = self.get_path( 527 img_save_path, excel_path, tiff_save_path, seperate_path_map = self.get_path(
471 name, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir_map) 528 name, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir_map)
...@@ -490,7 +547,6 @@ class Command(BaseCommand, LoggerMixin): ...@@ -490,7 +547,6 @@ class Command(BaseCommand, LoggerMixin):
490 return rebuild_res 547 return rebuild_res
491 548
492 def img_process(self, name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir, seperate_dir_map): 549 def img_process(self, name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir, seperate_dir_map):
493 rebuild_res = None
494 try: 550 try:
495 img_save_path, excel_path, _, seperate_path_map = self.get_path( 551 img_save_path, excel_path, _, seperate_path_map = self.get_path(
496 name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir_map) 552 name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir_map)
...@@ -539,19 +595,27 @@ class Command(BaseCommand, LoggerMixin): ...@@ -539,19 +595,27 @@ class Command(BaseCommand, LoggerMixin):
539 try: 595 try:
540 if result[self.CLASSIFY_KEY] in self.sheet_content: 596 if result[self.CLASSIFY_KEY] in self.sheet_content:
541 sheet_name, head_fields = self.sheet_content[result[self.CLASSIFY_KEY]] 597 sheet_name, head_fields = self.sheet_content[result[self.CLASSIFY_KEY]]
598 first_head_row = head_fields
599 else:
600 sheet_name, key_field, side_field_order, field_order = self.field_map_2[result[self.CLASSIFY_KEY]]
601 if key_field is not None and len(side_field_order) > len(field_order):
602 first_head_row = []
603 for a, _ in side_field_order:
604 first_head_row.append(a)
542 else: 605 else:
543 sheet_name, key_field, side_field_order, field_order = self.field_map[result[self.CLASSIFY_KEY]] 606 first_head_row = []
607 for a, _ in field_order:
608 first_head_row.append(a)
609
544 if key_field is not None and key_field in result[self.RESULT_KEY]: 610 if key_field is not None and key_field in result[self.RESULT_KEY]:
545 head_fields = [] 611 head_fields = []
546 for a, b in side_field_order: 612 for a, _ in side_field_order:
547 if isinstance(b, str):
548 head_fields.append(a) 613 head_fields.append(a)
549 else: 614 else:
550 head_fields = [] 615 head_fields = []
551 for a, b in field_order: 616 for a, _ in field_order:
552 if isinstance(b, str):
553 head_fields.append(a) 617 head_fields.append(a)
554 row = [] 618 row = [result[self.FILE_KEY]]
555 for field in head_fields: 619 for field in head_fields:
556 ocr_str_or_list = result[self.RESULT_KEY].get(field, '') 620 ocr_str_or_list = result[self.RESULT_KEY].get(field, '')
557 if isinstance(ocr_str_or_list, list): 621 if isinstance(ocr_str_or_list, list):
...@@ -563,7 +627,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -563,7 +627,8 @@ class Command(BaseCommand, LoggerMixin):
563 ws = wb.get_sheet_by_name(sheet_name) 627 ws = wb.get_sheet_by_name(sheet_name)
564 else: 628 else:
565 ws = wb.create_sheet(sheet_name) 629 ws = wb.create_sheet(sheet_name)
566 ws.append(head_fields) 630 first_head_row.insert(0, '文件名')
631 ws.append(first_head_row)
567 ws.append(row) 632 ws.append(row)
568 except Exception as e: 633 except Exception as e:
569 self.folder_log.info('{0} [daily wb failed] [result={1}] [error={2}]'.format( 634 self.folder_log.info('{0} [daily wb failed] [result={1}] [error={2}]'.format(
...@@ -617,6 +682,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -617,6 +682,7 @@ class Command(BaseCommand, LoggerMixin):
617 if len(true_file_set) == 0 and len(os_error_filename_set) > 0: 682 if len(true_file_set) == 0 and len(os_error_filename_set) > 0:
618 true_file_set.add(os_error_filename_set.pop()) 683 true_file_set.add(os_error_filename_set.pop())
619 for name in true_file_set: 684 for name in true_file_set:
685 time.sleep(5)
620 path = os.path.join(input_dir, name) 686 path = os.path.join(input_dir, name)
621 687
622 try: 688 try:
...@@ -664,7 +730,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -664,7 +730,8 @@ class Command(BaseCommand, LoggerMixin):
664 { 730 {
665 self.CLASSIFY_KEY: c, 731 self.CLASSIFY_KEY: c,
666 self.RESULT_KEY: res, 732 self.RESULT_KEY: res,
667 self.DATE_KEY: date_str 733 self.DATE_KEY: date_str,
734 self.FILE_KEY: name,
668 } 735 }
669 ) 736 )
670 737
......
...@@ -1067,8 +1067,8 @@ def get_se_cms_compare_info_auto(last_obj, application_entity): ...@@ -1067,8 +1067,8 @@ def get_se_cms_compare_info_auto(last_obj, application_entity):
1067 # ('accountNo', account_no), 1067 # ('accountNo', account_no),
1068 # ] 1068 # ]
1069 # bank_info[consts.DDA_EN] = dda_field_input 1069 # bank_info[consts.DDA_EN] = dda_field_input
1070 # if len(bank_info) > 0: 1070 if len(bank_info) > 0:
1071 # compare_info['bankInfo'] = bank_info 1071 compare_info['bankInfo'] = bank_info
1072 1072
1073 # 银行流水 -------------------------------------------------------------------- 1073 # 银行流水 --------------------------------------------------------------------
1074 if cms_info.get('autoApprovedDetails', {}).get('aaType', '') in ['CAA1', 'CAA2']: 1074 if cms_info.get('autoApprovedDetails', {}).get('aaType', '') in ['CAA1', 'CAA2']:
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!