0b550510 by 王聪

Merge branch 'feature/uat-tmp' of http://gitlab.situdata.com/zhouweiqi/bmw-ocr into feature/uat-tmp

2 parents 2d500b7a 76b3bbd5
...@@ -1168,6 +1168,7 @@ MS_ERROR_COL = (5, 6) ...@@ -1168,6 +1168,7 @@ MS_ERROR_COL = (5, 6)
1168 WECHART_CLASSIFY = 12 1168 WECHART_CLASSIFY = 12
1169 NEW_ZHIFUBAO_CLASSIFY = 48 1169 NEW_ZHIFUBAO_CLASSIFY = 48
1170 ALI_WECHART_CLASSIFY = {12, 13, 48} 1170 ALI_WECHART_CLASSIFY = {12, 13, 48}
1171 JSYH_CLASSIFY = {11, 27, 34}
1171 WECHART_ERROR_COL = (1, 2) 1172 WECHART_ERROR_COL = (1, 2)
1172 SPECIAL_HEADERS_MAPPING = copy.deepcopy(HEADERS_MAPPING) 1173 SPECIAL_HEADERS_MAPPING = copy.deepcopy(HEADERS_MAPPING)
1173 SPECIAL_HEADERS_MAPPING.update( 1174 SPECIAL_HEADERS_MAPPING.update(
......
...@@ -2009,6 +2009,9 @@ class Command(BaseCommand, LoggerMixin): ...@@ -2009,6 +2009,9 @@ class Command(BaseCommand, LoggerMixin):
2009 report_list[5] = BSCheckResult.CHECK_FAILED.value 2009 report_list[5] = BSCheckResult.CHECK_FAILED.value
2010 2010
2011 finally: 2011 finally:
2012 self.online_log.info('{0} [task={1}] [license_summary={2}] '
2013 '[contract_result_compare={3}]'.format(self.log_base, task_str,
2014 license_summary, contract_result_compare))
2012 self.rebuild_contract(license_summary, contract_result_compare) 2015 self.rebuild_contract(license_summary, contract_result_compare)
2013 2016
2014 bs_rebuild = self.rebuild_bs(merged_bs_summary) 2017 bs_rebuild = self.rebuild_bs(merged_bs_summary)
......
...@@ -27,6 +27,7 @@ class RequestTeam(NamedEnum): ...@@ -27,6 +27,7 @@ class RequestTeam(NamedEnum):
27 SETTLEMENT = (1, 'SETTLEMENT') 27 SETTLEMENT = (1, 'SETTLEMENT')
28 CONTRACTMANAGEMENT = (2, 'CONTRACTMANAGEMENT') 28 CONTRACTMANAGEMENT = (2, 'CONTRACTMANAGEMENT')
29 CONTROLLING = (3, 'CONTROLLING') 29 CONTROLLING = (3, 'CONTROLLING')
30 INSURANCE = (4, 'INSURANCE')
30 31
31 32
32 class RequestTrigger(NamedEnum): 33 class RequestTrigger(NamedEnum):
......
...@@ -39,6 +39,18 @@ class BSWorkbook(Workbook): ...@@ -39,6 +39,18 @@ class BSWorkbook(Workbook):
39 self.MAX_MEAN = 31 39 self.MAX_MEAN = 31
40 self.need_follow = False 40 self.need_follow = False
41 41
42 # @staticmethod
43 # def date_calibration(date_str):
44 # result = True
45 # try:
46 # if date_str[-2:] not in ['20', '21']:
47 # result = False
48 # if date_str[-5:-3] not in ['03', '06', '09', '12']:
49 # result = False
50 # except Exception as e:
51 # result = False
52 # return result
53
42 @staticmethod 54 @staticmethod
43 def replace_newline(queryset_value): 55 def replace_newline(queryset_value):
44 new_set = set() 56 new_set = set()
...@@ -338,7 +350,7 @@ class BSWorkbook(Workbook): ...@@ -338,7 +350,7 @@ class BSWorkbook(Workbook):
338 metadata_rows.append(('Producer', producer)) 350 metadata_rows.append(('Producer', producer))
339 if len(author) > 0: 351 if len(author) > 0:
340 metadata_highlight_row.append(6) 352 metadata_highlight_row.append(6)
341 if 'iText' not in producer and 'Qt' not in producer and 'Haru Free' not in producer: 353 if 'iText' not in producer and 'Qt' not in producer and 'Haru Free' not in producer and 'OpenPDF' not in producer:
342 metadata_highlight_row.append(7) 354 metadata_highlight_row.append(7)
343 metadata_rows.append(self.blank_row) 355 metadata_rows.append(self.blank_row)
344 356
...@@ -496,6 +508,7 @@ class BSWorkbook(Workbook): ...@@ -496,6 +508,7 @@ class BSWorkbook(Workbook):
496 508
497 tmp_ws = self.create_sheet('tmp_ws') 509 tmp_ws = self.create_sheet('tmp_ws')
498 tmp2_ws = self.create_sheet('tmp2_ws') 510 tmp2_ws = self.create_sheet('tmp2_ws')
511 tmp3_ws = self.create_sheet('tmp3_ws')
499 if classify in consts.ALI_WECHART_CLASSIFY: 512 if classify in consts.ALI_WECHART_CLASSIFY:
500 high_light_keyword = self.wechat_keyword 513 high_light_keyword = self.wechat_keyword
501 else: 514 else:
...@@ -629,7 +642,7 @@ class BSWorkbook(Workbook): ...@@ -629,7 +642,7 @@ class BSWorkbook(Workbook):
629 # 关键词1提取 642 # 关键词1提取
630 if summary_cell_value in self.interest_keyword: 643 if summary_cell_value in self.interest_keyword:
631 new_amount_cell_value = None if amount_cell is None else amount_cell.value 644 new_amount_cell_value = None if amount_cell is None else amount_cell.value
632 ms.append((summary_cell_value, date_cell_value, new_amount_cell_value)) 645 tmp3_ws.append((summary_cell_value, date_cell_value, new_amount_cell_value))
633 # 关键词2提取至临时表 646 # 关键词2提取至临时表
634 elif summary_cell_value in self.salary_keyword: 647 elif summary_cell_value in self.salary_keyword:
635 new_amount_cell_value = None if amount_cell is None else amount_cell.value 648 new_amount_cell_value = None if amount_cell is None else amount_cell.value
...@@ -657,6 +670,18 @@ class BSWorkbook(Workbook): ...@@ -657,6 +670,18 @@ class BSWorkbook(Workbook):
657 # if summary_cell_idx is not None: 670 # if summary_cell_idx is not None:
658 # new_ws[row][summary_cell_idx].fill = self.amount_fill 671 # new_ws[row][summary_cell_idx].fill = self.amount_fill
659 672
673 # 关键词1信息提取:结息
674 for row in tmp3_ws.iter_rows(values_only=True):
675 ms.append(row)
676 # # 建设银行
677 # if classify in consts.JSYH_CLASSIFY:
678 # if isinstance(row[1], str) and self.date_calibration(row[1]):
679 # pass
680 # else:
681 # for cell in ms[ms.max_row]:
682 # cell.fill = self.amount_fill
683 self.remove(tmp3_ws)
684
660 # 关键词2信息提取 685 # 关键词2信息提取
661 ms.append(self.blank_row) 686 ms.append(self.blank_row)
662 ms.append(self.salary_keyword_header) 687 ms.append(self.salary_keyword_header)
......
...@@ -257,6 +257,7 @@ se_compare_content = { ...@@ -257,6 +257,7 @@ se_compare_content = {
257 'fsmSpecialCar': fields.Boolean(required=False), 257 'fsmSpecialCar': fields.Boolean(required=False),
258 'fsmBestPrice': fields.Boolean(required=False), 258 'fsmBestPrice': fields.Boolean(required=False),
259 'isAutoSettlement': fields.Boolean(required=False), 259 'isAutoSettlement': fields.Boolean(required=False),
260 'fsmLandingDealer': fields.Str(required=False, validate=validate.Length(max=1024)),
260 261
261 'individualCusInfo': fields.List(fields.Nested(se_individual_args), 262 'individualCusInfo': fields.List(fields.Nested(se_individual_args),
262 required=True, validate=validate.Length(min=1, max=4)), 263 required=True, validate=validate.Length(min=1, max=4)),
...@@ -592,12 +593,12 @@ class UploadDocView(GenericView, DocHandler): ...@@ -592,12 +593,12 @@ class UploadDocView(GenericView, DocHandler):
592 if business_type == consts.HIL_PREFIX: 593 if business_type == consts.HIL_PREFIX:
593 if document_scheme == RequestTeam.ACCEPTANCE.name: 594 if document_scheme == RequestTeam.ACCEPTANCE.name:
594 result_class = HILOCRResult 595 result_class = HILOCRResult
595 elif document_scheme == RequestTeam.SETTLEMENT.name: 596 elif document_scheme == RequestTeam.SETTLEMENT.name or document_scheme == RequestTeam.INSURANCE.name:
596 result_class = HILSEOCRResult 597 result_class = HILSEOCRResult
597 elif business_type == consts.AFC_PREFIX: 598 elif business_type == consts.AFC_PREFIX:
598 if document_scheme == RequestTeam.ACCEPTANCE.name: 599 if document_scheme == RequestTeam.ACCEPTANCE.name:
599 result_class = AFCOCRResult 600 result_class = AFCOCRResult
600 elif document_scheme == RequestTeam.SETTLEMENT.name: 601 elif document_scheme == RequestTeam.SETTLEMENT.name or document_scheme == RequestTeam.INSURANCE.name:
601 result_class = AFCSEOCRResult 602 result_class = AFCSEOCRResult
602 603
603 ocr_result_obj = result_class.objects.filter(application_id=application_id).first() 604 ocr_result_obj = result_class.objects.filter(application_id=application_id).first()
...@@ -610,6 +611,7 @@ class UploadDocView(GenericView, DocHandler): ...@@ -610,6 +611,7 @@ class UploadDocView(GenericView, DocHandler):
610 ocr_result_obj.fsm_activited = 1 611 ocr_result_obj.fsm_activited = 1
611 ocr_result_obj.save() 612 ocr_result_obj.save()
612 613
614 self.running_log.info('[doc upload applicationId-{0}] [ocr result saved]'.format(application_id))
613 if data_source == consts.DATA_SOURCE_LIST[1]: 615 if data_source == consts.DATA_SOURCE_LIST[1]:
614 if document_name.endswith('-证书.pdf') or document_name.endswith('-证书'): 616 if document_name.endswith('-证书.pdf') or document_name.endswith('-证书'):
615 self.running_log.info('[doc upload success] [eapp license skip] [args={0}]'.format(args)) 617 self.running_log.info('[doc upload success] [eapp license skip] [args={0}]'.format(args))
...@@ -880,6 +882,7 @@ class SECompareView(GenericView, PreSEHandler): ...@@ -880,6 +882,7 @@ class SECompareView(GenericView, PreSEHandler):
880 fsm_flag = content.get('fsmFlag', False) 882 fsm_flag = content.get('fsmFlag', False)
881 fsm_special_car = content.get('fsmSpecialCar', False) 883 fsm_special_car = content.get('fsmSpecialCar', False)
882 fsm_best_price = content.get('fsmBestPrice', False) 884 fsm_best_price = content.get('fsmBestPrice', False)
885 fsm_landing_dealer = content.get('fsmLandingDealer')
883 886
884 if fsm_special_car: 887 if fsm_special_car:
885 compare_result = { 888 compare_result = {
......
...@@ -62,6 +62,7 @@ class Finder: ...@@ -62,6 +62,7 @@ class Finder:
62 }, 62 },
63 "page_3": {"合同编号": self.item, 63 "page_3": {"合同编号": self.item,
64 "还款计划表": self.item, 64 "还款计划表": self.item,
65 "车辆代理商": self.item,
65 }, 66 },
66 "page_4": {"合同编号": self.item, 67 "page_4": {"合同编号": self.item,
67 "附加产品融资贷款本金总金额明细": self.item, 68 "附加产品融资贷款本金总金额明细": self.item,
...@@ -71,6 +72,7 @@ class Finder: ...@@ -71,6 +72,7 @@ class Finder:
71 "page_6": {"合同编号": self.item, 72 "page_6": {"合同编号": self.item,
72 }, 73 },
73 } 74 }
75 if self.is_asp:
74 self.init_result["page_7"] = {"合同编号": self.item, 76 self.init_result["page_7"] = {"合同编号": self.item,
75 } 77 }
76 self.init_result["page_8"] = {"合同编号": self.item, 78 self.init_result["page_8"] = {"合同编号": self.item,
...@@ -90,6 +92,25 @@ class Finder: ...@@ -90,6 +92,25 @@ class Finder:
90 "日期": self.item, 92 "日期": self.item,
91 }, 93 },
92 } 94 }
95 else:
96 self.init_result["page_7"] = {"合同编号": self.item,
97 "主借人签字": {"签字": self.item,
98 "日期": self.item,
99 },
100 "共借人签字": {"签字": self.item,
101 "日期": self.item,
102 },
103 "保证人1签字": {"签字": self.item,
104 "日期": self.item,
105 },
106 "保证人2签字": {"签字": self.item,
107 "日期": self.item,
108 },
109 "见证人签字": {"签字": self.item,
110 "日期": self.item,
111 },
112 }
113
93 114
94 def get_top_iou(self, poly, ocr_result): 115 def get_top_iou(self, poly, ocr_result):
95 """传入一个多边形, 找到与之最匹配的多边形 116 """传入一个多边形, 找到与之最匹配的多边形
...@@ -398,6 +419,39 @@ class Finder: ...@@ -398,6 +419,39 @@ class Finder:
398 seller['words'] = text 419 seller['words'] = text
399 return seller 420 return seller
400 421
def get_cldls(self):
    """Locate the value printed next to the '车辆代理商' (vehicle dealer) label.

    The page stored under ``self.pdf_info['2']`` is scanned twice: first for
    the span whose stripped text equals the label (the anchor), then for the
    first span whose centre lies to the right of the anchor, left of the
    page's vertical midline, and vertically inside the anchor's box.

    Returns:
        A copy of ``self.item``.  When a value span is found its ``'position'``
        is set to the span's bbox and ``'words'`` to the span's text; otherwise
        the copy is returned unchanged.

    Raises:
        KeyError: if page ``'2'`` or an expected key is missing from
            ``self.pdf_info`` (same as direct indexing always did).
    """
    # NOTE(review): layout looks like PyMuPDF's ``get_text("dict")`` output
    # (type 0 == text block, bbox == (x0, y0, x1, y1)) -- confirm with producer.
    page = self.pdf_info['2']

    def text_spans():
        # Yield every span of every block whose type is 0, in document order.
        for block in page['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    yield span

    seller = self.item.copy()

    # Step 1: find the label.  Stop at the FIRST matching span; previously a
    # second label later on the same line silently replaced the anchor.
    anchor_bbox = next(
        (span['bbox'] for span in text_spans()
         if span['text'].strip() == '车辆代理商'),
        None,
    )
    if anchor_bbox is None:
        return seller

    # Step 2: with the label located, pick the value span sitting beside it.
    half_width = page['width'] * 0.5
    for span in text_spans():
        bbox, text = span['bbox'], span['text']
        # Even indices are compared against the page width (x axis),
        # odd indices against the anchor's vertical extent (y axis).
        center_x = np.mean(bbox[::2])
        center_y = np.mean(bbox[1::2])
        if anchor_bbox[2] < center_x < half_width and \
                anchor_bbox[1] < center_y < anchor_bbox[3]:
            seller['position'] = bbox
            seller['words'] = text
            return seller
    return seller
454
401 def get_borrower_collection_account(self): 455 def get_borrower_collection_account(self):
402 account = self.item.copy() 456 account = self.item.copy()
403 account_name = self.item.copy() 457 account_name = self.item.copy()
...@@ -885,6 +939,9 @@ class Finder: ...@@ -885,6 +939,9 @@ class Finder:
885 repayment_schedule_table = self.get_repayment_schedule() 939 repayment_schedule_table = self.get_repayment_schedule()
886 # print(repayment_schedule_table) 940 # print(repayment_schedule_table)
887 self.init_result['page_3']['还款计划表'] = repayment_schedule_table 941 self.init_result['page_3']['还款计划表'] = repayment_schedule_table
942 # 车辆代理商
943 cldls = self.get_cldls()
944 self.init_result['page_3']['车辆代理商'] = cldls
888 ####################################### 945 #######################################
889 # Page 4 946 # Page 4
890 # 找合同编号 947 # 找合同编号
...@@ -907,6 +964,8 @@ class Finder: ...@@ -907,6 +964,8 @@ class Finder:
907 contract_no = self.get_contract_no(page_num='5') 964 contract_no = self.get_contract_no(page_num='5')
908 # print(contract_no) 965 # print(contract_no)
909 self.init_result['page_6']['合同编号'] = contract_no 966 self.init_result['page_6']['合同编号'] = contract_no
967
968 if self.is_asp:
910 # Page 7 969 # Page 7
911 # 找合同编号 970 # 找合同编号
912 contract_no = self.get_contract_no(page_num='6') 971 contract_no = self.get_contract_no(page_num='6')
...@@ -944,6 +1003,41 @@ class Finder: ...@@ -944,6 +1003,41 @@ class Finder:
944 # print(signature_name, signature_date) 1003 # print(signature_name, signature_date)
945 self.init_result['page_8']['见证人签字']['签字'] = signature_name 1004 self.init_result['page_8']['见证人签字']['签字'] = signature_name
946 self.init_result['page_8']['见证人签字']['日期'] = signature_date 1005 self.init_result['page_8']['见证人签字']['日期'] = signature_date
1006 else:
1007 # Page 7
1008 # 找合同编号
1009 contract_no = self.get_contract_no(page_num='6')
1010 self.init_result['page_7']['合同编号'] = contract_no
1011 signature_name, signature_date = self.get_last_page_signature(page_num='6',
1012 top='合同编号', bottom='共同借款人')
1013 if signature_name['words'] == None:
1014 signature_name, signature_date = self.get_last_page_signature(page_num='6',
1015 top='合同编号', bottom='共同借款人(抵押人)')
1016 # print(signature_name, signature_date)
1017 self.init_result['page_7']['主借人签字']['签字'] = signature_name
1018 self.init_result['page_7']['主借人签字']['日期'] = signature_date
1019 signature_name, signature_date = self.get_last_page_signature(page_num='6',
1020 top='共同借款人', bottom='保证人1')
1021 if signature_name['words'] == None:
1022 signature_name, signature_date = self.get_last_page_signature(page_num='6',
1023 top='共同借款人(抵押人)', bottom='保证人1')
1024 # print(signature_name, signature_date)
1025 self.init_result['page_7']['共借人签字']['签字'] = signature_name
1026 self.init_result['page_7']['共借人签字']['日期'] = signature_date
1027 signature_name, signature_date = self.get_last_page_signature(page_num='6',
1028 top='保证人1', bottom='保证人2')
1029 self.init_result['page_7']['保证人1签字']['签字'] = signature_name
1030 self.init_result['page_7']['保证人1签字']['日期'] = signature_date
1031 signature_name, signature_date = self.get_last_page_signature(page_num='6',
1032 top='保证人2', bottom='在本人面前亲笔签署本合同')
1033 self.init_result['page_7']['保证人2签字']['签字'] = signature_name
1034 self.init_result['page_7']['保证人2签字']['日期'] = signature_date
1035 signature_name, signature_date = self.get_last_page_signature(page_num='6',
1036 top='在本人面前亲笔签署本合同', bottom='以下无正文')
1037 # print(signature_name, signature_date)
1038 self.init_result['page_7']['见证人签字']['签字'] = signature_name
1039 self.init_result['page_7']['见证人签字']['日期'] = signature_date
1040
947 1041
948 # 重新定制输出 1042 # 重新定制输出
949 new_results = {"is_asp": self.is_asp, 1043 new_results = {"is_asp": self.is_asp,
......
...@@ -6,6 +6,7 @@ class HMHRetriever: ...@@ -6,6 +6,7 @@ class HMHRetriever:
6 def __init__(self): 6 def __init__(self):
7 self.words_str = 'words' 7 self.words_str = 'words'
8 self.position_str = 'location' 8 self.position_str = 'location'
9 self.fix_hava_str = '有'
9 self.default_position = [0, 0, 0, 0] 10 self.default_position = [0, 0, 0, 0]
10 self.search_fields_list = [ 11 self.search_fields_list = [
11 ('借款/承租人姓名', ''), 12 ('借款/承租人姓名', ''),
...@@ -51,7 +52,8 @@ class HMHRetriever: ...@@ -51,7 +52,8 @@ class HMHRetriever:
51 for name_date_tuple in name_date_list: 52 for name_date_tuple in name_date_list:
52 if len(name_date_tuple) == 2: 53 if len(name_date_tuple) == 2:
53 result[self.search_fields_list[4][0]] = { 54 result[self.search_fields_list[4][0]] = {
54 self.words_str: '{0} {1}'.format(name_date_tuple[0].replace('\u3000', '').strip(), name_date_tuple[1]), 55 # self.words_str: '{0} {1}'.format(name_date_tuple[0].replace('\u3000', '').strip(), name_date_tuple[1]),
56 self.words_str: self.fix_hava_str,
55 self.position_str: bbox 57 self.position_str: bbox
56 } 58 }
57 is_find_name_date = True 59 is_find_name_date = True
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!