Merge branch 'feature/uat-tmp' of http://gitlab.situdata.com/zhouweiqi/bmw-ocr into feature/uat-tmp
Showing 8 changed files with 134 additions and 5 deletions
... | @@ -1168,6 +1168,7 @@ MS_ERROR_COL = (5, 6) | ... | @@ -1168,6 +1168,7 @@ MS_ERROR_COL = (5, 6) |
1168 | WECHART_CLASSIFY = 12 | 1168 | WECHART_CLASSIFY = 12 |
1169 | NEW_ZHIFUBAO_CLASSIFY = 48 | 1169 | NEW_ZHIFUBAO_CLASSIFY = 48 |
1170 | ALI_WECHART_CLASSIFY = {12, 13, 48} | 1170 | ALI_WECHART_CLASSIFY = {12, 13, 48} |
1171 | JSYH_CLASSIFY = {11, 27, 34} | ||
1171 | WECHART_ERROR_COL = (1, 2) | 1172 | WECHART_ERROR_COL = (1, 2) |
1172 | SPECIAL_HEADERS_MAPPING = copy.deepcopy(HEADERS_MAPPING) | 1173 | SPECIAL_HEADERS_MAPPING = copy.deepcopy(HEADERS_MAPPING) |
1173 | SPECIAL_HEADERS_MAPPING.update( | 1174 | SPECIAL_HEADERS_MAPPING.update( | ... | ... |
... | @@ -2009,6 +2009,9 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -2009,6 +2009,9 @@ class Command(BaseCommand, LoggerMixin): |
2009 | report_list[5] = BSCheckResult.CHECK_FAILED.value | 2009 | report_list[5] = BSCheckResult.CHECK_FAILED.value |
2010 | 2010 | ||
2011 | finally: | 2011 | finally: |
2012 | self.online_log.info('{0} [task={1}] [license_summary={2}] ' | ||
2013 | '[contract_result_compare={3}]'.format(self.log_base, task_str, | ||
2014 | license_summary, contract_result_compare)) | ||
2012 | self.rebuild_contract(license_summary, contract_result_compare) | 2015 | self.rebuild_contract(license_summary, contract_result_compare) |
2013 | 2016 | ||
2014 | bs_rebuild = self.rebuild_bs(merged_bs_summary) | 2017 | bs_rebuild = self.rebuild_bs(merged_bs_summary) | ... | ... |
... | @@ -27,6 +27,7 @@ class RequestTeam(NamedEnum): | ... | @@ -27,6 +27,7 @@ class RequestTeam(NamedEnum): |
27 | SETTLEMENT = (1, 'SETTLEMENT') | 27 | SETTLEMENT = (1, 'SETTLEMENT') |
28 | CONTRACTMANAGEMENT = (2, 'CONTRACTMANAGEMENT') | 28 | CONTRACTMANAGEMENT = (2, 'CONTRACTMANAGEMENT') |
29 | CONTROLLING = (3, 'CONTROLLING') | 29 | CONTROLLING = (3, 'CONTROLLING') |
30 | INSURANCE = (4, 'INSURANCE') | ||
30 | 31 | ||
31 | 32 | ||
32 | class RequestTrigger(NamedEnum): | 33 | class RequestTrigger(NamedEnum): | ... | ... |
... | @@ -39,6 +39,18 @@ class BSWorkbook(Workbook): | ... | @@ -39,6 +39,18 @@ class BSWorkbook(Workbook): |
39 | self.MAX_MEAN = 31 | 39 | self.MAX_MEAN = 31 |
40 | self.need_follow = False | 40 | self.need_follow = False |
41 | 41 | ||
42 | # @staticmethod | ||
43 | # def date_calibration(date_str): | ||
44 | # result = True | ||
45 | # try: | ||
46 | # if date_str[-2] not in ['20', '21']: | ||
47 | # result = False | ||
48 | # if date_str[-5:-3] not in ['03', '06', '09', '12']: | ||
49 | # result = False | ||
50 | # except Exception as e: | ||
51 | # result = False | ||
52 | # return result | ||
53 | |||
42 | @staticmethod | 54 | @staticmethod |
43 | def replace_newline(queryset_value): | 55 | def replace_newline(queryset_value): |
44 | new_set = set() | 56 | new_set = set() |
... | @@ -338,7 +350,7 @@ class BSWorkbook(Workbook): | ... | @@ -338,7 +350,7 @@ class BSWorkbook(Workbook): |
338 | metadata_rows.append(('Producer', producer)) | 350 | metadata_rows.append(('Producer', producer)) |
339 | if len(author) > 0: | 351 | if len(author) > 0: |
340 | metadata_highlight_row.append(6) | 352 | metadata_highlight_row.append(6) |
341 | if 'iText' not in producer and 'Qt' not in producer and 'Haru Free' not in producer: | 353 | if 'iText' not in producer and 'Qt' not in producer and 'Haru Free' not in producer and 'OpenPDF' not in producer: |
342 | metadata_highlight_row.append(7) | 354 | metadata_highlight_row.append(7) |
343 | metadata_rows.append(self.blank_row) | 355 | metadata_rows.append(self.blank_row) |
344 | 356 | ||
... | @@ -496,6 +508,7 @@ class BSWorkbook(Workbook): | ... | @@ -496,6 +508,7 @@ class BSWorkbook(Workbook): |
496 | 508 | ||
497 | tmp_ws = self.create_sheet('tmp_ws') | 509 | tmp_ws = self.create_sheet('tmp_ws') |
498 | tmp2_ws = self.create_sheet('tmp2_ws') | 510 | tmp2_ws = self.create_sheet('tmp2_ws') |
511 | tmp3_ws = self.create_sheet('tmp3_ws') | ||
499 | if classify in consts.ALI_WECHART_CLASSIFY: | 512 | if classify in consts.ALI_WECHART_CLASSIFY: |
500 | high_light_keyword = self.wechat_keyword | 513 | high_light_keyword = self.wechat_keyword |
501 | else: | 514 | else: |
... | @@ -629,7 +642,7 @@ class BSWorkbook(Workbook): | ... | @@ -629,7 +642,7 @@ class BSWorkbook(Workbook): |
629 | # 关键词1提取 | 642 | # 关键词1提取 |
630 | if summary_cell_value in self.interest_keyword: | 643 | if summary_cell_value in self.interest_keyword: |
631 | new_amount_cell_value = None if amount_cell is None else amount_cell.value | 644 | new_amount_cell_value = None if amount_cell is None else amount_cell.value |
632 | ms.append((summary_cell_value, date_cell_value, new_amount_cell_value)) | 645 | tmp3_ws.append((summary_cell_value, date_cell_value, new_amount_cell_value)) |
633 | # 关键词2提取至临时表 | 646 | # 关键词2提取至临时表 |
634 | elif summary_cell_value in self.salary_keyword: | 647 | elif summary_cell_value in self.salary_keyword: |
635 | new_amount_cell_value = None if amount_cell is None else amount_cell.value | 648 | new_amount_cell_value = None if amount_cell is None else amount_cell.value |
... | @@ -657,6 +670,18 @@ class BSWorkbook(Workbook): | ... | @@ -657,6 +670,18 @@ class BSWorkbook(Workbook): |
657 | # if summary_cell_idx is not None: | 670 | # if summary_cell_idx is not None: |
658 | # new_ws[row][summary_cell_idx].fill = self.amount_fill | 671 | # new_ws[row][summary_cell_idx].fill = self.amount_fill |
659 | 672 | ||
673 | # 关键词1信息提取:结息 | ||
674 | for row in tmp3_ws.iter_rows(values_only=True): | ||
675 | ms.append(row) | ||
676 | # # 建设银行 | ||
677 | # if classify in consts.JSYH_CLASSIFY: | ||
678 | # if isinstance(row[1], str) and self.date_calibration(row[1]): | ||
679 | # pass | ||
680 | # else: | ||
681 | # for cell in ms[ms.max_row]: | ||
682 | # cell.fill = self.amount_fill | ||
683 | self.remove(tmp3_ws) | ||
684 | |||
660 | # 关键词2信息提取 | 685 | # 关键词2信息提取 |
661 | ms.append(self.blank_row) | 686 | ms.append(self.blank_row) |
662 | ms.append(self.salary_keyword_header) | 687 | ms.append(self.salary_keyword_header) | ... | ... |
... | @@ -257,6 +257,7 @@ se_compare_content = { | ... | @@ -257,6 +257,7 @@ se_compare_content = { |
257 | 'fsmSpecialCar': fields.Boolean(required=False), | 257 | 'fsmSpecialCar': fields.Boolean(required=False), |
258 | 'fsmBestPrice': fields.Boolean(required=False), | 258 | 'fsmBestPrice': fields.Boolean(required=False), |
259 | 'isAutoSettlement': fields.Boolean(required=False), | 259 | 'isAutoSettlement': fields.Boolean(required=False), |
260 | 'fsmLandingDealer': fields.Str(required=False, validate=validate.Length(max=1024)), | ||
260 | 261 | ||
261 | 'individualCusInfo': fields.List(fields.Nested(se_individual_args), | 262 | 'individualCusInfo': fields.List(fields.Nested(se_individual_args), |
262 | required=True, validate=validate.Length(min=1, max=4)), | 263 | required=True, validate=validate.Length(min=1, max=4)), |
... | @@ -592,12 +593,12 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -592,12 +593,12 @@ class UploadDocView(GenericView, DocHandler): |
592 | if business_type == consts.HIL_PREFIX: | 593 | if business_type == consts.HIL_PREFIX: |
593 | if document_scheme == RequestTeam.ACCEPTANCE.name: | 594 | if document_scheme == RequestTeam.ACCEPTANCE.name: |
594 | result_class = HILOCRResult | 595 | result_class = HILOCRResult |
595 | elif document_scheme == RequestTeam.SETTLEMENT.name: | 596 | elif document_scheme == RequestTeam.SETTLEMENT.name or document_scheme == RequestTeam.INSURANCE.name: |
596 | result_class = HILSEOCRResult | 597 | result_class = HILSEOCRResult |
597 | elif business_type == consts.AFC_PREFIX: | 598 | elif business_type == consts.AFC_PREFIX: |
598 | if document_scheme == RequestTeam.ACCEPTANCE.name: | 599 | if document_scheme == RequestTeam.ACCEPTANCE.name: |
599 | result_class = AFCOCRResult | 600 | result_class = AFCOCRResult |
600 | elif document_scheme == RequestTeam.SETTLEMENT.name: | 601 | elif document_scheme == RequestTeam.SETTLEMENT.name or document_scheme == RequestTeam.INSURANCE.name: |
601 | result_class = AFCSEOCRResult | 602 | result_class = AFCSEOCRResult |
602 | 603 | ||
603 | ocr_result_obj = result_class.objects.filter(application_id=application_id).first() | 604 | ocr_result_obj = result_class.objects.filter(application_id=application_id).first() |
... | @@ -610,6 +611,7 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -610,6 +611,7 @@ class UploadDocView(GenericView, DocHandler): |
610 | ocr_result_obj.fsm_activited = 1 | 611 | ocr_result_obj.fsm_activited = 1 |
611 | ocr_result_obj.save() | 612 | ocr_result_obj.save() |
612 | 613 | ||
614 | self.running_log.info('[doc upload applicationId-{0}] [ocr result saved]'.format(application_id)) | ||
613 | if data_source == consts.DATA_SOURCE_LIST[1]: | 615 | if data_source == consts.DATA_SOURCE_LIST[1]: |
614 | if document_name.endswith('-证书.pdf') or document_name.endswith('-证书'): | 616 | if document_name.endswith('-证书.pdf') or document_name.endswith('-证书'): |
615 | self.running_log.info('[doc upload success] [eapp license skip] [args={0}]'.format(args)) | 617 | self.running_log.info('[doc upload success] [eapp license skip] [args={0}]'.format(args)) |
... | @@ -880,6 +882,7 @@ class SECompareView(GenericView, PreSEHandler): | ... | @@ -880,6 +882,7 @@ class SECompareView(GenericView, PreSEHandler): |
880 | fsm_flag = content.get('fsmFlag', False) | 882 | fsm_flag = content.get('fsmFlag', False) |
881 | fsm_special_car = content.get('fsmSpecialCar', False) | 883 | fsm_special_car = content.get('fsmSpecialCar', False) |
882 | fsm_best_price = content.get('fsmBestPrice', False) | 884 | fsm_best_price = content.get('fsmBestPrice', False) |
885 | fsm_landing_dealer = content.get('fsmLandingDealer') | ||
883 | 886 | ||
884 | if fsm_special_car: | 887 | if fsm_special_car: |
885 | compare_result = { | 888 | compare_result = { | ... | ... |
... | @@ -62,6 +62,7 @@ class Finder: | ... | @@ -62,6 +62,7 @@ class Finder: |
62 | }, | 62 | }, |
63 | "page_3": {"合同编号": self.item, | 63 | "page_3": {"合同编号": self.item, |
64 | "还款计划表": self.item, | 64 | "还款计划表": self.item, |
65 | "车辆代理商": self.item, | ||
65 | }, | 66 | }, |
66 | "page_4": {"合同编号": self.item, | 67 | "page_4": {"合同编号": self.item, |
67 | "附加产品融资贷款本金总金额明细": self.item, | 68 | "附加产品融资贷款本金总金额明细": self.item, |
... | @@ -71,6 +72,7 @@ class Finder: | ... | @@ -71,6 +72,7 @@ class Finder: |
71 | "page_6": {"合同编号": self.item, | 72 | "page_6": {"合同编号": self.item, |
72 | }, | 73 | }, |
73 | } | 74 | } |
75 | if self.is_asp: | ||
74 | self.init_result["page_7"] = {"合同编号": self.item, | 76 | self.init_result["page_7"] = {"合同编号": self.item, |
75 | } | 77 | } |
76 | self.init_result["page_8"] = {"合同编号": self.item, | 78 | self.init_result["page_8"] = {"合同编号": self.item, |
... | @@ -90,6 +92,25 @@ class Finder: | ... | @@ -90,6 +92,25 @@ class Finder: |
90 | "日期": self.item, | 92 | "日期": self.item, |
91 | }, | 93 | }, |
92 | } | 94 | } |
95 | else: | ||
96 | self.init_result["page_7"] = {"合同编号": self.item, | ||
97 | "主借人签字": {"签字": self.item, | ||
98 | "日期": self.item, | ||
99 | }, | ||
100 | "共借人签字": {"签字": self.item, | ||
101 | "日期": self.item, | ||
102 | }, | ||
103 | "保证人1签字": {"签字": self.item, | ||
104 | "日期": self.item, | ||
105 | }, | ||
106 | "保证人2签字": {"签字": self.item, | ||
107 | "日期": self.item, | ||
108 | }, | ||
109 | "见证人签字": {"签字": self.item, | ||
110 | "日期": self.item, | ||
111 | }, | ||
112 | } | ||
113 | |||
93 | 114 | ||
94 | def get_top_iou(self, poly, ocr_result): | 115 | def get_top_iou(self, poly, ocr_result): |
95 | """传入一个多边形, 找到与之最匹配的多边形 | 116 | """传入一个多边形, 找到与之最匹配的多边形 |
... | @@ -398,6 +419,39 @@ class Finder: | ... | @@ -398,6 +419,39 @@ class Finder: |
398 | seller['words'] = text | 419 | seller['words'] = text |
399 | return seller | 420 | return seller |
400 | 421 | ||
422 | def get_cldls(self): | ||
423 | seller = self.item.copy() | ||
424 | # 先找到 key | ||
425 | anchor_bbox = None | ||
426 | for block in self.pdf_info['2']['blocks']: | ||
427 | if anchor_bbox is not None: | ||
428 | break | ||
429 | if block['type'] != 0: | ||
430 | continue | ||
431 | for line in block['lines']: | ||
432 | if anchor_bbox is not None: | ||
433 | break | ||
434 | for span in line['spans']: | ||
435 | bbox, text = span['bbox'], span['text'] | ||
436 | if text.strip() == '车辆代理商': | ||
437 | anchor_bbox = bbox | ||
438 | # print(anchor_bbox) | ||
439 | # 当找到了 key, 则根据 key 去匹配 value | ||
440 | if anchor_bbox: | ||
441 | half_width = self.pdf_info['2']['width'] * 0.5 | ||
442 | for block in self.pdf_info['2']['blocks']: | ||
443 | if block['type'] != 0: | ||
444 | continue | ||
445 | for line in block['lines']: | ||
446 | for span in line['spans']: | ||
447 | bbox, text = span['bbox'], span['text'] | ||
448 | if anchor_bbox[2]<np.mean(bbox[::2])<half_width and \ | ||
449 | anchor_bbox[1]<np.mean(bbox[1::2])<anchor_bbox[3]: | ||
450 | seller['position'] = bbox | ||
451 | seller['words'] = text | ||
452 | return seller | ||
453 | return seller | ||
454 | |||
401 | def get_borrower_collection_account(self): | 455 | def get_borrower_collection_account(self): |
402 | account = self.item.copy() | 456 | account = self.item.copy() |
403 | account_name = self.item.copy() | 457 | account_name = self.item.copy() |
... | @@ -885,6 +939,9 @@ class Finder: | ... | @@ -885,6 +939,9 @@ class Finder: |
885 | repayment_schedule_table = self.get_repayment_schedule() | 939 | repayment_schedule_table = self.get_repayment_schedule() |
886 | # print(repayment_schedule_table) | 940 | # print(repayment_schedule_table) |
887 | self.init_result['page_3']['还款计划表'] = repayment_schedule_table | 941 | self.init_result['page_3']['还款计划表'] = repayment_schedule_table |
942 | # 车辆代理商 | ||
943 | cldls = self.get_cldls() | ||
944 | self.init_result['page_3']['车辆代理商'] = cldls | ||
888 | ####################################### | 945 | ####################################### |
889 | # Page 4 | 946 | # Page 4 |
890 | # 找合同编号 | 947 | # 找合同编号 |
... | @@ -907,6 +964,8 @@ class Finder: | ... | @@ -907,6 +964,8 @@ class Finder: |
907 | contract_no = self.get_contract_no(page_num='5') | 964 | contract_no = self.get_contract_no(page_num='5') |
908 | # print(contract_no) | 965 | # print(contract_no) |
909 | self.init_result['page_6']['合同编号'] = contract_no | 966 | self.init_result['page_6']['合同编号'] = contract_no |
967 | |||
968 | if self.is_asp: | ||
910 | # Page 7 | 969 | # Page 7 |
911 | # 找合同编号 | 970 | # 找合同编号 |
912 | contract_no = self.get_contract_no(page_num='6') | 971 | contract_no = self.get_contract_no(page_num='6') |
... | @@ -944,6 +1003,41 @@ class Finder: | ... | @@ -944,6 +1003,41 @@ class Finder: |
944 | # print(signature_name, signature_date) | 1003 | # print(signature_name, signature_date) |
945 | self.init_result['page_8']['见证人签字']['签字'] = signature_name | 1004 | self.init_result['page_8']['见证人签字']['签字'] = signature_name |
946 | self.init_result['page_8']['见证人签字']['日期'] = signature_date | 1005 | self.init_result['page_8']['见证人签字']['日期'] = signature_date |
1006 | else: | ||
1007 | # Page 7 | ||
1008 | # 找合同编号 | ||
1009 | contract_no = self.get_contract_no(page_num='6') | ||
1010 | self.init_result['page_7']['合同编号'] = contract_no | ||
1011 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
1012 | top='合同编号', bottom='共同借款人') | ||
1013 | if signature_name['words'] == None: | ||
1014 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
1015 | top='合同编号', bottom='共同借款人(抵押人)') | ||
1016 | # print(signature_name, signature_date) | ||
1017 | self.init_result['page_7']['主借人签字']['签字'] = signature_name | ||
1018 | self.init_result['page_7']['主借人签字']['日期'] = signature_date | ||
1019 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
1020 | top='共同借款人', bottom='保证人1') | ||
1021 | if signature_name['words'] == None: | ||
1022 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
1023 | top='共同借款人(抵押人)', bottom='保证人1') | ||
1024 | # print(signature_name, signature_date) | ||
1025 | self.init_result['page_7']['共借人签字']['签字'] = signature_name | ||
1026 | self.init_result['page_7']['共借人签字']['日期'] = signature_date | ||
1027 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
1028 | top='保证人1', bottom='保证人2') | ||
1029 | self.init_result['page_7']['保证人1签字']['签字'] = signature_name | ||
1030 | self.init_result['page_7']['保证人1签字']['日期'] = signature_date | ||
1031 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
1032 | top='保证人2', bottom='在本人面前亲笔签署本合同') | ||
1033 | self.init_result['page_7']['保证人2签字']['签字'] = signature_name | ||
1034 | self.init_result['page_7']['保证人2签字']['日期'] = signature_date | ||
1035 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
1036 | top='在本人面前亲笔签署本合同', bottom='以下无正文') | ||
1037 | # print(signature_name, signature_date) | ||
1038 | self.init_result['page_7']['见证人签字']['签字'] = signature_name | ||
1039 | self.init_result['page_7']['见证人签字']['日期'] = signature_date | ||
1040 | |||
947 | 1041 | ||
948 | # 重新定制输出 | 1042 | # 重新定制输出 |
949 | new_results = {"is_asp": self.is_asp, | 1043 | new_results = {"is_asp": self.is_asp, | ... | ... |
This diff is collapsed. Click to expand it.
... | @@ -6,6 +6,7 @@ class HMHRetriever: | ... | @@ -6,6 +6,7 @@ class HMHRetriever: |
6 | def __init__(self): | 6 | def __init__(self): |
7 | self.words_str = 'words' | 7 | self.words_str = 'words' |
8 | self.position_str = 'location' | 8 | self.position_str = 'location' |
9 | self.fix_hava_str = '有' | ||
9 | self.default_position = [0, 0, 0, 0] | 10 | self.default_position = [0, 0, 0, 0] |
10 | self.search_fields_list = [ | 11 | self.search_fields_list = [ |
11 | ('借款/承租人姓名', ''), | 12 | ('借款/承租人姓名', ''), |
... | @@ -51,7 +52,8 @@ class HMHRetriever: | ... | @@ -51,7 +52,8 @@ class HMHRetriever: |
51 | for name_date_tuple in name_date_list: | 52 | for name_date_tuple in name_date_list: |
52 | if len(name_date_tuple) == 2: | 53 | if len(name_date_tuple) == 2: |
53 | result[self.search_fields_list[4][0]] = { | 54 | result[self.search_fields_list[4][0]] = { |
54 | self.words_str: '{0} {1}'.format(name_date_tuple[0].replace('\u3000', '').strip(), name_date_tuple[1]), | 55 | # self.words_str: '{0} {1}'.format(name_date_tuple[0].replace('\u3000', '').strip(), name_date_tuple[1]), |
56 | self.words_str: self.fix_hava_str, | ||
55 | self.position_str: bbox | 57 | self.position_str: bbox |
56 | } | 58 | } |
57 | is_find_name_date = True | 59 | is_find_name_date = True | ... | ... |
-
Please register or sign in to post a comment