Merge branch 'feature/fsm-contract' into feature/uat-tmp
Showing
3 changed files
with
97 additions
and
1 deletion
| ... | @@ -62,6 +62,7 @@ class Finder: | ... | @@ -62,6 +62,7 @@ class Finder: |
| 62 | }, | 62 | }, |
| 63 | "page_3": {"合同编号": self.item, | 63 | "page_3": {"合同编号": self.item, |
| 64 | "还款计划表": self.item, | 64 | "还款计划表": self.item, |
| 65 | "车辆代理商": self.item, | ||
| 65 | }, | 66 | }, |
| 66 | "page_4": {"合同编号": self.item, | 67 | "page_4": {"合同编号": self.item, |
| 67 | "附加产品融资贷款本金总金额明细": self.item, | 68 | "附加产品融资贷款本金总金额明细": self.item, |
| ... | @@ -71,6 +72,7 @@ class Finder: | ... | @@ -71,6 +72,7 @@ class Finder: |
| 71 | "page_6": {"合同编号": self.item, | 72 | "page_6": {"合同编号": self.item, |
| 72 | }, | 73 | }, |
| 73 | } | 74 | } |
| 75 | if self.is_asp: | ||
| 74 | self.init_result["page_7"] = {"合同编号": self.item, | 76 | self.init_result["page_7"] = {"合同编号": self.item, |
| 75 | } | 77 | } |
| 76 | self.init_result["page_8"] = {"合同编号": self.item, | 78 | self.init_result["page_8"] = {"合同编号": self.item, |
| ... | @@ -90,6 +92,25 @@ class Finder: | ... | @@ -90,6 +92,25 @@ class Finder: |
| 90 | "日期": self.item, | 92 | "日期": self.item, |
| 91 | }, | 93 | }, |
| 92 | } | 94 | } |
| 95 | else: | ||
| 96 | self.init_result["page_7"] = {"合同编号": self.item, | ||
| 97 | "主借人签字": {"签字": self.item, | ||
| 98 | "日期": self.item, | ||
| 99 | }, | ||
| 100 | "共借人签字": {"签字": self.item, | ||
| 101 | "日期": self.item, | ||
| 102 | }, | ||
| 103 | "保证人1签字": {"签字": self.item, | ||
| 104 | "日期": self.item, | ||
| 105 | }, | ||
| 106 | "保证人2签字": {"签字": self.item, | ||
| 107 | "日期": self.item, | ||
| 108 | }, | ||
| 109 | "见证人签字": {"签字": self.item, | ||
| 110 | "日期": self.item, | ||
| 111 | }, | ||
| 112 | } | ||
| 113 | |||
| 93 | 114 | ||
| 94 | def get_top_iou(self, poly, ocr_result): | 115 | def get_top_iou(self, poly, ocr_result): |
| 95 | """传入一个多边形, 找到与之最匹配的多边形 | 116 | """传入一个多边形, 找到与之最匹配的多边形 |
| ... | @@ -398,6 +419,39 @@ class Finder: | ... | @@ -398,6 +419,39 @@ class Finder: |
| 398 | seller['words'] = text | 419 | seller['words'] = text |
| 399 | return seller | 420 | return seller |
| 400 | 421 | ||
def get_cldls(self, page_num='2'):
    """Find the text printed to the right of the '车辆代理商' label.

    The page is scanned twice: first for the span whose stripped text is
    exactly the label, then for the first span that sits on the same row,
    to the right of the label and inside the left half of the page.

    Args:
        page_num: Key of the page inside ``self.pdf_info`` to search.
            Defaults to ``'2'``, the page this lookup was previously
            hard-coded to.

    Returns:
        A copy of ``self.item``. When both the label and a value span are
        found, ``'position'`` holds the value span's bbox and ``'words'``
        its text; otherwise the copy is returned unmodified.
    """
    seller = self.item.copy()
    page = self.pdf_info[page_num]

    def text_spans():
        # Only blocks whose type is 0 are scanned (presumably text blocks
        # -- confirm against the PDF parser that fills pdf_info).
        for block in page['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    yield span

    # Step 1: locate the label (the "key"); the first exact match wins.
    anchor_bbox = next(
        (span['bbox'] for span in text_spans()
         if span['text'].strip() == '车辆代理商'),
        None,
    )
    if anchor_bbox is None:
        return seller

    # Step 2: with the label found, look for its value.
    # NOTE(review): the index arithmetic below assumes bbox is laid out as
    # (x0, y0, x1, y1) -- confirm against the parser output.
    half_width = page['width'] * 0.5
    for span in text_spans():
        bbox = span['bbox']
        center_x = np.mean(bbox[::2])
        center_y = np.mean(bbox[1::2])
        # The value's center must be right of the label's right edge, left
        # of the page midline, and vertically within the label's row.
        if (anchor_bbox[2] < center_x < half_width
                and anchor_bbox[1] < center_y < anchor_bbox[3]):
            seller['position'] = bbox
            seller['words'] = span['text']
            return seller
    return seller
| 454 | |||
| 401 | def get_borrower_collection_account(self): | 455 | def get_borrower_collection_account(self): |
| 402 | account = self.item.copy() | 456 | account = self.item.copy() |
| 403 | account_name = self.item.copy() | 457 | account_name = self.item.copy() |
| ... | @@ -885,6 +939,9 @@ class Finder: | ... | @@ -885,6 +939,9 @@ class Finder: |
| 885 | repayment_schedule_table = self.get_repayment_schedule() | 939 | repayment_schedule_table = self.get_repayment_schedule() |
| 886 | # print(repayment_schedule_table) | 940 | # print(repayment_schedule_table) |
| 887 | self.init_result['page_3']['还款计划表'] = repayment_schedule_table | 941 | self.init_result['page_3']['还款计划表'] = repayment_schedule_table |
| 942 | # 车辆代理商 | ||
| 943 | cldls = self.get_cldls() | ||
| 944 | self.init_result['page_3']['车辆代理商'] = cldls | ||
| 888 | ####################################### | 945 | ####################################### |
| 889 | # Page 4 | 946 | # Page 4 |
| 890 | # 找合同编号 | 947 | # 找合同编号 |
| ... | @@ -907,6 +964,8 @@ class Finder: | ... | @@ -907,6 +964,8 @@ class Finder: |
| 907 | contract_no = self.get_contract_no(page_num='5') | 964 | contract_no = self.get_contract_no(page_num='5') |
| 908 | # print(contract_no) | 965 | # print(contract_no) |
| 909 | self.init_result['page_6']['合同编号'] = contract_no | 966 | self.init_result['page_6']['合同编号'] = contract_no |
| 967 | |||
| 968 | if self.is_asp: | ||
| 910 | # Page 7 | 969 | # Page 7 |
| 911 | # 找合同编号 | 970 | # 找合同编号 |
| 912 | contract_no = self.get_contract_no(page_num='6') | 971 | contract_no = self.get_contract_no(page_num='6') |
| ... | @@ -944,6 +1003,41 @@ class Finder: | ... | @@ -944,6 +1003,41 @@ class Finder: |
| 944 | # print(signature_name, signature_date) | 1003 | # print(signature_name, signature_date) |
| 945 | self.init_result['page_8']['见证人签字']['签字'] = signature_name | 1004 | self.init_result['page_8']['见证人签字']['签字'] = signature_name |
| 946 | self.init_result['page_8']['见证人签字']['日期'] = signature_date | 1005 | self.init_result['page_8']['见证人签字']['日期'] = signature_date |
| 1006 | else: | ||
| 1007 | # Page 7 | ||
| 1008 | # 找合同编号 | ||
| 1009 | contract_no = self.get_contract_no(page_num='6') | ||
| 1010 | self.init_result['page_7']['合同编号'] = contract_no | ||
| 1011 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
| 1012 | top='合同编号', bottom='共同借款人') | ||
| 1013 | if signature_name['words'] == None: | ||
| 1014 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
| 1015 | top='合同编号', bottom='共同借款人(抵押人)') | ||
| 1016 | # print(signature_name, signature_date) | ||
| 1017 | self.init_result['page_7']['主借人签字']['签字'] = signature_name | ||
| 1018 | self.init_result['page_7']['主借人签字']['日期'] = signature_date | ||
| 1019 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
| 1020 | top='共同借款人', bottom='保证人1') | ||
| 1021 | if signature_name['words'] == None: | ||
| 1022 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
| 1023 | top='共同借款人(抵押人)', bottom='保证人1') | ||
| 1024 | # print(signature_name, signature_date) | ||
| 1025 | self.init_result['page_7']['共借人签字']['签字'] = signature_name | ||
| 1026 | self.init_result['page_7']['共借人签字']['日期'] = signature_date | ||
| 1027 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
| 1028 | top='保证人1', bottom='保证人2') | ||
| 1029 | self.init_result['page_7']['保证人1签字']['签字'] = signature_name | ||
| 1030 | self.init_result['page_7']['保证人1签字']['日期'] = signature_date | ||
| 1031 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
| 1032 | top='保证人2', bottom='在本人面前亲笔签署本合同') | ||
| 1033 | self.init_result['page_7']['保证人2签字']['签字'] = signature_name | ||
| 1034 | self.init_result['page_7']['保证人2签字']['日期'] = signature_date | ||
| 1035 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
| 1036 | top='在本人面前亲笔签署本合同', bottom='以下无正文') | ||
| 1037 | # print(signature_name, signature_date) | ||
| 1038 | self.init_result['page_7']['见证人签字']['签字'] = signature_name | ||
| 1039 | self.init_result['page_7']['见证人签字']['日期'] = signature_date | ||
| 1040 | |||
| 947 | 1041 | ||
| 948 | # 重新定制输出 | 1042 | # 重新定制输出 |
| 949 | new_results = {"is_asp": self.is_asp, | 1043 | new_results = {"is_asp": self.is_asp, | ... | ... |
This diff is collapsed.
Click to expand it.
| ... | @@ -6,6 +6,7 @@ class HMHRetriever: | ... | @@ -6,6 +6,7 @@ class HMHRetriever: |
| 6 | def __init__(self): | 6 | def __init__(self): |
| 7 | self.words_str = 'words' | 7 | self.words_str = 'words' |
| 8 | self.position_str = 'location' | 8 | self.position_str = 'location' |
| 9 | self.fix_hava_str = '有' | ||
| 9 | self.default_position = [0, 0, 0, 0] | 10 | self.default_position = [0, 0, 0, 0] |
| 10 | self.search_fields_list = [ | 11 | self.search_fields_list = [ |
| 11 | ('借款/承租人姓名', ''), | 12 | ('借款/承租人姓名', ''), |
| ... | @@ -51,7 +52,8 @@ class HMHRetriever: | ... | @@ -51,7 +52,8 @@ class HMHRetriever: |
| 51 | for name_date_tuple in name_date_list: | 52 | for name_date_tuple in name_date_list: |
| 52 | if len(name_date_tuple) == 2: | 53 | if len(name_date_tuple) == 2: |
| 53 | result[self.search_fields_list[4][0]] = { | 54 | result[self.search_fields_list[4][0]] = { |
| 54 | self.words_str: '{0} {1}'.format(name_date_tuple[0].replace('\u3000', '').strip(), name_date_tuple[1]), | 55 | # self.words_str: '{0} {1}'.format(name_date_tuple[0].replace('\u3000', '').strip(), name_date_tuple[1]), |
| 56 | self.words_str: self.fix_hava_str, | ||
| 55 | self.position_str: bbox | 57 | self.position_str: bbox |
| 56 | } | 58 | } |
| 57 | is_find_name_date = True | 59 | is_find_name_date = True | ... | ... |
-
Please register or sign in to post a comment