add ltgt bzll (adds the 标准利率 / standard-rate field to the LTGT contract mapping and PDF extraction)
Showing 2 changed files with 38 additions and 28 deletions
| ... | @@ -1434,6 +1434,7 @@ ASP_KEY = 'is_asp' | ... | @@ -1434,6 +1434,7 @@ ASP_KEY = 'is_asp' |
| 1434 | 1434 | ||
| 1435 | AFC_CON_MAP_LTGT = { | 1435 | AFC_CON_MAP_LTGT = { |
| 1436 | '合同编号': (2, 2, '合同编号', None), | 1436 | '合同编号': (2, 2, '合同编号', None), |
| 1437 | '标准利率': (2, 2, '标准利率', None), | ||
| 1437 | '借款人姓名': (2, 2, '借款人及抵押人', 'name'), | 1438 | '借款人姓名': (2, 2, '借款人及抵押人', 'name'), |
| 1438 | '共借人姓名': (2, 2, '共同借款人及共同抵押人', 'name'), | 1439 | '共借人姓名': (2, 2, '共同借款人及共同抵押人', 'name'), |
| 1439 | '保证人姓名1': (2, 2, '保证人1', 'name'), | 1440 | '保证人姓名1': (2, 2, '保证人1', 'name'), |
| ... | @@ -1446,6 +1447,7 @@ AFC_CON_FIELD_ORDER_LTGT = ( | ... | @@ -1446,6 +1447,7 @@ AFC_CON_FIELD_ORDER_LTGT = ( |
| 1446 | ('共借人姓名', '共借人姓名'), | 1447 | ('共借人姓名', '共借人姓名'), |
| 1447 | ('保证人姓名1', '保证人姓名1'), | 1448 | ('保证人姓名1', '保证人姓名1'), |
| 1448 | ('保证人姓名2', '保证人姓名2'), | 1449 | ('保证人姓名2', '保证人姓名2'), |
| 1450 | ('标准利率', '标准利率'), | ||
| 1449 | ) | 1451 | ) |
| 1450 | 1452 | ||
| 1451 | SE_AFC_CON_MAP = { | 1453 | SE_AFC_CON_MAP = { | ... | ... |
| ... | @@ -55,6 +55,7 @@ class Finder: | ... | @@ -55,6 +55,7 @@ class Finder: |
| 55 | "附加产品融资贷款本金总金额": self.item, | 55 | "附加产品融资贷款本金总金额": self.item, |
| 56 | }, | 56 | }, |
| 57 | "贷款期限": self.item, | 57 | "贷款期限": self.item, |
| 58 | "标准利率": self.item, | ||
| 58 | "还款账户": {"账号": self.item, | 59 | "还款账户": {"账号": self.item, |
| 59 | "户名": self.item, | 60 | "户名": self.item, |
| 60 | "开户行": self.item, | 61 | "开户行": self.item, |
| ... | @@ -228,18 +229,32 @@ class Finder: | ... | @@ -228,18 +229,32 @@ class Finder: |
| 228 | loan_term['words'] = words | 229 | loan_term['words'] = words |
| 229 | return loan_term | 230 | return loan_term |
| 230 | 231 | ||
| 232 | def get_standard_rate(self, page_num='0'): | ||
| 233 | standard_rate = self.item.copy() | ||
| 234 | for block in self.pdf_info[page_num]['blocks']: | ||
| 235 | if block['type'] != 0: | ||
| 236 | continue | ||
| 237 | for line in block['lines']: | ||
| 238 | for span in line['spans']: | ||
| 239 | bbox, text = span['bbox'], span['text'] | ||
| 240 | matchs = re.search(r'本合同当期的标准利率为(\S+)%/年', text) | ||
| 241 | if matchs: | ||
| 242 | standard_rate['position'] = bbox | ||
| 243 | standard_rate['words'] = matchs.group(1) | ||
| 244 | return standard_rate | ||
| 245 | |||
| 231 | def mergelist(self, text_list): | 246 | def mergelist(self, text_list): |
| 232 | pattern = re.compile("[^\u4e00-\u9fa5]") # 匹配不是中文的其他字符 | 247 | pattern = re.compile("[^\u4e00-\u9fa5]") # 匹配不是中文的其他字符 |
| 233 | mergeindex = -1 | 248 | mergeindex = -1 |
| 234 | for index, i in enumerate(text_list): | 249 | for index, i in enumerate(text_list): |
| 235 | if '所购' in i and len(pattern.sub('', pattern.sub('', text_list[index+1]))) != 0: | 250 | if '所购' in i and len(pattern.sub('', pattern.sub('', text_list[index + 1]))) != 0: |
| 236 | # if '所购' in i and '.00' not in text_list[index+1]: | 251 | # if '所购' in i and '.00' not in text_list[index+1]: |
| 237 | mergeindex = index | 252 | mergeindex = index |
| 238 | if mergeindex == -1: | 253 | if mergeindex == -1: |
| 239 | return text_list | 254 | return text_list |
| 240 | else: | 255 | else: |
| 241 | new_text_list = text_list[:mergeindex] + [ | 256 | new_text_list = text_list[:mergeindex] + [text_list[mergeindex] + text_list[mergeindex + 1]] + \ |
| 242 | text_list[mergeindex] + text_list[mergeindex+1]] + text_list[mergeindex+2:] | 257 | text_list[mergeindex + 2:] |
| 243 | return self.mergelist(new_text_list) | 258 | return self.mergelist(new_text_list) |
| 244 | 259 | ||
| 245 | def get_asp_details(self, page_num): | 260 | def get_asp_details(self, page_num): |
| ... | @@ -261,13 +276,13 @@ class Finder: | ... | @@ -261,13 +276,13 @@ class Finder: |
| 261 | # print(text) | 276 | # print(text) |
| 262 | asp_details_text_list.append(text) | 277 | asp_details_text_list.append(text) |
| 263 | asp_details_text_list = self.mergelist(asp_details_text_list) | 278 | asp_details_text_list = self.mergelist(asp_details_text_list) |
| 264 | for i in range((len(asp_details_text_list)+2)//3): | 279 | for i in range((len(asp_details_text_list) + 2) // 3): |
| 265 | line = [] | 280 | line = [] |
| 266 | if i == 0: | 281 | if i == 0: |
| 267 | line = [asp_details_text_list[0]] | 282 | line = [asp_details_text_list[0]] |
| 268 | else: | 283 | else: |
| 269 | for j in range(3): | 284 | for j in range(3): |
| 270 | line.append(asp_details_text_list[i*3-2+j]) | 285 | line.append(asp_details_text_list[i * 3 - 2 + j]) |
| 271 | asp_details_table.append(line) | 286 | asp_details_table.append(line) |
| 272 | if len(asp_details_table) > 0: | 287 | if len(asp_details_table) > 0: |
| 273 | asp_details_table_term['words'] = asp_details_table | 288 | asp_details_table_term['words'] = asp_details_table |
| ... | @@ -420,12 +435,12 @@ class Finder: | ... | @@ -420,12 +435,12 @@ class Finder: |
| 420 | table = False | 435 | table = False |
| 421 | if table == True: | 436 | if table == True: |
| 422 | repayment_schedule_text_list.append(text) | 437 | repayment_schedule_text_list.append(text) |
| 423 | for i in range(len(repayment_schedule_text_list)//5): | 438 | for i in range(len(repayment_schedule_text_list) // 5): |
| 424 | line = [] | 439 | line = [] |
| 425 | # 5表示5列的意思 | 440 | # 5表示5列的意思 |
| 426 | for j in range(5): | 441 | for j in range(5): |
| 427 | line.append(repayment_schedule_text_list[i*5+j]) | 442 | line.append(repayment_schedule_text_list[i * 5 + j]) |
| 428 | if str(i+1) == line[1]: | 443 | if str(i + 1) == line[1]: |
| 429 | break | 444 | break |
| 430 | repayment_schedule_table.append(line) | 445 | repayment_schedule_table.append(line) |
| 431 | if len(repayment_schedule_table) > 0: | 446 | if len(repayment_schedule_table) > 0: |
| ... | @@ -461,8 +476,7 @@ class Finder: | ... | @@ -461,8 +476,7 @@ class Finder: |
| 461 | else: | 476 | else: |
| 462 | words = '无' | 477 | words = '无' |
| 463 | boxes = np.array(boxes).reshape((-1, 2)) | 478 | boxes = np.array(boxes).reshape((-1, 2)) |
| 464 | position = [min(boxes[:, 0]), min(boxes[:, 1]), | 479 | position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])] |
| 465 | max(boxes[:, 0]), max(boxes[:, 1])] | ||
| 466 | signature_role_1['page_num'] = page_num | 480 | signature_role_1['page_num'] = page_num |
| 467 | signature_role_1['position'] = position | 481 | signature_role_1['position'] = position |
| 468 | signature_role_1['words'] = words | 482 | signature_role_1['words'] = words |
| ... | @@ -497,8 +511,7 @@ class Finder: | ... | @@ -497,8 +511,7 @@ class Finder: |
| 497 | else: | 511 | else: |
| 498 | words = '无' | 512 | words = '无' |
| 499 | boxes = np.array(boxes).reshape((-1, 2)) | 513 | boxes = np.array(boxes).reshape((-1, 2)) |
| 500 | position = [min(boxes[:, 0]), min(boxes[:, 1]), | 514 | position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])] |
| 501 | max(boxes[:, 0]), max(boxes[:, 1])] | ||
| 502 | signature_role_2['page_num'] = page_num | 515 | signature_role_2['page_num'] = page_num |
| 503 | signature_role_2['position'] = position | 516 | signature_role_2['position'] = position |
| 504 | signature_role_2['words'] = words | 517 | signature_role_2['words'] = words |
| ... | @@ -533,8 +546,7 @@ class Finder: | ... | @@ -533,8 +546,7 @@ class Finder: |
| 533 | else: | 546 | else: |
| 534 | words = '无' | 547 | words = '无' |
| 535 | boxes = np.array(boxes).reshape((-1, 2)) | 548 | boxes = np.array(boxes).reshape((-1, 2)) |
| 536 | position = [min(boxes[:, 0]), min(boxes[:, 1]), | 549 | position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])] |
| 537 | max(boxes[:, 0]), max(boxes[:, 1])] | ||
| 538 | signature_role_3['page_num'] = page_num | 550 | signature_role_3['page_num'] = page_num |
| 539 | signature_role_3['position'] = position | 551 | signature_role_3['position'] = position |
| 540 | signature_role_3['words'] = words | 552 | signature_role_3['words'] = words |
| ... | @@ -569,8 +581,7 @@ class Finder: | ... | @@ -569,8 +581,7 @@ class Finder: |
| 569 | else: | 581 | else: |
| 570 | words = '无' | 582 | words = '无' |
| 571 | boxes = np.array(boxes).reshape((-1, 2)) | 583 | boxes = np.array(boxes).reshape((-1, 2)) |
| 572 | position = [min(boxes[:, 0]), min(boxes[:, 1]), | 584 | position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])] |
| 573 | max(boxes[:, 0]), max(boxes[:, 1])] | ||
| 574 | signature_role_4['page_num'] = page_num | 585 | signature_role_4['page_num'] = page_num |
| 575 | signature_role_4['position'] = position | 586 | signature_role_4['position'] = position |
| 576 | signature_role_4['words'] = words | 587 | signature_role_4['words'] = words |
| ... | @@ -606,8 +617,7 @@ class Finder: | ... | @@ -606,8 +617,7 @@ class Finder: |
| 606 | else: | 617 | else: |
| 607 | words = '无' | 618 | words = '无' |
| 608 | boxes = np.array(boxes).reshape((-1, 2)) | 619 | boxes = np.array(boxes).reshape((-1, 2)) |
| 609 | position = [min(boxes[:, 0]), min(boxes[:, 1]), | 620 | position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])] |
| 610 | max(boxes[:, 0]), max(boxes[:, 1])] | ||
| 611 | signature_role_5['page_num'] = page_num | 621 | signature_role_5['page_num'] = page_num |
| 612 | signature_role_5['position'] = position | 622 | signature_role_5['position'] = position |
| 613 | signature_role_5['words'] = words | 623 | signature_role_5['words'] = words |
| ... | @@ -699,27 +709,22 @@ class Finder: | ... | @@ -699,27 +709,22 @@ class Finder: |
| 699 | contract_no = self.get_contract_no(page_num='0') | 709 | contract_no = self.get_contract_no(page_num='0') |
| 700 | self.init_result['page_2']['合同编号'] = contract_no | 710 | self.init_result['page_2']['合同编号'] = contract_no |
| 701 | # 找借款人及抵押人(地址字段原本有空格) | 711 | # 找借款人及抵押人(地址字段原本有空格) |
| 702 | borrower_name, borrower_id = self.get_somebody( | 712 | borrower_name, borrower_id = self.get_somebody(top='借款人及抵押人:', bottom='共同借款人:') |
| 703 | top='借款人及抵押人:', bottom='共同借款人:') | ||
| 704 | # 这是为了同时兼容 8.1 版本 | 713 | # 这是为了同时兼容 8.1 版本 |
| 705 | if borrower_name['words'] == None: | 714 | if borrower_name['words'] == None: |
| 706 | borrower_name, borrower_id = self.get_somebody( | 715 | borrower_name, borrower_id = self.get_somebody(top='借款人及抵押人:', bottom='共同借款人及共同抵押人:') |
| 707 | top='借款人及抵押人:', bottom='共同借款人及共同抵押人:') | ||
| 708 | self.init_result['page_2']['借款人及抵押人']['name'] = borrower_name | 716 | self.init_result['page_2']['借款人及抵押人']['name'] = borrower_name |
| 709 | self.init_result['page_2']['借款人及抵押人']['id'] = borrower_id | 717 | self.init_result['page_2']['借款人及抵押人']['id'] = borrower_id |
| 710 | # 找共同借款人及共同抵押人 | 718 | # 找共同借款人及共同抵押人 |
| 711 | co_borrower_name, co_borrower_id = self.get_somebody( | 719 | co_borrower_name, co_borrower_id = self.get_somebody(top='共同借款人:', bottom='保证人1:') |
| 712 | top='共同借款人:', bottom='保证人1:') | ||
| 713 | self.init_result['page_2']['共同借款人及共同抵押人']['name'] = co_borrower_name | 720 | self.init_result['page_2']['共同借款人及共同抵押人']['name'] = co_borrower_name |
| 714 | self.init_result['page_2']['共同借款人及共同抵押人']['id'] = co_borrower_id | 721 | self.init_result['page_2']['共同借款人及共同抵押人']['id'] = co_borrower_id |
| 715 | # 保证人1 | 722 | # 保证人1 |
| 716 | first_guarantor_name, first_guarantor_id = self.get_somebody( | 723 | first_guarantor_name, first_guarantor_id = self.get_somebody(top='保证人1:', bottom='保证人2:') |
| 717 | top='保证人1:', bottom='保证人2:') | ||
| 718 | self.init_result['page_2']['保证人1']['name'] = first_guarantor_name | 724 | self.init_result['page_2']['保证人1']['name'] = first_guarantor_name |
| 719 | self.init_result['page_2']['保证人1']['id'] = first_guarantor_id | 725 | self.init_result['page_2']['保证人1']['id'] = first_guarantor_id |
| 720 | # 保证人2 | 726 | # 保证人2 |
| 721 | second_guarantor_name, second_guarantor_id = self.get_somebody( | 727 | second_guarantor_name, second_guarantor_id = self.get_somebody(top='保证人2:', bottom='第一章') |
| 722 | top='保证人2:', bottom='第一章') | ||
| 723 | self.init_result['page_2']['保证人2']['name'] = second_guarantor_name | 728 | self.init_result['page_2']['保证人2']['name'] = second_guarantor_name |
| 724 | self.init_result['page_2']['保证人2']['id'] = second_guarantor_id | 729 | self.init_result['page_2']['保证人2']['id'] = second_guarantor_id |
| 725 | # 所购车辆价格 | 730 | # 所购车辆价格 |
| ... | @@ -740,6 +745,9 @@ class Finder: | ... | @@ -740,6 +745,9 @@ class Finder: |
| 740 | # 贷款期限 | 745 | # 贷款期限 |
| 741 | loan_term = self.get_loan_term(page_num='1') | 746 | loan_term = self.get_loan_term(page_num='1') |
| 742 | self.init_result['page_2']['贷款期限'] = loan_term | 747 | self.init_result['page_2']['贷款期限'] = loan_term |
| 748 | # 本合同当期的标准利率 | ||
| 749 | standard_rate = self.get_standard_rate(page_num='1') | ||
| 750 | self.init_result['page_2']['标准利率'] = standard_rate | ||
| 743 | # 还款账户 | 751 | # 还款账户 |
| 744 | account, account_name, account_bank = self.get_payback_account() | 752 | account, account_name, account_bank = self.get_payback_account() |
| 745 | self.init_result['page_2']['还款账户']['账号'] = account | 753 | self.init_result['page_2']['还款账户']['账号'] = account | ... | ... |
-
Please register or sign in to post a comment