KWOM_July
Showing
3 changed files
with
183 additions
and
92 deletions
| ... | @@ -25,6 +25,15 @@ def extract_info(ocr_results): | ... | @@ -25,6 +25,15 @@ def extract_info(ocr_results): |
| 25 | 25 | ||
| 26 | 26 | ||
| 27 | def predict(pdf_info, is_qrs=False, is_fsm=False): | 27 | def predict(pdf_info, is_qrs=False, is_fsm=False): |
| 28 | pop_seceond_page_info = {} | ||
| 29 | if not is_fsm and not is_qrs and len(pdf_info) == 9: | ||
| 30 | pop_seceond_page_info = pdf_info.pop('1', {}) | ||
| 31 | for pno in range(8): | ||
| 32 | if pno == 0: | ||
| 33 | pdf_info[str(pno)]['blocks'].extend(pop_seceond_page_info.get('blocks', [])) | ||
| 34 | else: | ||
| 35 | pdf_info[str(pno)] = pdf_info.pop(str(pno+1)) | ||
| 36 | |||
| 28 | ocr_results = {} | 37 | ocr_results = {} |
| 29 | for pno in pdf_info: | 38 | for pno in pdf_info: |
| 30 | ocr_results[pno] = {} | 39 | ocr_results[pno] = {} | ... | ... |
| ... | @@ -13,6 +13,7 @@ class Finder: | ... | @@ -13,6 +13,7 @@ class Finder: |
| 13 | self.item = {"words": None, | 13 | self.item = {"words": None, |
| 14 | "position": None, | 14 | "position": None, |
| 15 | } | 15 | } |
| 16 | self.cn_re = re.compile(u'[\u4e00-\u9fa5]') | ||
| 16 | 17 | ||
| 17 | def gen_init_result(self, is_asp): | 18 | def gen_init_result(self, is_asp): |
| 18 | # 格式化算法输出 | 19 | # 格式化算法输出 |
| ... | @@ -187,6 +188,11 @@ class Finder: | ... | @@ -187,6 +188,11 @@ class Finder: |
| 187 | vin['position'] = location | 188 | vin['position'] = location |
| 188 | return vin | 189 | return vin |
| 189 | 190 | ||
| 191 | def cn_char_filter(self, src_str): | ||
| 192 | cn_chars = re.findall(self.cn_re, src_str) | ||
| 193 | cn_str = ''.join(cn_chars) | ||
| 194 | return cn_str | ||
| 195 | |||
| 190 | def get_loan_principal(self, page_num='0'): | 196 | def get_loan_principal(self, page_num='0'): |
| 191 | chinese_keywords = ['壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖', '拾', | 197 | chinese_keywords = ['壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖', '拾', |
| 192 | '佰', '仟', '万', '亿', '元', '角', '分', '零', '整'] | 198 | '佰', '仟', '万', '亿', '元', '角', '分', '零', '整'] |
| ... | @@ -201,7 +207,7 @@ class Finder: | ... | @@ -201,7 +207,7 @@ class Finder: |
| 201 | for line in block['lines']: | 207 | for line in block['lines']: |
| 202 | for span in line['spans']: | 208 | for span in line['spans']: |
| 203 | bbox, text = span['bbox'], span['text'] | 209 | bbox, text = span['bbox'], span['text'] |
| 204 | if fuzz.ratio(''.join(chinese_keywords), text) > 15: | 210 | if fuzz.ratio(''.join(chinese_keywords), self.cn_char_filter(text)) >= 10: |
| 205 | text = text.split(':')[-1].strip() | 211 | text = text.split(':')[-1].strip() |
| 206 | upper['position'] = bbox | 212 | upper['position'] = bbox |
| 207 | upper['words'] = text | 213 | upper['words'] = text | ... | ... |
| ... | @@ -3,7 +3,7 @@ | ... | @@ -3,7 +3,7 @@ |
| 3 | # @Email : 9428.al@gmail.com | 3 | # @Email : 9428.al@gmail.com |
| 4 | # @Create Date : 2021-07-20 16:42:41 | 4 | # @Create Date : 2021-07-20 16:42:41 |
| 5 | # @Last Modified : 2021-10-28 17:41:00 | 5 | # @Last Modified : 2021-10-28 17:41:00 |
| 6 | # @Description : | 6 | # @Description : |
| 7 | 7 | ||
| 8 | import re | 8 | import re |
| 9 | import numpy as np | 9 | import numpy as np |
| ... | @@ -21,7 +21,7 @@ def caculate_iou(g, p): | ... | @@ -21,7 +21,7 @@ def caculate_iou(g, p): |
| 21 | 21 | ||
| 22 | def get_table_info(bbox_1, bbox_2, ocr_result): | 22 | def get_table_info(bbox_1, bbox_2, ocr_result): |
| 23 | anchor = [bbox_2[0], bbox_1[1], bbox_2[2], bbox_1[3], | 23 | anchor = [bbox_2[0], bbox_1[1], bbox_2[2], bbox_1[3], |
| 24 | bbox_2[4], bbox_1[5], bbox_2[6], bbox_1[7]] | 24 | bbox_2[4], bbox_1[5], bbox_2[6], bbox_1[7]] |
| 25 | table_info = '' | 25 | table_info = '' |
| 26 | for span in ocr_result: | 26 | for span in ocr_result: |
| 27 | iou = caculate_iou(anchor, span[0]) | 27 | iou = caculate_iou(anchor, span[0]) |
| ... | @@ -31,6 +31,7 @@ def get_table_info(bbox_1, bbox_2, ocr_result): | ... | @@ -31,6 +31,7 @@ def get_table_info(bbox_1, bbox_2, ocr_result): |
| 31 | 31 | ||
| 32 | 32 | ||
| 33 | class Finder: | 33 | class Finder: |
| 34 | |||
| 34 | def __init__(self, pdf_info): | 35 | def __init__(self, pdf_info): |
| 35 | self.pdf_info = pdf_info | 36 | self.pdf_info = pdf_info |
| 36 | self.item = {"words": None, | 37 | self.item = {"words": None, |
| ... | @@ -42,12 +43,15 @@ class Finder: | ... | @@ -42,12 +43,15 @@ class Finder: |
| 42 | "承租人-姓名": self.item, | 43 | "承租人-姓名": self.item, |
| 43 | "承租人-证件号码": self.item, | 44 | "承租人-证件号码": self.item, |
| 44 | "承租人-法定代表人或授权代表": self.item, | 45 | "承租人-法定代表人或授权代表": self.item, |
| 46 | |||
| 45 | "共同承租人-姓名": self.item, | 47 | "共同承租人-姓名": self.item, |
| 46 | "共同承租人-证件号码": self.item, | 48 | "共同承租人-证件号码": self.item, |
| 47 | "共同承租人-法定代表人或授权代表": self.item, | 49 | "共同承租人-法定代表人或授权代表": self.item, |
| 50 | |||
| 48 | "保证人1-姓名": self.item, | 51 | "保证人1-姓名": self.item, |
| 49 | "保证人1-证件号码": self.item, | 52 | "保证人1-证件号码": self.item, |
| 50 | "保证人1-法定代表人或授权代表": self.item, | 53 | "保证人1-法定代表人或授权代表": self.item, |
| 54 | |||
| 51 | "保证人2-姓名": self.item, | 55 | "保证人2-姓名": self.item, |
| 52 | "保证人2-证件号码": self.item, | 56 | "保证人2-证件号码": self.item, |
| 53 | "保证人2-法定代表人或授权代表": self.item, | 57 | "保证人2-法定代表人或授权代表": self.item, |
| ... | @@ -67,15 +71,19 @@ class Finder: | ... | @@ -67,15 +71,19 @@ class Finder: |
| 67 | "银行账户-开户行": self.item, | 71 | "银行账户-开户行": self.item, |
| 68 | "签字页-承租人姓名": self.item, | 72 | "签字页-承租人姓名": self.item, |
| 69 | "签字页-承租人签章": self.item, | 73 | "签字页-承租人签章": self.item, |
| 74 | |||
| 70 | "签字页-共同承租人姓名": self.item, | 75 | "签字页-共同承租人姓名": self.item, |
| 71 | "签字页-共同承租人签章": self.item, | 76 | "签字页-共同承租人签章": self.item, |
| 77 | |||
| 72 | "签字页-保证人1姓名": self.item, | 78 | "签字页-保证人1姓名": self.item, |
| 73 | "签字页-保证人1签章": self.item, | 79 | "签字页-保证人1签章": self.item, |
| 80 | |||
| 74 | "签字页-保证人2姓名": self.item, | 81 | "签字页-保证人2姓名": self.item, |
| 75 | "签字页-保证人2签章": self.item, | 82 | "签字页-保证人2签章": self.item, |
| 76 | "签字页-保证人3姓名": self.item, | 83 | "签字页-保证人3姓名": self.item, |
| 77 | "签字页-保证人3签章": self.item, | 84 | "签字页-保证人3签章": self.item, |
| 78 | } | 85 | } |
| 86 | |||
| 79 | # 格式化输出 车辆处置协议 要是别的字段 | 87 | # 格式化输出 车辆处置协议 要是别的字段 |
| 80 | self.init_result_1 = {"合同编号": self.item, | 88 | self.init_result_1 = {"合同编号": self.item, |
| 81 | "承租人-姓名": self.item, | 89 | "承租人-姓名": self.item, |
| ... | @@ -88,6 +96,7 @@ class Finder: | ... | @@ -88,6 +96,7 @@ class Finder: |
| 88 | "签字页-销售经销商": self.item, | 96 | "签字页-销售经销商": self.item, |
| 89 | "签字页-销售经销商签章": self.item, | 97 | "签字页-销售经销商签章": self.item, |
| 90 | } | 98 | } |
| 99 | |||
| 91 | # 格式化输出 车辆租赁抵押合同 | 100 | # 格式化输出 车辆租赁抵押合同 |
| 92 | self.init_result_2 = {"合同编号": self.item, | 101 | self.init_result_2 = {"合同编号": self.item, |
| 93 | "合同编号(正文)": self.item, | 102 | "合同编号(正文)": self.item, |
| ... | @@ -108,10 +117,10 @@ class Finder: | ... | @@ -108,10 +117,10 @@ class Finder: |
| 108 | """传入页码,查看该页码右上角的编号 | 117 | """传入页码,查看该页码右上角的编号 |
| 109 | 118 | ||
| 110 | Args: | 119 | Args: |
| 111 | page_num (string): | 120 | page_num (string): |
| 112 | 121 | ||
| 113 | Returns: | 122 | Returns: |
| 114 | sting: | 123 | sting: |
| 115 | """ | 124 | """ |
| 116 | contract_no = self.item.copy() | 125 | contract_no = self.item.copy() |
| 117 | # 只看第一页 | 126 | # 只看第一页 |
| ... | @@ -174,6 +183,7 @@ class Finder: | ... | @@ -174,6 +183,7 @@ class Finder: |
| 174 | # contract_no['words'] = words | 183 | # contract_no['words'] = words |
| 175 | contract_no['words'] = re.sub("\s", "", words).replace(")", "") | 184 | contract_no['words'] = re.sub("\s", "", words).replace(")", "") |
| 176 | return contract_no | 185 | return contract_no |
| 186 | |||
| 177 | matchObj = re.search(r'编号为(.*?)的', all_text) | 187 | matchObj = re.search(r'编号为(.*?)的', all_text) |
| 178 | if matchObj: | 188 | if matchObj: |
| 179 | words = matchObj.group(1).strip() | 189 | words = matchObj.group(1).strip() |
| ... | @@ -182,6 +192,7 @@ class Finder: | ... | @@ -182,6 +192,7 @@ class Finder: |
| 182 | # contract_no['words'] = words | 192 | # contract_no['words'] = words |
| 183 | contract_no['words'] = re.sub("\s", "", words).replace(")", "") | 193 | contract_no['words'] = re.sub("\s", "", words).replace(")", "") |
| 184 | return contract_no | 194 | return contract_no |
| 195 | |||
| 185 | matchObj = re.search(r'编号为(.*?))的', all_text) | 196 | matchObj = re.search(r'编号为(.*?))的', all_text) |
| 186 | if matchObj: | 197 | if matchObj: |
| 187 | words = matchObj.group(1).strip() | 198 | words = matchObj.group(1).strip() |
| ... | @@ -291,6 +302,7 @@ class Finder: | ... | @@ -291,6 +302,7 @@ class Finder: |
| 291 | 302 | ||
| 292 | def get_asp_details(self, page_num): | 303 | def get_asp_details(self, page_num): |
| 293 | asp_details_table_term = self.item.copy() | 304 | asp_details_table_term = self.item.copy() |
| 305 | |||
| 294 | asp_details_table = [] | 306 | asp_details_table = [] |
| 295 | asp_details_text_list = [] | 307 | asp_details_text_list = [] |
| 296 | table = False | 308 | table = False |
| ... | @@ -306,20 +318,25 @@ class Finder: | ... | @@ -306,20 +318,25 @@ class Finder: |
| 306 | table = False | 318 | table = False |
| 307 | if table == True: | 319 | if table == True: |
| 308 | asp_details_text_list.append(text) | 320 | asp_details_text_list.append(text) |
| 309 | for i in range((len(asp_details_text_list) + 2) // 3): | 321 | |
| 322 | for i in range((len(asp_details_text_list)+2)//3): | ||
| 323 | |||
| 310 | line = [] | 324 | line = [] |
| 311 | if i == 0: | 325 | if i == 0: |
| 312 | line = [asp_details_text_list[0]] | 326 | line = [asp_details_text_list[0]] |
| 313 | else: | 327 | else: |
| 314 | for j in range(3): | 328 | for j in range(3): |
| 315 | line.append(asp_details_text_list[i * 3 - 2 + j]) | 329 | line.append(asp_details_text_list[i*3-2+j]) |
| 330 | |||
| 316 | asp_details_table.append(line) | 331 | asp_details_table.append(line) |
| 332 | |||
| 317 | if len(asp_details_table) > 0: | 333 | if len(asp_details_table) > 0: |
| 318 | asp_details_table_term['words'] = asp_details_table | 334 | asp_details_table_term['words'] = asp_details_table |
| 319 | return asp_details_table_term | 335 | return asp_details_table_term |
| 320 | 336 | ||
| 321 | def get_signature(self): | 337 | def get_signature(self): |
| 322 | signature = self.item.copy() | 338 | signature = self.item.copy() |
| 339 | |||
| 323 | for block in self.pdf_info['0']['blocks']: | 340 | for block in self.pdf_info['0']['blocks']: |
| 324 | if block['type'] != 0: | 341 | if block['type'] != 0: |
| 325 | continue | 342 | continue |
| ... | @@ -448,6 +465,7 @@ class Finder: | ... | @@ -448,6 +465,7 @@ class Finder: |
| 448 | 465 | ||
| 449 | def get_repayment_schedule(self): | 466 | def get_repayment_schedule(self): |
| 450 | repayment_schedule = self.item.copy() | 467 | repayment_schedule = self.item.copy() |
| 468 | |||
| 451 | repayment_schedule_text_list = [] | 469 | repayment_schedule_text_list = [] |
| 452 | table = False | 470 | table = False |
| 453 | page = None | 471 | page = None |
| ... | @@ -475,6 +493,7 @@ class Finder: | ... | @@ -475,6 +493,7 @@ class Finder: |
| 475 | if not left < bbox[0] < right: | 493 | if not left < bbox[0] < right: |
| 476 | continue | 494 | continue |
| 477 | repayment_schedule_text_list.append(text) | 495 | repayment_schedule_text_list.append(text) |
| 496 | |||
| 478 | if text.strip() == "61.": | 497 | if text.strip() == "61.": |
| 479 | page = pno | 498 | page = pno |
| 480 | table = True | 499 | table = True |
| ... | @@ -482,14 +501,17 @@ class Finder: | ... | @@ -482,14 +501,17 @@ class Finder: |
| 482 | # print("repayment_schedule_text_list = ", repayment_schedule_text_list) | 501 | # print("repayment_schedule_text_list = ", repayment_schedule_text_list) |
| 483 | # repayment_schedule_table = [['序号', '融资租赁成本', '融资租赁费用', '租金', '剩余融资租赁成本']] | 502 | # repayment_schedule_table = [['序号', '融资租赁成本', '融资租赁费用', '租金', '剩余融资租赁成本']] |
| 484 | repayment_schedule_table = [['序号', '租金']] | 503 | repayment_schedule_table = [['序号', '租金']] |
| 485 | for i in range(len(repayment_schedule_text_list) // 4): | 504 | for i in range(len(repayment_schedule_text_list)//4): |
| 486 | line = [f'{i + 1}.'] | 505 | line = [f'{i+1}.'] |
| 487 | # 4表示4列的意思 | 506 | # 4表示4列的意思 |
| 488 | for j in range(4): | 507 | for j in range(4): |
| 489 | line.append(repayment_schedule_text_list[i * 4 + j]) | 508 | line.append(repayment_schedule_text_list[i*4+j]) |
| 509 | |||
| 490 | # 只保留序号和租金列 | 510 | # 只保留序号和租金列 |
| 491 | line = [line[0].replace('.', ''), line[3]] | 511 | line = [line[0].replace('.', ''), line[3]] |
| 512 | |||
| 492 | repayment_schedule_table.append(line) | 513 | repayment_schedule_table.append(line) |
| 514 | |||
| 493 | repayment_schedule['words'] = repayment_schedule_table | 515 | repayment_schedule['words'] = repayment_schedule_table |
| 494 | repayment_schedule['page'] = page | 516 | repayment_schedule['page'] = page |
| 495 | return repayment_schedule | 517 | return repayment_schedule |
| ... | @@ -538,7 +560,8 @@ class Finder: | ... | @@ -538,7 +560,8 @@ class Finder: |
| 538 | else: | 560 | else: |
| 539 | words = '无' | 561 | words = '无' |
| 540 | boxes = np.array(boxes).reshape((-1, 2)) | 562 | boxes = np.array(boxes).reshape((-1, 2)) |
| 541 | position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])] | 563 | position = [min(boxes[:, 0]), min(boxes[:, 1]), |
| 564 | max(boxes[:, 0]), max(boxes[:, 1])] | ||
| 542 | signature_role_2['page_num'] = page_num | 565 | signature_role_2['page_num'] = page_num |
| 543 | signature_role_2['position'] = position | 566 | signature_role_2['position'] = position |
| 544 | signature_role_2['words'] = words | 567 | signature_role_2['words'] = words |
| ... | @@ -573,7 +596,8 @@ class Finder: | ... | @@ -573,7 +596,8 @@ class Finder: |
| 573 | else: | 596 | else: |
| 574 | words = '无' | 597 | words = '无' |
| 575 | boxes = np.array(boxes).reshape((-1, 2)) | 598 | boxes = np.array(boxes).reshape((-1, 2)) |
| 576 | position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])] | 599 | position = [min(boxes[:, 0]), min(boxes[:, 1]), |
| 600 | max(boxes[:, 0]), max(boxes[:, 1])] | ||
| 577 | signature_role_3['page_num'] = page_num | 601 | signature_role_3['page_num'] = page_num |
| 578 | signature_role_3['position'] = position | 602 | signature_role_3['position'] = position |
| 579 | signature_role_3['words'] = words | 603 | signature_role_3['words'] = words |
| ... | @@ -608,7 +632,8 @@ class Finder: | ... | @@ -608,7 +632,8 @@ class Finder: |
| 608 | else: | 632 | else: |
| 609 | words = '无' | 633 | words = '无' |
| 610 | boxes = np.array(boxes).reshape((-1, 2)) | 634 | boxes = np.array(boxes).reshape((-1, 2)) |
| 611 | position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])] | 635 | position = [min(boxes[:, 0]), min(boxes[:, 1]), |
| 636 | max(boxes[:, 0]), max(boxes[:, 1])] | ||
| 612 | signature_role_4['page_num'] = page_num | 637 | signature_role_4['page_num'] = page_num |
| 613 | signature_role_4['position'] = position | 638 | signature_role_4['position'] = position |
| 614 | signature_role_4['words'] = words | 639 | signature_role_4['words'] = words |
| ... | @@ -644,7 +669,8 @@ class Finder: | ... | @@ -644,7 +669,8 @@ class Finder: |
| 644 | else: | 669 | else: |
| 645 | words = '无' | 670 | words = '无' |
| 646 | boxes = np.array(boxes).reshape((-1, 2)) | 671 | boxes = np.array(boxes).reshape((-1, 2)) |
| 647 | position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])] | 672 | position = [min(boxes[:, 0]), min(boxes[:, 1]), |
| 673 | max(boxes[:, 0]), max(boxes[:, 1])] | ||
| 648 | signature_role_5['page_num'] = page_num | 674 | signature_role_5['page_num'] = page_num |
| 649 | signature_role_5['position'] = position | 675 | signature_role_5['position'] = position |
| 650 | signature_role_5['words'] = words | 676 | signature_role_5['words'] = words |
| ... | @@ -717,6 +743,7 @@ class Finder: | ... | @@ -717,6 +743,7 @@ class Finder: |
| 717 | name = self.item.copy() | 743 | name = self.item.copy() |
| 718 | id_num = self.item.copy() | 744 | id_num = self.item.copy() |
| 719 | representative = self.item.copy() | 745 | representative = self.item.copy() |
| 746 | |||
| 720 | # 以保证人3 的左上角为定位点 | 747 | # 以保证人3 的左上角为定位点 |
| 721 | anchor = None | 748 | anchor = None |
| 722 | for block in self.pdf_info[page_num]['blocks']: | 749 | for block in self.pdf_info[page_num]['blocks']: |
| ... | @@ -728,6 +755,7 @@ class Finder: | ... | @@ -728,6 +755,7 @@ class Finder: |
| 728 | # 找到角色姓名 | 755 | # 找到角色姓名 |
| 729 | if re.match('保证人3', text) is not None: | 756 | if re.match('保证人3', text) is not None: |
| 730 | anchor = [bbox[0], bbox[1]] | 757 | anchor = [bbox[0], bbox[1]] |
| 758 | |||
| 731 | if anchor is not None: | 759 | if anchor is not None: |
| 732 | for block in self.pdf_info[page_num]['blocks']: | 760 | for block in self.pdf_info[page_num]['blocks']: |
| 733 | if block['type'] != 0: | 761 | if block['type'] != 0: |
| ... | @@ -743,60 +771,52 @@ class Finder: | ... | @@ -743,60 +771,52 @@ class Finder: |
| 743 | name['position'] = bbox | 771 | name['position'] = bbox |
| 744 | if role_key == '承租人:': | 772 | if role_key == '承租人:': |
| 745 | # 找到证件号码且确定位置 | 773 | # 找到证件号码且确定位置 |
| 746 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( | 774 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]: |
| 747 | bbox[1::2]) < anchor[1]: | ||
| 748 | words = text.split(':')[-1] | 775 | words = text.split(':')[-1] |
| 749 | id_num['words'] = words | 776 | id_num['words'] = words |
| 750 | id_num['page'] = page_num | 777 | id_num['page'] = page_num |
| 751 | id_num['position'] = bbox | 778 | id_num['position'] = bbox |
| 752 | # 找到法人代表且确定位置 | 779 | # 找到法人代表且确定位置 |
| 753 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( | 780 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]: |
| 754 | bbox[1::2]) < anchor[1]: | ||
| 755 | words = text.split(':')[-1] | 781 | words = text.split(':')[-1] |
| 756 | representative['words'] = words | 782 | representative['words'] = words |
| 757 | representative['page'] = page_num | 783 | representative['page'] = page_num |
| 758 | representative['position'] = bbox | 784 | representative['position'] = bbox |
| 759 | if role_key == '保证人1:': | 785 | if role_key == '保证人1:': |
| 760 | # 找到证件号码且确定位置 | 786 | # 找到证件号码且确定位置 |
| 761 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( | 787 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]: |
| 762 | bbox[1::2]) > anchor[1]: | ||
| 763 | words = text.split(':')[-1] | 788 | words = text.split(':')[-1] |
| 764 | id_num['words'] = words | 789 | id_num['words'] = words |
| 765 | id_num['page'] = page_num | 790 | id_num['page'] = page_num |
| 766 | id_num['position'] = bbox | 791 | id_num['position'] = bbox |
| 767 | # 找到法人代表且确定位置 | 792 | # 找到法人代表且确定位置 |
| 768 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( | 793 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]: |
| 769 | bbox[1::2]) > anchor[1]: | ||
| 770 | words = text.split(':')[-1] | 794 | words = text.split(':')[-1] |
| 771 | representative['words'] = words | 795 | representative['words'] = words |
| 772 | representative['page'] = page_num | 796 | representative['page'] = page_num |
| 773 | representative['position'] = bbox | 797 | representative['position'] = bbox |
| 774 | if role_key == '保证人2:': | 798 | if role_key == '保证人2:': |
| 775 | # 找到证件号码且确定位置 | 799 | # 找到证件号码且确定位置 |
| 776 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( | 800 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]: |
| 777 | bbox[1::2]) < anchor[1]: | ||
| 778 | words = text.split(':')[-1] | 801 | words = text.split(':')[-1] |
| 779 | id_num['words'] = words | 802 | id_num['words'] = words |
| 780 | id_num['page'] = page_num | 803 | id_num['page'] = page_num |
| 781 | id_num['position'] = bbox | 804 | id_num['position'] = bbox |
| 782 | # 找到法人代表且确定位置 | 805 | # 找到法人代表且确定位置 |
| 783 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( | 806 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]: |
| 784 | bbox[1::2]) < anchor[1]: | ||
| 785 | words = text.split(':')[-1] | 807 | words = text.split(':')[-1] |
| 786 | representative['words'] = words | 808 | representative['words'] = words |
| 787 | representative['page'] = page_num | 809 | representative['page'] = page_num |
| 788 | representative['position'] = bbox | 810 | representative['position'] = bbox |
| 789 | if role_key == '保证人3:': | 811 | if role_key == '保证人3:': |
| 790 | # 找到证件号码且确定位置 | 812 | # 找到证件号码且确定位置 |
| 791 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( | 813 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]: |
| 792 | bbox[1::2]) > anchor[1]: | ||
| 793 | words = text.split(':')[-1] | 814 | words = text.split(':')[-1] |
| 794 | id_num['words'] = words | 815 | id_num['words'] = words |
| 795 | id_num['page'] = page_num | 816 | id_num['page'] = page_num |
| 796 | id_num['position'] = bbox | 817 | id_num['position'] = bbox |
| 797 | # 找到法人代表且确定位置 | 818 | # 找到法人代表且确定位置 |
| 798 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( | 819 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]: |
| 799 | bbox[1::2]) > anchor[1]: | ||
| 800 | words = text.split(':')[-1] | 820 | words = text.split(':')[-1] |
| 801 | representative['words'] = words | 821 | representative['words'] = words |
| 802 | representative['page'] = page_num | 822 | representative['page'] = page_num |
| ... | @@ -805,6 +825,7 @@ class Finder: | ... | @@ -805,6 +825,7 @@ class Finder: |
| 805 | 825 | ||
| 806 | def get_table_add_product(self): | 826 | def get_table_add_product(self): |
| 807 | table_add_product = self.item.copy() | 827 | table_add_product = self.item.copy() |
| 828 | |||
| 808 | add_product_page_num = None | 829 | add_product_page_num = None |
| 809 | for pno in self.pdf_info: | 830 | for pno in self.pdf_info: |
| 810 | for block in self.pdf_info[f'{pno}']['blocks']: | 831 | for block in self.pdf_info[f'{pno}']['blocks']: |
| ... | @@ -825,11 +846,14 @@ class Finder: | ... | @@ -825,11 +846,14 @@ class Finder: |
| 825 | xmin, ymin, xmax, ymax = bbox | 846 | xmin, ymin, xmax, ymax = bbox |
| 826 | bbox = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax] | 847 | bbox = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax] |
| 827 | ocr_results.append([bbox, text]) | 848 | ocr_results.append([bbox, text]) |
| 849 | |||
| 828 | lines = [['项目', '购买价格', '实际融资金额']] | 850 | lines = [['项目', '购买价格', '实际融资金额']] |
| 851 | |||
| 829 | key_xm = None | 852 | key_xm = None |
| 830 | key_gmjg = None | 853 | key_gmjg = None |
| 831 | key_sjrzje = None | 854 | key_sjrzje = None |
| 832 | key_total = None | 855 | key_total = None |
| 856 | |||
| 833 | for index, span in enumerate(ocr_results): | 857 | for index, span in enumerate(ocr_results): |
| 834 | if span[1] == '项目': | 858 | if span[1] == '项目': |
| 835 | key_xm = index | 859 | key_xm = index |
| ... | @@ -839,22 +863,29 @@ class Finder: | ... | @@ -839,22 +863,29 @@ class Finder: |
| 839 | key_sjrzje = index | 863 | key_sjrzje = index |
| 840 | if span[1] == '总计': | 864 | if span[1] == '总计': |
| 841 | key_total = index | 865 | key_total = index |
| 866 | |||
| 842 | bbox, text = ocr_results[key_xm] | 867 | bbox, text = ocr_results[key_xm] |
| 843 | rh = abs(bbox[1] - bbox[-1]) | 868 | rh = abs(bbox[1]-bbox[-1]) |
| 844 | anchor = np.array(bbox).reshape((-1, 2)) | 869 | anchor = np.array(bbox).reshape((-1, 2)) |
| 845 | anchor[:, 0] += 2 * rh | 870 | anchor[:, 0] += 2*rh |
| 846 | anchor[:, 1] += rh | 871 | anchor[:, 1] += rh |
| 872 | |||
| 847 | for i in range(5): | 873 | for i in range(5): |
| 848 | for span in ocr_results: | 874 | for span in ocr_results: |
| 849 | iou = caculate_iou(anchor, span[0]) | 875 | iou = caculate_iou(anchor, span[0]) |
| 850 | if iou > 0.01 and span[1].strip() != '所购': | 876 | if iou > 0.01 and span[1].strip() != '所购': |
| 851 | x = get_table_info(span[0], ocr_results[key_gmjg][0], ocr_results) | 877 | x = get_table_info( |
| 852 | y = get_table_info(span[0], ocr_results[key_sjrzje][0], ocr_results) | 878 | span[0], ocr_results[key_gmjg][0], ocr_results) |
| 879 | y = get_table_info( | ||
| 880 | span[0], ocr_results[key_sjrzje][0], ocr_results) | ||
| 853 | line = [span[1].replace('\u3000', ' '), x, y] | 881 | line = [span[1].replace('\u3000', ' '), x, y] |
| 882 | # print(line) | ||
| 854 | lines.append(line) | 883 | lines.append(line) |
| 855 | anchor = np.array(span[0]).reshape((-1, 2)) | 884 | anchor = np.array(span[0]).reshape((-1, 2)) |
| 856 | anchor[:, 1] += rh | 885 | anchor[:, 1] += rh |
| 857 | total = get_table_info(ocr_results[key_total][0], ocr_results[key_sjrzje][0], ocr_results) | 886 | |
| 887 | total = get_table_info( | ||
| 888 | ocr_results[key_total][0], ocr_results[key_sjrzje][0], ocr_results) | ||
| 858 | lines.append(['总计', '', total]) | 889 | lines.append(['总计', '', total]) |
| 859 | 890 | ||
| 860 | # 所购 BMW悦然焕 | 891 | # 所购 BMW悦然焕 |
| ... | @@ -880,7 +911,6 @@ class Finder: | ... | @@ -880,7 +911,6 @@ class Finder: |
| 880 | if '事故维修补' in line[0]: | 911 | if '事故维修补' in line[0]: |
| 881 | line[0] = '所购 事故维修补偿方案' | 912 | line[0] = '所购 事故维修补偿方案' |
| 882 | filtered_lines.append(line) | 913 | filtered_lines.append(line) |
| 883 | |||
| 884 | table_add_product['words'] = filtered_lines | 914 | table_add_product['words'] = filtered_lines |
| 885 | table_add_product['page'] = add_product_page_num | 915 | table_add_product['page'] = add_product_page_num |
| 886 | table_add_product['position'] = None | 916 | table_add_product['position'] = None |
| ... | @@ -889,6 +919,7 @@ class Finder: | ... | @@ -889,6 +919,7 @@ class Finder: |
| 889 | def get_contract_no_dy(self): | 919 | def get_contract_no_dy(self): |
| 890 | # 查找抵押合同编号 | 920 | # 查找抵押合同编号 |
| 891 | contract_no = self.item.copy() | 921 | contract_no = self.item.copy() |
| 922 | |||
| 892 | key_box = None | 923 | key_box = None |
| 893 | for pno in self.pdf_info: | 924 | for pno in self.pdf_info: |
| 894 | for block in self.pdf_info[pno]['blocks']: | 925 | for block in self.pdf_info[pno]['blocks']: |
| ... | @@ -899,6 +930,7 @@ class Finder: | ... | @@ -899,6 +930,7 @@ class Finder: |
| 899 | bbox, text = span['bbox'], span['text'] | 930 | bbox, text = span['bbox'], span['text'] |
| 900 | if '抵押合同编号' in text: | 931 | if '抵押合同编号' in text: |
| 901 | key_box = bbox | 932 | key_box = bbox |
| 933 | |||
| 902 | if key_box is not None: | 934 | if key_box is not None: |
| 903 | for pno in self.pdf_info: | 935 | for pno in self.pdf_info: |
| 904 | for block in self.pdf_info[pno]['blocks']: | 936 | for block in self.pdf_info[pno]['blocks']: |
| ... | @@ -916,6 +948,7 @@ class Finder: | ... | @@ -916,6 +948,7 @@ class Finder: |
| 916 | def get_dyr_name_id(self): | 948 | def get_dyr_name_id(self): |
| 917 | name = self.item.copy() | 949 | name = self.item.copy() |
| 918 | _id = self.item.copy() | 950 | _id = self.item.copy() |
| 951 | |||
| 919 | key_box = None | 952 | key_box = None |
| 920 | for pno in self.pdf_info: | 953 | for pno in self.pdf_info: |
| 921 | for block in self.pdf_info[pno]['blocks']: | 954 | for block in self.pdf_info[pno]['blocks']: |
| ... | @@ -928,7 +961,7 @@ class Finder: | ... | @@ -928,7 +961,7 @@ class Finder: |
| 928 | key_box = bbox | 961 | key_box = bbox |
| 929 | 962 | ||
| 930 | if key_box is not None: | 963 | if key_box is not None: |
| 931 | rh = abs(key_box[1] - key_box[3]) | 964 | rh = abs(key_box[1]-key_box[3]) |
| 932 | for pno in self.pdf_info: | 965 | for pno in self.pdf_info: |
| 933 | for block in self.pdf_info[pno]['blocks']: | 966 | for block in self.pdf_info[pno]['blocks']: |
| 934 | if block['type'] != 0: | 967 | if block['type'] != 0: |
| ... | @@ -936,12 +969,12 @@ class Finder: | ... | @@ -936,12 +969,12 @@ class Finder: |
| 936 | for line in block['lines']: | 969 | for line in block['lines']: |
| 937 | for span in line['spans']: | 970 | for span in line['spans']: |
| 938 | bbox, text = span['bbox'], span['text'] | 971 | bbox, text = span['bbox'], span['text'] |
| 939 | if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '姓名' in text: | 972 | if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '姓名' in text: |
| 940 | words = text.split(':')[-1] | 973 | words = text.split(':')[-1] |
| 941 | name['position'] = bbox | 974 | name['position'] = bbox |
| 942 | name['page'] = pno | 975 | name['page'] = pno |
| 943 | name['words'] = words | 976 | name['words'] = words |
| 944 | if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '证件号码' in text: | 977 | if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '证件号码' in text: |
| 945 | words = text.split(':')[-1] | 978 | words = text.split(':')[-1] |
| 946 | _id['position'] = bbox | 979 | _id['position'] = bbox |
| 947 | _id['page'] = pno | 980 | _id['page'] = pno |
| ... | @@ -951,6 +984,7 @@ class Finder: | ... | @@ -951,6 +984,7 @@ class Finder: |
| 951 | def get_dyrpo_name_id(self): | 984 | def get_dyrpo_name_id(self): |
| 952 | name = self.item.copy() | 985 | name = self.item.copy() |
| 953 | _id = self.item.copy() | 986 | _id = self.item.copy() |
| 987 | |||
| 954 | key_box = None | 988 | key_box = None |
| 955 | for pno in self.pdf_info: | 989 | for pno in self.pdf_info: |
| 956 | for block in self.pdf_info[pno]['blocks']: | 990 | for block in self.pdf_info[pno]['blocks']: |
| ... | @@ -961,8 +995,9 @@ class Finder: | ... | @@ -961,8 +995,9 @@ class Finder: |
| 961 | bbox, text = span['bbox'], span['text'] | 995 | bbox, text = span['bbox'], span['text'] |
| 962 | if text == '抵押人配偶(如适': | 996 | if text == '抵押人配偶(如适': |
| 963 | key_box = bbox | 997 | key_box = bbox |
| 998 | |||
| 964 | if key_box is not None: | 999 | if key_box is not None: |
| 965 | rh = abs(key_box[1] - key_box[3]) | 1000 | rh = abs(key_box[1]-key_box[3]) |
| 966 | for pno in self.pdf_info: | 1001 | for pno in self.pdf_info: |
| 967 | for block in self.pdf_info[pno]['blocks']: | 1002 | for block in self.pdf_info[pno]['blocks']: |
| 968 | if block['type'] != 0: | 1003 | if block['type'] != 0: |
| ... | @@ -970,13 +1005,13 @@ class Finder: | ... | @@ -970,13 +1005,13 @@ class Finder: |
| 970 | for line in block['lines']: | 1005 | for line in block['lines']: |
| 971 | for span in line['spans']: | 1006 | for span in line['spans']: |
| 972 | bbox, text = span['bbox'], span['text'] | 1007 | bbox, text = span['bbox'], span['text'] |
| 973 | if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '姓名' in text: | 1008 | if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '姓名' in text: |
| 974 | words = text.split(':')[-1] | 1009 | words = text.split(':')[-1] |
| 975 | name['position'] = bbox | 1010 | name['position'] = bbox |
| 976 | name['page'] = pno | 1011 | name['page'] = pno |
| 977 | name['words'] = words | 1012 | name['words'] = words |
| 978 | if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '证件号码' in text: | 1013 | if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '证件号码' in text: |
| 979 | words = text.split(':')[-1] | 1014 | words = text.split(':')[-1].strip() |
| 980 | _id['position'] = bbox | 1015 | _id['position'] = bbox |
| 981 | _id['page'] = pno | 1016 | _id['page'] = pno |
| 982 | _id['words'] = words | 1017 | _id['words'] = words |
| ... | @@ -984,6 +1019,7 @@ class Finder: | ... | @@ -984,6 +1019,7 @@ class Finder: |
| 984 | 1019 | ||
| 985 | def get_key_value_position(self, key): | 1020 | def get_key_value_position(self, key): |
| 986 | value = self.item.copy() | 1021 | value = self.item.copy() |
| 1022 | |||
| 987 | key_box = None | 1023 | key_box = None |
| 988 | for pno in self.pdf_info: | 1024 | for pno in self.pdf_info: |
| 989 | for block in self.pdf_info[pno]['blocks']: | 1025 | for block in self.pdf_info[pno]['blocks']: |
| ... | @@ -994,8 +1030,9 @@ class Finder: | ... | @@ -994,8 +1030,9 @@ class Finder: |
| 994 | bbox, text = span['bbox'], span['text'] | 1030 | bbox, text = span['bbox'], span['text'] |
| 995 | if text == key: | 1031 | if text == key: |
| 996 | key_box = bbox | 1032 | key_box = bbox |
| 1033 | |||
| 997 | if key_box is not None: | 1034 | if key_box is not None: |
| 998 | rh = abs(key_box[1] - key_box[3]) | 1035 | rh = abs(key_box[1]-key_box[3]) |
| 999 | for pno in self.pdf_info: | 1036 | for pno in self.pdf_info: |
| 1000 | for block in self.pdf_info[pno]['blocks']: | 1037 | for block in self.pdf_info[pno]['blocks']: |
| 1001 | if block['type'] != 0: | 1038 | if block['type'] != 0: |
| ... | @@ -1003,9 +1040,8 @@ class Finder: | ... | @@ -1003,9 +1040,8 @@ class Finder: |
| 1003 | for line in block['lines']: | 1040 | for line in block['lines']: |
| 1004 | for span in line['spans']: | 1041 | for span in line['spans']: |
| 1005 | bbox, text = span['bbox'], span['text'] | 1042 | bbox, text = span['bbox'], span['text'] |
| 1006 | if key_box[1] < np.mean(bbox[1::2]) < key_box[3] and key_box[0] < bbox[0] and abs( | 1043 | if key_box[1] < np.mean(bbox[1::2]) < key_box[3] and key_box[0] < bbox[0] and abs(key_box[2]-bbox[0]) < rh*10: |
| 1007 | key_box[2] - bbox[0]) < rh * 10: | 1044 | words = text.strip() |
| 1008 | words = text | ||
| 1009 | value['position'] = bbox | 1045 | value['position'] = bbox |
| 1010 | value['page'] = pno | 1046 | value['page'] = pno |
| 1011 | value['words'] = words | 1047 | value['words'] = words |
| ... | @@ -1015,6 +1051,7 @@ class Finder: | ... | @@ -1015,6 +1051,7 @@ class Finder: |
| 1015 | name = self.item.copy() | 1051 | name = self.item.copy() |
| 1016 | id_num = self.item.copy() | 1052 | id_num = self.item.copy() |
| 1017 | representative = self.item.copy() | 1053 | representative = self.item.copy() |
| 1054 | |||
| 1018 | # 以保证人2 的左上角为定位点 | 1055 | # 以保证人2 的左上角为定位点 |
| 1019 | anchor = None | 1056 | anchor = None |
| 1020 | for block in self.pdf_info[page_num]['blocks']: | 1057 | for block in self.pdf_info[page_num]['blocks']: |
| ... | @@ -1026,6 +1063,7 @@ class Finder: | ... | @@ -1026,6 +1063,7 @@ class Finder: |
| 1026 | # 找到角色姓名 | 1063 | # 找到角色姓名 |
| 1027 | if re.match('保证人2', text) is not None: | 1064 | if re.match('保证人2', text) is not None: |
| 1028 | anchor = [bbox[0], bbox[1]] | 1065 | anchor = [bbox[0], bbox[1]] |
| 1066 | |||
| 1029 | if anchor is not None: | 1067 | if anchor is not None: |
| 1030 | for block in self.pdf_info[page_num]['blocks']: | 1068 | for block in self.pdf_info[page_num]['blocks']: |
| 1031 | if block['type'] != 0: | 1069 | if block['type'] != 0: |
| ... | @@ -1041,60 +1079,52 @@ class Finder: | ... | @@ -1041,60 +1079,52 @@ class Finder: |
| 1041 | name['position'] = bbox | 1079 | name['position'] = bbox |
| 1042 | if role_key == '承租人一:': | 1080 | if role_key == '承租人一:': |
| 1043 | # 找到证件号码且确定位置 | 1081 | # 找到证件号码且确定位置 |
| 1044 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( | 1082 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]: |
| 1045 | bbox[1::2]) < anchor[1]: | ||
| 1046 | words = text.split(':')[-1] | 1083 | words = text.split(':')[-1] |
| 1047 | id_num['words'] = words | 1084 | id_num['words'] = words |
| 1048 | id_num['page'] = page_num | 1085 | id_num['page'] = page_num |
| 1049 | id_num['position'] = bbox | 1086 | id_num['position'] = bbox |
| 1050 | # 找到法人代表且确定位置 | 1087 | # 找到法人代表且确定位置 |
| 1051 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( | 1088 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]: |
| 1052 | bbox[1::2]) < anchor[1]: | ||
| 1053 | words = text.split(':')[-1] | 1089 | words = text.split(':')[-1] |
| 1054 | representative['words'] = words | 1090 | representative['words'] = words |
| 1055 | representative['page'] = page_num | 1091 | representative['page'] = page_num |
| 1056 | representative['position'] = bbox | 1092 | representative['position'] = bbox |
| 1057 | if role_key == '共同承租人:': | 1093 | if role_key == '共同承租人:': |
| 1058 | # 找到证件号码且确定位置 | 1094 | # 找到证件号码且确定位置 |
| 1059 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( | 1095 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]: |
| 1060 | bbox[1::2]) > anchor[1]: | ||
| 1061 | words = text.split(':')[-1] | 1096 | words = text.split(':')[-1] |
| 1062 | id_num['words'] = words | 1097 | id_num['words'] = words |
| 1063 | id_num['page'] = page_num | 1098 | id_num['page'] = page_num |
| 1064 | id_num['position'] = bbox | 1099 | id_num['position'] = bbox |
| 1065 | # 找到法人代表且确定位置 | 1100 | # 找到法人代表且确定位置 |
| 1066 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( | 1101 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]: |
| 1067 | bbox[1::2]) > anchor[1]: | ||
| 1068 | words = text.split(':')[-1] | 1102 | words = text.split(':')[-1] |
| 1069 | representative['words'] = words | 1103 | representative['words'] = words |
| 1070 | representative['page'] = page_num | 1104 | representative['page'] = page_num |
| 1071 | representative['position'] = bbox | 1105 | representative['position'] = bbox |
| 1072 | if role_key == '保证人1:': | 1106 | if role_key == '保证人1:': |
| 1073 | # 找到证件号码且确定位置 | 1107 | # 找到证件号码且确定位置 |
| 1074 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( | 1108 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]: |
| 1075 | bbox[1::2]) < anchor[1]: | ||
| 1076 | words = text.split(':')[-1] | 1109 | words = text.split(':')[-1] |
| 1077 | id_num['words'] = words | 1110 | id_num['words'] = words |
| 1078 | id_num['page'] = page_num | 1111 | id_num['page'] = page_num |
| 1079 | id_num['position'] = bbox | 1112 | id_num['position'] = bbox |
| 1080 | # 找到法人代表且确定位置 | 1113 | # 找到法人代表且确定位置 |
| 1081 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( | 1114 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]: |
| 1082 | bbox[1::2]) < anchor[1]: | ||
| 1083 | words = text.split(':')[-1] | 1115 | words = text.split(':')[-1] |
| 1084 | representative['words'] = words | 1116 | representative['words'] = words |
| 1085 | representative['page'] = page_num | 1117 | representative['page'] = page_num |
| 1086 | representative['position'] = bbox | 1118 | representative['position'] = bbox |
| 1087 | if role_key == '保证人2:': | 1119 | if role_key == '保证人2:': |
| 1088 | # 找到证件号码且确定位置 | 1120 | # 找到证件号码且确定位置 |
| 1089 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( | 1121 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]: |
| 1090 | bbox[1::2]) > anchor[1]: | ||
| 1091 | words = text.split(':')[-1] | 1122 | words = text.split(':')[-1] |
| 1092 | id_num['words'] = words | 1123 | id_num['words'] = words |
| 1093 | id_num['page'] = page_num | 1124 | id_num['page'] = page_num |
| 1094 | id_num['position'] = bbox | 1125 | id_num['position'] = bbox |
| 1095 | # 找到法人代表且确定位置 | 1126 | # 找到法人代表且确定位置 |
| 1096 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( | 1127 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]: |
| 1097 | bbox[1::2]) > anchor[1]: | ||
| 1098 | words = text.split(':')[-1] | 1128 | words = text.split(':')[-1] |
| 1099 | representative['words'] = words | 1129 | representative['words'] = words |
| 1100 | representative['page'] = page_num | 1130 | representative['page'] = page_num |
| ... | @@ -1137,6 +1167,7 @@ class Finder: | ... | @@ -1137,6 +1167,7 @@ class Finder: |
| 1137 | # 取 Page 1 上的合同编号 | 1167 | # 取 Page 1 上的合同编号 |
| 1138 | contract_no = self.get_contract_no(page_num='0') | 1168 | contract_no = self.get_contract_no(page_num='0') |
| 1139 | self.init_result['合同编号'] = contract_no | 1169 | self.init_result['合同编号'] = contract_no |
| 1170 | |||
| 1140 | # 粗略判断是否是 ‘车贷分离版本’ 的合同 | 1171 | # 粗略判断是否是 ‘车贷分离版本’ 的合同 |
| 1141 | is_cdfl = False | 1172 | is_cdfl = False |
| 1142 | for block in self.pdf_info['0']['blocks']: | 1173 | for block in self.pdf_info['0']['blocks']: |
| ... | @@ -1147,60 +1178,81 @@ class Finder: | ... | @@ -1147,60 +1178,81 @@ class Finder: |
| 1147 | bbox, text = span['bbox'], span['text'] | 1178 | bbox, text = span['bbox'], span['text'] |
| 1148 | if '共同承租人:' in text: | 1179 | if '共同承租人:' in text: |
| 1149 | is_cdfl = True | 1180 | is_cdfl = True |
| 1181 | |||
| 1150 | if is_cdfl == False: | 1182 | if is_cdfl == False: |
| 1151 | # 从第一页上取四个角色的姓名和证件号码 | 1183 | # 从第一页上取四个角色的姓名和证件号码 |
| 1152 | name, id_num, representative = self.get_role_info(role_key='承租人:', page_num='0') | 1184 | name, id_num, representative = self.get_role_info( |
| 1185 | role_key='承租人:', page_num='0') | ||
| 1186 | |||
| 1153 | if name["words"] == None: | 1187 | if name["words"] == None: |
| 1154 | name, id_num, representative = self.get_role_info_3_3(role_key='承租人一:', page_num='0') | 1188 | name, id_num, representative = self.get_role_info_3_3( |
| 1189 | role_key='承租人一:', page_num='0') | ||
| 1155 | self.init_result['承租人-姓名'] = name | 1190 | self.init_result['承租人-姓名'] = name |
| 1156 | self.init_result['承租人-证件号码'] = id_num | 1191 | self.init_result['承租人-证件号码'] = id_num |
| 1157 | self.init_result['承租人-法定代表人或授权代表'] = representative | 1192 | self.init_result['承租人-法定代表人或授权代表'] = representative |
| 1158 | name, id_num, representative = self.get_role_info(role_key='保证人1:', page_num='0') | 1193 | |
| 1194 | name, id_num, representative = self.get_role_info( | ||
| 1195 | role_key='保证人1:', page_num='0') | ||
| 1159 | self.init_result['保证人1-姓名'] = name | 1196 | self.init_result['保证人1-姓名'] = name |
| 1160 | self.init_result['保证人1-证件号码'] = id_num | 1197 | self.init_result['保证人1-证件号码'] = id_num |
| 1161 | self.init_result['保证人1-法定代表人或授权代表'] = representative | 1198 | self.init_result['保证人1-法定代表人或授权代表'] = representative |
| 1162 | # if条件判别 对应3_3版本 | 1199 | # if条件判别 对应3_3版本 |
| 1163 | if name["words"] == None: | 1200 | if name["words"] == None: |
| 1164 | name, id_num, representative = self.get_role_info_3_3(role_key='共同承租人:', page_num='0') | 1201 | name, id_num, representative = self.get_role_info_3_3( |
| 1202 | role_key='共同承租人:', page_num='0') | ||
| 1165 | self.init_result['共同承租人-姓名'] = name | 1203 | self.init_result['共同承租人-姓名'] = name |
| 1166 | self.init_result['共同承租人-证件号码'] = id_num | 1204 | self.init_result['共同承租人-证件号码'] = id_num |
| 1167 | self.init_result['共同承租人-法定代表人或授权代表'] = representative | 1205 | self.init_result['共同承租人-法定代表人或授权代表'] = representative |
| 1168 | name, id_num, representative = self.get_role_info(role_key='保证人2:', page_num='0') | 1206 | |
| 1207 | name, id_num, representative = self.get_role_info( | ||
| 1208 | role_key='保证人2:', page_num='0') | ||
| 1169 | self.init_result['保证人2-姓名'] = name | 1209 | self.init_result['保证人2-姓名'] = name |
| 1170 | self.init_result['保证人2-证件号码'] = id_num | 1210 | self.init_result['保证人2-证件号码'] = id_num |
| 1171 | self.init_result['保证人2-法定代表人或授权代表'] = representative | 1211 | self.init_result['保证人2-法定代表人或授权代表'] = representative |
| 1172 | # if条件判别 对应3_3版本 | 1212 | # if条件判别 对应3_3版本 |
| 1173 | if name["words"] == None: | 1213 | if name["words"] == None: |
| 1174 | name, id_num, representative = self.get_role_info_3_3(role_key='保证人1:', page_num='0') | 1214 | name, id_num, representative = self.get_role_info_3_3( |
| 1215 | role_key='保证人1:', page_num='0') | ||
| 1175 | self.init_result['保证人2-姓名'] = name | 1216 | self.init_result['保证人2-姓名'] = name |
| 1176 | self.init_result['保证人2-证件号码'] = id_num | 1217 | self.init_result['保证人2-证件号码'] = id_num |
| 1177 | self.init_result['保证人2-法定代表人或授权代表'] = representative | 1218 | self.init_result['保证人2-法定代表人或授权代表'] = representative |
| 1178 | name, id_num, representative = self.get_role_info(role_key='保证人3:', page_num='0') | 1219 | |
| 1220 | name, id_num, representative = self.get_role_info( | ||
| 1221 | role_key='保证人3:', page_num='0') | ||
| 1179 | self.init_result['保证人3-姓名'] = name | 1222 | self.init_result['保证人3-姓名'] = name |
| 1180 | self.init_result['保证人3-证件号码'] = id_num | 1223 | self.init_result['保证人3-证件号码'] = id_num |
| 1181 | self.init_result['保证人3-法定代表人或授权代表'] = representative | 1224 | self.init_result['保证人3-法定代表人或授权代表'] = representative |
| 1182 | if name["words"] == None: | 1225 | if name["words"] == None: |
| 1183 | name, id_num, representative = self.get_role_info_3_3(role_key='保证人2:', page_num='0') | 1226 | name, id_num, representative = self.get_role_info_3_3( |
| 1227 | role_key='保证人2:', page_num='0') | ||
| 1184 | self.init_result['保证人3-姓名'] = name | 1228 | self.init_result['保证人3-姓名'] = name |
| 1185 | self.init_result['保证人3-证件号码'] = id_num | 1229 | self.init_result['保证人3-证件号码'] = id_num |
| 1186 | self.init_result['保证人3-法定代表人或授权代表'] = representative | 1230 | self.init_result['保证人3-法定代表人或授权代表'] = representative |
| 1187 | else: | 1231 | else: |
| 1188 | name, id_num, representative = self.get_role_info_3_3(role_key='承租人一:', page_num='0') | 1232 | name, id_num, representative = self.get_role_info_3_3( |
| 1233 | role_key='承租人一:', page_num='0') | ||
| 1189 | self.init_result['承租人-姓名'] = name | 1234 | self.init_result['承租人-姓名'] = name |
| 1190 | self.init_result['承租人-证件号码'] = id_num | 1235 | self.init_result['承租人-证件号码'] = id_num |
| 1191 | self.init_result['承租人-法定代表人或授权代表'] = representative | 1236 | self.init_result['承租人-法定代表人或授权代表'] = representative |
| 1192 | name, id_num, representative = self.get_role_info_3_3(role_key='共同承租人:', page_num='0') | 1237 | |
| 1238 | name, id_num, representative = self.get_role_info_3_3( | ||
| 1239 | role_key='共同承租人:', page_num='0') | ||
| 1193 | self.init_result['共同承租人-姓名'] = name | 1240 | self.init_result['共同承租人-姓名'] = name |
| 1194 | self.init_result['共同承租人-证件号码'] = id_num | 1241 | self.init_result['共同承租人-证件号码'] = id_num |
| 1195 | self.init_result['共同承租人-法定代表人或授权代表'] = representative | 1242 | self.init_result['共同承租人-法定代表人或授权代表'] = representative |
| 1196 | name, id_num, representative = self.get_role_info_3_3(role_key='保证人1:', page_num='0') | 1243 | |
| 1244 | name, id_num, representative = self.get_role_info_3_3( | ||
| 1245 | role_key='保证人1:', page_num='0') | ||
| 1197 | self.init_result['保证人1-姓名'] = name | 1246 | self.init_result['保证人1-姓名'] = name |
| 1198 | self.init_result['保证人1-证件号码'] = id_num | 1247 | self.init_result['保证人1-证件号码'] = id_num |
| 1199 | self.init_result['保证人1-法定代表人或授权代表'] = representative | 1248 | self.init_result['保证人1-法定代表人或授权代表'] = representative |
| 1200 | name, id_num, representative = self.get_role_info_3_3(role_key='保证人2:', page_num='0') | 1249 | |
| 1250 | name, id_num, representative = self.get_role_info_3_3( | ||
| 1251 | role_key='保证人2:', page_num='0') | ||
| 1201 | self.init_result['保证人2-姓名'] = name | 1252 | self.init_result['保证人2-姓名'] = name |
| 1202 | self.init_result['保证人2-证件号码'] = id_num | 1253 | self.init_result['保证人2-证件号码'] = id_num |
| 1203 | self.init_result['保证人2-法定代表人或授权代表'] = representative | 1254 | self.init_result['保证人2-法定代表人或授权代表'] = representative |
| 1255 | |||
| 1204 | # 在所有页面中找正文中(第二部分 融资租赁主要条款及付款计划)的那个编号,因为存在换行的情况所以暂时不带位置输出 | 1256 | # 在所有页面中找正文中(第二部分 融资租赁主要条款及付款计划)的那个编号,因为存在换行的情况所以暂时不带位置输出 |
| 1205 | contract_no = self.get_contract_no_one() | 1257 | contract_no = self.get_contract_no_one() |
| 1206 | self.init_result['合同编号(正文)'] = contract_no | 1258 | self.init_result['合同编号(正文)'] = contract_no |
| ... | @@ -1211,7 +1263,8 @@ class Finder: | ... | @@ -1211,7 +1263,8 @@ class Finder: |
| 1211 | seller = self.get_key_value(key='车辆卖方(经销商):') | 1263 | seller = self.get_key_value(key='车辆卖方(经销商):') |
| 1212 | self.init_result['车辆卖方(经销商)'] = seller | 1264 | self.init_result['车辆卖方(经销商)'] = seller |
| 1213 | # 找到 —— 车辆原始销售价格 | 1265 | # 找到 —— 车辆原始销售价格 |
| 1214 | vehicle_price = self.get_key_value(key='车辆原始销售价格(《机动车销售统一发票》所列金额):') | 1266 | vehicle_price = self.get_key_value( |
| 1267 | key='车辆原始销售价格(《机动车销售统一发票》所列金额):') | ||
| 1215 | self.init_result['车辆原始销售价格(《机动车销售统一发票》所列金额)'] = vehicle_price | 1268 | self.init_result['车辆原始销售价格(《机动车销售统一发票》所列金额)'] = vehicle_price |
| 1216 | # 找车辆附加产品明细(表) | 1269 | # 找车辆附加产品明细(表) |
| 1217 | table_add_product = self.get_table_add_product() | 1270 | table_add_product = self.get_table_add_product() |
| ... | @@ -1232,66 +1285,85 @@ class Finder: | ... | @@ -1232,66 +1285,85 @@ class Finder: |
| 1232 | self.init_result['银行账户-银行账号'] = account | 1285 | self.init_result['银行账户-银行账号'] = account |
| 1233 | bank = self.get_key_value(key='开户银行:') | 1286 | bank = self.get_key_value(key='开户银行:') |
| 1234 | self.init_result['银行账户-开户行'] = bank | 1287 | self.init_result['银行账户-开户行'] = bank |
| 1288 | |||
| 1235 | # 找签字页上的系列信息 | 1289 | # 找签字页上的系列信息 |
| 1236 | # 承租人姓名、签章 | 1290 | # 承租人姓名、签章 |
| 1237 | if is_cdfl == False: | 1291 | if is_cdfl == False: |
| 1238 | name = self.get_key_value(key='承租人姓名:') | 1292 | name = self.get_key_value(key='承租人姓名:') |
| 1239 | electronic_signature = self.get_electronic_signature(top='承租人姓名:', bottom='保证人1姓名:') | 1293 | electronic_signature = self.get_electronic_signature( |
| 1294 | top='承租人姓名:', bottom='保证人1姓名:') | ||
| 1295 | |||
| 1240 | if name["words"] == None: | 1296 | if name["words"] == None: |
| 1241 | name = self.get_key_value(key='承租人一姓名:') | 1297 | name = self.get_key_value(key='承租人一姓名:') |
| 1242 | electronic_signature = self.get_electronic_signature(top='承租人一姓名:', bottom='共同承租人名称:') | 1298 | electronic_signature = self.get_electronic_signature( |
| 1299 | top='承租人一姓名:', bottom='共同承租人名称:') | ||
| 1300 | |||
| 1243 | self.init_result['签字页-承租人姓名'] = name | 1301 | self.init_result['签字页-承租人姓名'] = name |
| 1244 | self.init_result['签字页-承租人签章'] = electronic_signature | 1302 | self.init_result['签字页-承租人签章'] = electronic_signature |
| 1245 | # 保证人1姓名、签章 | 1303 | # 保证人1姓名、签章 |
| 1246 | name = self.get_key_value(key='保证人1姓名:') | 1304 | name = self.get_key_value(key='保证人1姓名:') |
| 1247 | electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:') | 1305 | electronic_signature = self.get_electronic_signature( |
| 1306 | top='保证人1姓名:', bottom='保证人2姓名:') | ||
| 1248 | self.init_result['签字页-保证人1姓名'] = name | 1307 | self.init_result['签字页-保证人1姓名'] = name |
| 1249 | self.init_result['签字页-保证人1签章'] = electronic_signature | 1308 | self.init_result['签字页-保证人1签章'] = electronic_signature |
| 1250 | # 这里用的是 name["words"] == "" | 1309 | # 这里用的是 name["words"] == "" |
| 1251 | if name["words"] == "": | 1310 | if name["words"] == "": |
| 1252 | name = self.get_key_value(key='共同承租人名称:') | 1311 | name = self.get_key_value(key='共同承租人名称:') |
| 1253 | electronic_signature = self.get_electronic_signature(top='共同承租人名称:', bottom='保证人1姓名:') | 1312 | electronic_signature = self.get_electronic_signature( |
| 1313 | top='共同承租人名称:', bottom='保证人1姓名:') | ||
| 1254 | self.init_result['签字页-共同承租人姓名'] = name | 1314 | self.init_result['签字页-共同承租人姓名'] = name |
| 1255 | self.init_result['签字页-共同承租人签章'] = electronic_signature | 1315 | self.init_result['签字页-共同承租人签章'] = electronic_signature |
| 1256 | # 保证人2姓名、签章 | 1316 | # 保证人2姓名、签章 |
| 1257 | name = self.get_key_value(key='保证人2姓名:') | 1317 | name = self.get_key_value(key='保证人2姓名:') |
| 1258 | electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='保证人3姓名:') | 1318 | electronic_signature = self.get_electronic_signature( |
| 1319 | top='保证人2姓名:', bottom='保证人3姓名:') | ||
| 1259 | self.init_result['签字页-保证人2姓名'] = name | 1320 | self.init_result['签字页-保证人2姓名'] = name |
| 1260 | self.init_result['签字页-保证人2签章'] = electronic_signature | 1321 | self.init_result['签字页-保证人2签章'] = electronic_signature |
| 1261 | # if判断条件对应3_3版本 | 1322 | # if判断条件对应3_3版本 |
| 1262 | if name["words"] == "": | 1323 | if name["words"] == "": |
| 1263 | name = self.get_key_value(key='保证人1姓名:') | 1324 | name = self.get_key_value(key='保证人1姓名:') |
| 1264 | electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:') | 1325 | electronic_signature = self.get_electronic_signature( |
| 1326 | top='保证人1姓名:', bottom='保证人2姓名:') | ||
| 1265 | self.init_result['签字页-保证人1姓名'] = name | 1327 | self.init_result['签字页-保证人1姓名'] = name |
| 1266 | self.init_result['签字页-保证人1签章'] = electronic_signature | 1328 | self.init_result['签字页-保证人1签章'] = electronic_signature |
| 1267 | # 保证人3姓名、签章 | 1329 | # 保证人3姓名、签章 |
| 1268 | name = self.get_key_value(key='保证人3姓名:') | 1330 | name = self.get_key_value(key='保证人3姓名:') |
| 1269 | electronic_signature = self.get_electronic_signature(top='保证人3姓名:', bottom='日期:') | 1331 | electronic_signature = self.get_electronic_signature( |
| 1332 | top='保证人3姓名:', bottom='日期:') | ||
| 1270 | self.init_result['签字页-保证人3姓名'] = name | 1333 | self.init_result['签字页-保证人3姓名'] = name |
| 1271 | self.init_result['签字页-保证人3签章'] = electronic_signature | 1334 | self.init_result['签字页-保证人3签章'] = electronic_signature |
| 1272 | # if判断条件对应3_3版本 | 1335 | # if判断条件对应3_3版本 |
| 1273 | if name["words"] == None: | 1336 | if name["words"] == None: |
| 1274 | name = self.get_key_value(key='保证人2姓名:') | 1337 | name = self.get_key_value(key='保证人2姓名:') |
| 1275 | electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='日期:') | 1338 | electronic_signature = self.get_electronic_signature( |
| 1339 | top='保证人2姓名:', bottom='日期:') | ||
| 1276 | self.init_result['签字页-保证人2姓名'] = name | 1340 | self.init_result['签字页-保证人2姓名'] = name |
| 1277 | self.init_result['签字页-保证人2签章'] = electronic_signature | 1341 | self.init_result['签字页-保证人2签章'] = electronic_signature |
| 1278 | else: | 1342 | else: |
| 1279 | name = self.get_key_value(key='承租人一姓名:') | 1343 | name = self.get_key_value(key='承租人一姓名:') |
| 1280 | electronic_signature = self.get_electronic_signature(top='承租人一姓名:', bottom='共同承租人名称:') | 1344 | electronic_signature = self.get_electronic_signature( |
| 1345 | top='承租人一姓名:', bottom='共同承租人名称:') | ||
| 1281 | self.init_result['签字页-承租人姓名'] = name | 1346 | self.init_result['签字页-承租人姓名'] = name |
| 1282 | self.init_result['签字页-承租人签章'] = electronic_signature | 1347 | self.init_result['签字页-承租人签章'] = electronic_signature |
| 1348 | |||
| 1283 | name = self.get_key_value(key='共同承租人名称:') | 1349 | name = self.get_key_value(key='共同承租人名称:') |
| 1284 | electronic_signature = self.get_electronic_signature(top='共同承租人名称:', bottom='保证人1姓名:') | 1350 | electronic_signature = self.get_electronic_signature( |
| 1351 | top='共同承租人名称:', bottom='保证人1姓名:') | ||
| 1285 | self.init_result['签字页-共同承租人姓名'] = name | 1352 | self.init_result['签字页-共同承租人姓名'] = name |
| 1286 | self.init_result['签字页-共同承租人签章'] = electronic_signature | 1353 | self.init_result['签字页-共同承租人签章'] = electronic_signature |
| 1354 | |||
| 1287 | name = self.get_key_value(key='保证人1姓名:') | 1355 | name = self.get_key_value(key='保证人1姓名:') |
| 1288 | electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:') | 1356 | electronic_signature = self.get_electronic_signature( |
| 1357 | top='保证人1姓名:', bottom='保证人2姓名:') | ||
| 1289 | self.init_result['签字页-保证人1姓名'] = name | 1358 | self.init_result['签字页-保证人1姓名'] = name |
| 1290 | self.init_result['签字页-保证人1签章'] = electronic_signature | 1359 | self.init_result['签字页-保证人1签章'] = electronic_signature |
| 1360 | |||
| 1291 | name = self.get_key_value(key='保证人2姓名:') | 1361 | name = self.get_key_value(key='保证人2姓名:') |
| 1292 | electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='保证人3姓名:') | 1362 | electronic_signature = self.get_electronic_signature( |
| 1363 | top='保证人2姓名:', bottom='保证人3姓名:') | ||
| 1293 | self.init_result['签字页-保证人2姓名'] = name | 1364 | self.init_result['签字页-保证人2姓名'] = name |
| 1294 | self.init_result['签字页-保证人2签章'] = electronic_signature | 1365 | self.init_result['签字页-保证人2签章'] = electronic_signature |
| 1366 | |||
| 1295 | return self.init_result | 1367 | return self.init_result |
| 1296 | 1368 | ||
| 1297 | def get_info_1(self): | 1369 | def get_info_1(self): |
| ... | @@ -1307,7 +1379,8 @@ class Finder: | ... | @@ -1307,7 +1379,8 @@ class Finder: |
| 1307 | # 销售经销商 | 1379 | # 销售经销商 |
| 1308 | seller = self.get_key_value(key='销售经销商:', page_num='0') | 1380 | seller = self.get_key_value(key='销售经销商:', page_num='0') |
| 1309 | if seller['words'] == "": | 1381 | if seller['words'] == "": |
| 1310 | seller = self.get_value_by_findall('销售经销商:', '地址:', page_num='0') | 1382 | seller = self.get_value_by_findall( |
| 1383 | '销售经销商:', '地址:', page_num='0') | ||
| 1311 | self.init_result_1['销售经销商'] = seller | 1384 | self.init_result_1['销售经销商'] = seller |
| 1312 | # 合同编号(正文) | 1385 | # 合同编号(正文) |
| 1313 | contract_no = self.get_contract_no_one() | 1386 | contract_no = self.get_contract_no_one() |
| ... | @@ -1325,7 +1398,8 @@ class Finder: | ... | @@ -1325,7 +1398,8 @@ class Finder: |
| 1325 | seller = self.get_key_value(key='销售经销商:') | 1398 | seller = self.get_key_value(key='销售经销商:') |
| 1326 | if seller['words'] == "": | 1399 | if seller['words'] == "": |
| 1327 | # 销售经销商:深圳市宝创汽车贸易有限公司南山分公司(请授权代表签字并请盖章) | 1400 | # 销售经销商:深圳市宝创汽车贸易有限公司南山分公司(请授权代表签字并请盖章) |
| 1328 | seller = self.get_value_by_findall('销售经销商:', '(请授权代表签字并请盖章)', page_num='3') | 1401 | seller = self.get_value_by_findall( |
| 1402 | '销售经销商:', '(请授权代表签字并请盖章)', page_num='3') | ||
| 1329 | self.init_result_1['签字页-销售经销商'] = seller | 1403 | self.init_result_1['签字页-销售经销商'] = seller |
| 1330 | # 经销商签章 | 1404 | # 经销商签章 |
| 1331 | pass | 1405 | pass |
| ... | @@ -1357,12 +1431,14 @@ class Finder: | ... | @@ -1357,12 +1431,14 @@ class Finder: |
| 1357 | self.init_result_2['融资租赁期限'] = lease_term | 1431 | self.init_result_2['融资租赁期限'] = lease_term |
| 1358 | # 签字页抵押人姓名和签章 | 1432 | # 签字页抵押人姓名和签章 |
| 1359 | name = self.get_key_value(key='抵押人姓名:') | 1433 | name = self.get_key_value(key='抵押人姓名:') |
| 1360 | electronic_signature = self.get_electronic_signature(top='抵押权人盖章', bottom='抵押人配偶姓名:') | 1434 | electronic_signature = self.get_electronic_signature( |
| 1435 | top='抵押权人盖章', bottom='抵押人配偶姓名:') | ||
| 1361 | self.init_result_2['签字页-抵押人姓名'] = name | 1436 | self.init_result_2['签字页-抵押人姓名'] = name |
| 1362 | self.init_result_2['签字页-抵押人签章'] = electronic_signature | 1437 | self.init_result_2['签字页-抵押人签章'] = electronic_signature |
| 1363 | # 签字页抵押人配偶姓名和签章 | 1438 | # 签字页抵押人配偶姓名和签章 |
| 1364 | name = self.get_key_value(key='抵押人配偶姓名:') | 1439 | name = self.get_key_value(key='抵押人配偶姓名:') |
| 1365 | electronic_signature = self.get_electronic_signature(top='抵押人配偶姓名:', bottom='日期') | 1440 | electronic_signature = self.get_electronic_signature( |
| 1441 | top='抵押人配偶姓名:', bottom='日期') | ||
| 1366 | self.init_result_2['签字页-抵押人配偶姓名'] = name | 1442 | self.init_result_2['签字页-抵押人配偶姓名'] = name |
| 1367 | self.init_result_2['签字页-抵押人配偶签章'] = electronic_signature | 1443 | self.init_result_2['签字页-抵押人配偶签章'] = electronic_signature |
| 1368 | return self.init_result_2 | 1444 | return self.init_result_2 | ... | ... |
-
Please register or sign in to post a comment