93c7cc0a by 周伟奇

KWOM_July

1 parent b10ff66a
...@@ -25,6 +25,15 @@ def extract_info(ocr_results): ...@@ -25,6 +25,15 @@ def extract_info(ocr_results):
25 25
26 26
27 def predict(pdf_info, is_qrs=False, is_fsm=False): 27 def predict(pdf_info, is_qrs=False, is_fsm=False):
28 pop_seceond_page_info = {}
29 if not is_fsm and not is_qrs and len(pdf_info) == 9:
30 pop_seceond_page_info = pdf_info.pop('1', {})
31 for pno in range(8):
32 if pno == 0:
33 pdf_info[str(pno)]['blocks'].extend(pop_seceond_page_info.get('blocks', []))
34 else:
35 pdf_info[str(pno)] = pdf_info.pop(str(pno+1))
36
28 ocr_results = {} 37 ocr_results = {}
29 for pno in pdf_info: 38 for pno in pdf_info:
30 ocr_results[pno] = {} 39 ocr_results[pno] = {}
......
...@@ -13,6 +13,7 @@ class Finder: ...@@ -13,6 +13,7 @@ class Finder:
13 self.item = {"words": None, 13 self.item = {"words": None,
14 "position": None, 14 "position": None,
15 } 15 }
16 self.cn_re = re.compile(u'[\u4e00-\u9fa5]')
16 17
17 def gen_init_result(self, is_asp): 18 def gen_init_result(self, is_asp):
18 # 格式化算法输出 19 # 格式化算法输出
...@@ -187,6 +188,11 @@ class Finder: ...@@ -187,6 +188,11 @@ class Finder:
187 vin['position'] = location 188 vin['position'] = location
188 return vin 189 return vin
189 190
def cn_char_filter(self, src_str):
    """Return the characters of ``src_str`` matched by ``self.cn_re``.

    Every match of the compiled pattern stored in ``self.cn_re`` is kept,
    in its original order, and everything else is dropped.

    Args:
        src_str: Text to filter. ``None`` or an empty string yields ``''``.

    Returns:
        str: The concatenation of all matches of ``self.cn_re`` in
        ``src_str``; ``''`` when there is no match.
    """
    # Nothing to match in a missing/empty text; returning early also avoids
    # the TypeError the regex engine raises when handed None.
    if not src_str:
        return ''
    # Use the compiled pattern's own method rather than re.findall(pattern, ...).
    return ''.join(self.cn_re.findall(src_str))
195
190 def get_loan_principal(self, page_num='0'): 196 def get_loan_principal(self, page_num='0'):
191 chinese_keywords = ['壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖', '拾', 197 chinese_keywords = ['壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖', '拾',
192 '佰', '仟', '万', '亿', '元', '角', '分', '零', '整'] 198 '佰', '仟', '万', '亿', '元', '角', '分', '零', '整']
...@@ -201,7 +207,7 @@ class Finder: ...@@ -201,7 +207,7 @@ class Finder:
201 for line in block['lines']: 207 for line in block['lines']:
202 for span in line['spans']: 208 for span in line['spans']:
203 bbox, text = span['bbox'], span['text'] 209 bbox, text = span['bbox'], span['text']
204 if fuzz.ratio(''.join(chinese_keywords), text) > 15: 210 if fuzz.ratio(''.join(chinese_keywords), self.cn_char_filter(text)) >= 10:
205 text = text.split(':')[-1].strip() 211 text = text.split(':')[-1].strip()
206 upper['position'] = bbox 212 upper['position'] = bbox
207 upper['words'] = text 213 upper['words'] = text
......
...@@ -31,6 +31,7 @@ def get_table_info(bbox_1, bbox_2, ocr_result): ...@@ -31,6 +31,7 @@ def get_table_info(bbox_1, bbox_2, ocr_result):
31 31
32 32
33 class Finder: 33 class Finder:
34
34 def __init__(self, pdf_info): 35 def __init__(self, pdf_info):
35 self.pdf_info = pdf_info 36 self.pdf_info = pdf_info
36 self.item = {"words": None, 37 self.item = {"words": None,
...@@ -42,12 +43,15 @@ class Finder: ...@@ -42,12 +43,15 @@ class Finder:
42 "承租人-姓名": self.item, 43 "承租人-姓名": self.item,
43 "承租人-证件号码": self.item, 44 "承租人-证件号码": self.item,
44 "承租人-法定代表人或授权代表": self.item, 45 "承租人-法定代表人或授权代表": self.item,
46
45 "共同承租人-姓名": self.item, 47 "共同承租人-姓名": self.item,
46 "共同承租人-证件号码": self.item, 48 "共同承租人-证件号码": self.item,
47 "共同承租人-法定代表人或授权代表": self.item, 49 "共同承租人-法定代表人或授权代表": self.item,
50
48 "保证人1-姓名": self.item, 51 "保证人1-姓名": self.item,
49 "保证人1-证件号码": self.item, 52 "保证人1-证件号码": self.item,
50 "保证人1-法定代表人或授权代表": self.item, 53 "保证人1-法定代表人或授权代表": self.item,
54
51 "保证人2-姓名": self.item, 55 "保证人2-姓名": self.item,
52 "保证人2-证件号码": self.item, 56 "保证人2-证件号码": self.item,
53 "保证人2-法定代表人或授权代表": self.item, 57 "保证人2-法定代表人或授权代表": self.item,
...@@ -67,15 +71,19 @@ class Finder: ...@@ -67,15 +71,19 @@ class Finder:
67 "银行账户-开户行": self.item, 71 "银行账户-开户行": self.item,
68 "签字页-承租人姓名": self.item, 72 "签字页-承租人姓名": self.item,
69 "签字页-承租人签章": self.item, 73 "签字页-承租人签章": self.item,
74
70 "签字页-共同承租人姓名": self.item, 75 "签字页-共同承租人姓名": self.item,
71 "签字页-共同承租人签章": self.item, 76 "签字页-共同承租人签章": self.item,
77
72 "签字页-保证人1姓名": self.item, 78 "签字页-保证人1姓名": self.item,
73 "签字页-保证人1签章": self.item, 79 "签字页-保证人1签章": self.item,
80
74 "签字页-保证人2姓名": self.item, 81 "签字页-保证人2姓名": self.item,
75 "签字页-保证人2签章": self.item, 82 "签字页-保证人2签章": self.item,
76 "签字页-保证人3姓名": self.item, 83 "签字页-保证人3姓名": self.item,
77 "签字页-保证人3签章": self.item, 84 "签字页-保证人3签章": self.item,
78 } 85 }
86
79 # 格式化输出 车辆处置协议 要是别的字段 87 # 格式化输出 车辆处置协议 要是别的字段
80 self.init_result_1 = {"合同编号": self.item, 88 self.init_result_1 = {"合同编号": self.item,
81 "承租人-姓名": self.item, 89 "承租人-姓名": self.item,
...@@ -88,6 +96,7 @@ class Finder: ...@@ -88,6 +96,7 @@ class Finder:
88 "签字页-销售经销商": self.item, 96 "签字页-销售经销商": self.item,
89 "签字页-销售经销商签章": self.item, 97 "签字页-销售经销商签章": self.item,
90 } 98 }
99
91 # 格式化输出 车辆租赁抵押合同 100 # 格式化输出 车辆租赁抵押合同
92 self.init_result_2 = {"合同编号": self.item, 101 self.init_result_2 = {"合同编号": self.item,
93 "合同编号(正文)": self.item, 102 "合同编号(正文)": self.item,
...@@ -174,6 +183,7 @@ class Finder: ...@@ -174,6 +183,7 @@ class Finder:
174 # contract_no['words'] = words 183 # contract_no['words'] = words
175 contract_no['words'] = re.sub("\s", "", words).replace(")", "") 184 contract_no['words'] = re.sub("\s", "", words).replace(")", "")
176 return contract_no 185 return contract_no
186
177 matchObj = re.search(r'编号为(.*?)的', all_text) 187 matchObj = re.search(r'编号为(.*?)的', all_text)
178 if matchObj: 188 if matchObj:
179 words = matchObj.group(1).strip() 189 words = matchObj.group(1).strip()
...@@ -182,6 +192,7 @@ class Finder: ...@@ -182,6 +192,7 @@ class Finder:
182 # contract_no['words'] = words 192 # contract_no['words'] = words
183 contract_no['words'] = re.sub("\s", "", words).replace(")", "") 193 contract_no['words'] = re.sub("\s", "", words).replace(")", "")
184 return contract_no 194 return contract_no
195
185 matchObj = re.search(r'编号为(.*?))的', all_text) 196 matchObj = re.search(r'编号为(.*?))的', all_text)
186 if matchObj: 197 if matchObj:
187 words = matchObj.group(1).strip() 198 words = matchObj.group(1).strip()
...@@ -291,6 +302,7 @@ class Finder: ...@@ -291,6 +302,7 @@ class Finder:
291 302
292 def get_asp_details(self, page_num): 303 def get_asp_details(self, page_num):
293 asp_details_table_term = self.item.copy() 304 asp_details_table_term = self.item.copy()
305
294 asp_details_table = [] 306 asp_details_table = []
295 asp_details_text_list = [] 307 asp_details_text_list = []
296 table = False 308 table = False
...@@ -306,20 +318,25 @@ class Finder: ...@@ -306,20 +318,25 @@ class Finder:
306 table = False 318 table = False
307 if table == True: 319 if table == True:
308 asp_details_text_list.append(text) 320 asp_details_text_list.append(text)
309 for i in range((len(asp_details_text_list) + 2) // 3): 321
322 for i in range((len(asp_details_text_list)+2)//3):
323
310 line = [] 324 line = []
311 if i == 0: 325 if i == 0:
312 line = [asp_details_text_list[0]] 326 line = [asp_details_text_list[0]]
313 else: 327 else:
314 for j in range(3): 328 for j in range(3):
315 line.append(asp_details_text_list[i * 3 - 2 + j]) 329 line.append(asp_details_text_list[i*3-2+j])
330
316 asp_details_table.append(line) 331 asp_details_table.append(line)
332
317 if len(asp_details_table) > 0: 333 if len(asp_details_table) > 0:
318 asp_details_table_term['words'] = asp_details_table 334 asp_details_table_term['words'] = asp_details_table
319 return asp_details_table_term 335 return asp_details_table_term
320 336
321 def get_signature(self): 337 def get_signature(self):
322 signature = self.item.copy() 338 signature = self.item.copy()
339
323 for block in self.pdf_info['0']['blocks']: 340 for block in self.pdf_info['0']['blocks']:
324 if block['type'] != 0: 341 if block['type'] != 0:
325 continue 342 continue
...@@ -448,6 +465,7 @@ class Finder: ...@@ -448,6 +465,7 @@ class Finder:
448 465
449 def get_repayment_schedule(self): 466 def get_repayment_schedule(self):
450 repayment_schedule = self.item.copy() 467 repayment_schedule = self.item.copy()
468
451 repayment_schedule_text_list = [] 469 repayment_schedule_text_list = []
452 table = False 470 table = False
453 page = None 471 page = None
...@@ -475,6 +493,7 @@ class Finder: ...@@ -475,6 +493,7 @@ class Finder:
475 if not left < bbox[0] < right: 493 if not left < bbox[0] < right:
476 continue 494 continue
477 repayment_schedule_text_list.append(text) 495 repayment_schedule_text_list.append(text)
496
478 if text.strip() == "61.": 497 if text.strip() == "61.":
479 page = pno 498 page = pno
480 table = True 499 table = True
...@@ -482,14 +501,17 @@ class Finder: ...@@ -482,14 +501,17 @@ class Finder:
482 # print("repayment_schedule_text_list = ", repayment_schedule_text_list) 501 # print("repayment_schedule_text_list = ", repayment_schedule_text_list)
483 # repayment_schedule_table = [['序号', '融资租赁成本', '融资租赁费用', '租金', '剩余融资租赁成本']] 502 # repayment_schedule_table = [['序号', '融资租赁成本', '融资租赁费用', '租金', '剩余融资租赁成本']]
484 repayment_schedule_table = [['序号', '租金']] 503 repayment_schedule_table = [['序号', '租金']]
485 for i in range(len(repayment_schedule_text_list) // 4): 504 for i in range(len(repayment_schedule_text_list)//4):
486 line = [f'{i + 1}.'] 505 line = [f'{i+1}.']
487 # 4表示4列的意思 506 # 4表示4列的意思
488 for j in range(4): 507 for j in range(4):
489 line.append(repayment_schedule_text_list[i * 4 + j]) 508 line.append(repayment_schedule_text_list[i*4+j])
509
490 # 只保留序号和租金列 510 # 只保留序号和租金列
491 line = [line[0].replace('.', ''), line[3]] 511 line = [line[0].replace('.', ''), line[3]]
512
492 repayment_schedule_table.append(line) 513 repayment_schedule_table.append(line)
514
493 repayment_schedule['words'] = repayment_schedule_table 515 repayment_schedule['words'] = repayment_schedule_table
494 repayment_schedule['page'] = page 516 repayment_schedule['page'] = page
495 return repayment_schedule 517 return repayment_schedule
...@@ -538,7 +560,8 @@ class Finder: ...@@ -538,7 +560,8 @@ class Finder:
538 else: 560 else:
539 words = '无' 561 words = '无'
540 boxes = np.array(boxes).reshape((-1, 2)) 562 boxes = np.array(boxes).reshape((-1, 2))
541 position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])] 563 position = [min(boxes[:, 0]), min(boxes[:, 1]),
564 max(boxes[:, 0]), max(boxes[:, 1])]
542 signature_role_2['page_num'] = page_num 565 signature_role_2['page_num'] = page_num
543 signature_role_2['position'] = position 566 signature_role_2['position'] = position
544 signature_role_2['words'] = words 567 signature_role_2['words'] = words
...@@ -573,7 +596,8 @@ class Finder: ...@@ -573,7 +596,8 @@ class Finder:
573 else: 596 else:
574 words = '无' 597 words = '无'
575 boxes = np.array(boxes).reshape((-1, 2)) 598 boxes = np.array(boxes).reshape((-1, 2))
576 position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])] 599 position = [min(boxes[:, 0]), min(boxes[:, 1]),
600 max(boxes[:, 0]), max(boxes[:, 1])]
577 signature_role_3['page_num'] = page_num 601 signature_role_3['page_num'] = page_num
578 signature_role_3['position'] = position 602 signature_role_3['position'] = position
579 signature_role_3['words'] = words 603 signature_role_3['words'] = words
...@@ -608,7 +632,8 @@ class Finder: ...@@ -608,7 +632,8 @@ class Finder:
608 else: 632 else:
609 words = '无' 633 words = '无'
610 boxes = np.array(boxes).reshape((-1, 2)) 634 boxes = np.array(boxes).reshape((-1, 2))
611 position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])] 635 position = [min(boxes[:, 0]), min(boxes[:, 1]),
636 max(boxes[:, 0]), max(boxes[:, 1])]
612 signature_role_4['page_num'] = page_num 637 signature_role_4['page_num'] = page_num
613 signature_role_4['position'] = position 638 signature_role_4['position'] = position
614 signature_role_4['words'] = words 639 signature_role_4['words'] = words
...@@ -644,7 +669,8 @@ class Finder: ...@@ -644,7 +669,8 @@ class Finder:
644 else: 669 else:
645 words = '无' 670 words = '无'
646 boxes = np.array(boxes).reshape((-1, 2)) 671 boxes = np.array(boxes).reshape((-1, 2))
647 position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])] 672 position = [min(boxes[:, 0]), min(boxes[:, 1]),
673 max(boxes[:, 0]), max(boxes[:, 1])]
648 signature_role_5['page_num'] = page_num 674 signature_role_5['page_num'] = page_num
649 signature_role_5['position'] = position 675 signature_role_5['position'] = position
650 signature_role_5['words'] = words 676 signature_role_5['words'] = words
...@@ -717,6 +743,7 @@ class Finder: ...@@ -717,6 +743,7 @@ class Finder:
717 name = self.item.copy() 743 name = self.item.copy()
718 id_num = self.item.copy() 744 id_num = self.item.copy()
719 representative = self.item.copy() 745 representative = self.item.copy()
746
720 # 以保证人3 的左上角为定位点 747 # 以保证人3 的左上角为定位点
721 anchor = None 748 anchor = None
722 for block in self.pdf_info[page_num]['blocks']: 749 for block in self.pdf_info[page_num]['blocks']:
...@@ -728,6 +755,7 @@ class Finder: ...@@ -728,6 +755,7 @@ class Finder:
728 # 找到角色姓名 755 # 找到角色姓名
729 if re.match('保证人3', text) is not None: 756 if re.match('保证人3', text) is not None:
730 anchor = [bbox[0], bbox[1]] 757 anchor = [bbox[0], bbox[1]]
758
731 if anchor is not None: 759 if anchor is not None:
732 for block in self.pdf_info[page_num]['blocks']: 760 for block in self.pdf_info[page_num]['blocks']:
733 if block['type'] != 0: 761 if block['type'] != 0:
...@@ -743,60 +771,52 @@ class Finder: ...@@ -743,60 +771,52 @@ class Finder:
743 name['position'] = bbox 771 name['position'] = bbox
744 if role_key == '承租人:': 772 if role_key == '承租人:':
745 # 找到证件号码且确定位置 773 # 找到证件号码且确定位置
746 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( 774 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
747 bbox[1::2]) < anchor[1]:
748 words = text.split(':')[-1] 775 words = text.split(':')[-1]
749 id_num['words'] = words 776 id_num['words'] = words
750 id_num['page'] = page_num 777 id_num['page'] = page_num
751 id_num['position'] = bbox 778 id_num['position'] = bbox
752 # 找到法人代表且确定位置 779 # 找到法人代表且确定位置
753 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( 780 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
754 bbox[1::2]) < anchor[1]:
755 words = text.split(':')[-1] 781 words = text.split(':')[-1]
756 representative['words'] = words 782 representative['words'] = words
757 representative['page'] = page_num 783 representative['page'] = page_num
758 representative['position'] = bbox 784 representative['position'] = bbox
759 if role_key == '保证人1:': 785 if role_key == '保证人1:':
760 # 找到证件号码且确定位置 786 # 找到证件号码且确定位置
761 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( 787 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
762 bbox[1::2]) > anchor[1]:
763 words = text.split(':')[-1] 788 words = text.split(':')[-1]
764 id_num['words'] = words 789 id_num['words'] = words
765 id_num['page'] = page_num 790 id_num['page'] = page_num
766 id_num['position'] = bbox 791 id_num['position'] = bbox
767 # 找到法人代表且确定位置 792 # 找到法人代表且确定位置
768 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( 793 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
769 bbox[1::2]) > anchor[1]:
770 words = text.split(':')[-1] 794 words = text.split(':')[-1]
771 representative['words'] = words 795 representative['words'] = words
772 representative['page'] = page_num 796 representative['page'] = page_num
773 representative['position'] = bbox 797 representative['position'] = bbox
774 if role_key == '保证人2:': 798 if role_key == '保证人2:':
775 # 找到证件号码且确定位置 799 # 找到证件号码且确定位置
776 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( 800 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
777 bbox[1::2]) < anchor[1]:
778 words = text.split(':')[-1] 801 words = text.split(':')[-1]
779 id_num['words'] = words 802 id_num['words'] = words
780 id_num['page'] = page_num 803 id_num['page'] = page_num
781 id_num['position'] = bbox 804 id_num['position'] = bbox
782 # 找到法人代表且确定位置 805 # 找到法人代表且确定位置
783 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( 806 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
784 bbox[1::2]) < anchor[1]:
785 words = text.split(':')[-1] 807 words = text.split(':')[-1]
786 representative['words'] = words 808 representative['words'] = words
787 representative['page'] = page_num 809 representative['page'] = page_num
788 representative['position'] = bbox 810 representative['position'] = bbox
789 if role_key == '保证人3:': 811 if role_key == '保证人3:':
790 # 找到证件号码且确定位置 812 # 找到证件号码且确定位置
791 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( 813 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
792 bbox[1::2]) > anchor[1]:
793 words = text.split(':')[-1] 814 words = text.split(':')[-1]
794 id_num['words'] = words 815 id_num['words'] = words
795 id_num['page'] = page_num 816 id_num['page'] = page_num
796 id_num['position'] = bbox 817 id_num['position'] = bbox
797 # 找到法人代表且确定位置 818 # 找到法人代表且确定位置
798 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( 819 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
799 bbox[1::2]) > anchor[1]:
800 words = text.split(':')[-1] 820 words = text.split(':')[-1]
801 representative['words'] = words 821 representative['words'] = words
802 representative['page'] = page_num 822 representative['page'] = page_num
...@@ -805,6 +825,7 @@ class Finder: ...@@ -805,6 +825,7 @@ class Finder:
805 825
806 def get_table_add_product(self): 826 def get_table_add_product(self):
807 table_add_product = self.item.copy() 827 table_add_product = self.item.copy()
828
808 add_product_page_num = None 829 add_product_page_num = None
809 for pno in self.pdf_info: 830 for pno in self.pdf_info:
810 for block in self.pdf_info[f'{pno}']['blocks']: 831 for block in self.pdf_info[f'{pno}']['blocks']:
...@@ -825,11 +846,14 @@ class Finder: ...@@ -825,11 +846,14 @@ class Finder:
825 xmin, ymin, xmax, ymax = bbox 846 xmin, ymin, xmax, ymax = bbox
826 bbox = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax] 847 bbox = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]
827 ocr_results.append([bbox, text]) 848 ocr_results.append([bbox, text])
849
828 lines = [['项目', '购买价格', '实际融资金额']] 850 lines = [['项目', '购买价格', '实际融资金额']]
851
829 key_xm = None 852 key_xm = None
830 key_gmjg = None 853 key_gmjg = None
831 key_sjrzje = None 854 key_sjrzje = None
832 key_total = None 855 key_total = None
856
833 for index, span in enumerate(ocr_results): 857 for index, span in enumerate(ocr_results):
834 if span[1] == '项目': 858 if span[1] == '项目':
835 key_xm = index 859 key_xm = index
...@@ -839,22 +863,29 @@ class Finder: ...@@ -839,22 +863,29 @@ class Finder:
839 key_sjrzje = index 863 key_sjrzje = index
840 if span[1] == '总计': 864 if span[1] == '总计':
841 key_total = index 865 key_total = index
866
842 bbox, text = ocr_results[key_xm] 867 bbox, text = ocr_results[key_xm]
843 rh = abs(bbox[1] - bbox[-1]) 868 rh = abs(bbox[1]-bbox[-1])
844 anchor = np.array(bbox).reshape((-1, 2)) 869 anchor = np.array(bbox).reshape((-1, 2))
845 anchor[:, 0] += 2 * rh 870 anchor[:, 0] += 2*rh
846 anchor[:, 1] += rh 871 anchor[:, 1] += rh
872
847 for i in range(5): 873 for i in range(5):
848 for span in ocr_results: 874 for span in ocr_results:
849 iou = caculate_iou(anchor, span[0]) 875 iou = caculate_iou(anchor, span[0])
850 if iou > 0.01 and span[1].strip() != '所购': 876 if iou > 0.01 and span[1].strip() != '所购':
851 x = get_table_info(span[0], ocr_results[key_gmjg][0], ocr_results) 877 x = get_table_info(
852 y = get_table_info(span[0], ocr_results[key_sjrzje][0], ocr_results) 878 span[0], ocr_results[key_gmjg][0], ocr_results)
879 y = get_table_info(
880 span[0], ocr_results[key_sjrzje][0], ocr_results)
853 line = [span[1].replace('\u3000', ' '), x, y] 881 line = [span[1].replace('\u3000', ' '), x, y]
882 # print(line)
854 lines.append(line) 883 lines.append(line)
855 anchor = np.array(span[0]).reshape((-1, 2)) 884 anchor = np.array(span[0]).reshape((-1, 2))
856 anchor[:, 1] += rh 885 anchor[:, 1] += rh
857 total = get_table_info(ocr_results[key_total][0], ocr_results[key_sjrzje][0], ocr_results) 886
887 total = get_table_info(
888 ocr_results[key_total][0], ocr_results[key_sjrzje][0], ocr_results)
858 lines.append(['总计', '', total]) 889 lines.append(['总计', '', total])
859 890
860 # 所购 BMW悦然焕 891 # 所购 BMW悦然焕
...@@ -880,7 +911,6 @@ class Finder: ...@@ -880,7 +911,6 @@ class Finder:
880 if '事故维修补' in line[0]: 911 if '事故维修补' in line[0]:
881 line[0] = '所购 事故维修补偿方案' 912 line[0] = '所购 事故维修补偿方案'
882 filtered_lines.append(line) 913 filtered_lines.append(line)
883
884 table_add_product['words'] = filtered_lines 914 table_add_product['words'] = filtered_lines
885 table_add_product['page'] = add_product_page_num 915 table_add_product['page'] = add_product_page_num
886 table_add_product['position'] = None 916 table_add_product['position'] = None
...@@ -889,6 +919,7 @@ class Finder: ...@@ -889,6 +919,7 @@ class Finder:
889 def get_contract_no_dy(self): 919 def get_contract_no_dy(self):
890 # 查找抵押合同编号 920 # 查找抵押合同编号
891 contract_no = self.item.copy() 921 contract_no = self.item.copy()
922
892 key_box = None 923 key_box = None
893 for pno in self.pdf_info: 924 for pno in self.pdf_info:
894 for block in self.pdf_info[pno]['blocks']: 925 for block in self.pdf_info[pno]['blocks']:
...@@ -899,6 +930,7 @@ class Finder: ...@@ -899,6 +930,7 @@ class Finder:
899 bbox, text = span['bbox'], span['text'] 930 bbox, text = span['bbox'], span['text']
900 if '抵押合同编号' in text: 931 if '抵押合同编号' in text:
901 key_box = bbox 932 key_box = bbox
933
902 if key_box is not None: 934 if key_box is not None:
903 for pno in self.pdf_info: 935 for pno in self.pdf_info:
904 for block in self.pdf_info[pno]['blocks']: 936 for block in self.pdf_info[pno]['blocks']:
...@@ -916,6 +948,7 @@ class Finder: ...@@ -916,6 +948,7 @@ class Finder:
916 def get_dyr_name_id(self): 948 def get_dyr_name_id(self):
917 name = self.item.copy() 949 name = self.item.copy()
918 _id = self.item.copy() 950 _id = self.item.copy()
951
919 key_box = None 952 key_box = None
920 for pno in self.pdf_info: 953 for pno in self.pdf_info:
921 for block in self.pdf_info[pno]['blocks']: 954 for block in self.pdf_info[pno]['blocks']:
...@@ -928,7 +961,7 @@ class Finder: ...@@ -928,7 +961,7 @@ class Finder:
928 key_box = bbox 961 key_box = bbox
929 962
930 if key_box is not None: 963 if key_box is not None:
931 rh = abs(key_box[1] - key_box[3]) 964 rh = abs(key_box[1]-key_box[3])
932 for pno in self.pdf_info: 965 for pno in self.pdf_info:
933 for block in self.pdf_info[pno]['blocks']: 966 for block in self.pdf_info[pno]['blocks']:
934 if block['type'] != 0: 967 if block['type'] != 0:
...@@ -936,12 +969,12 @@ class Finder: ...@@ -936,12 +969,12 @@ class Finder:
936 for line in block['lines']: 969 for line in block['lines']:
937 for span in line['spans']: 970 for span in line['spans']:
938 bbox, text = span['bbox'], span['text'] 971 bbox, text = span['bbox'], span['text']
939 if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '姓名' in text: 972 if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '姓名' in text:
940 words = text.split(':')[-1] 973 words = text.split(':')[-1]
941 name['position'] = bbox 974 name['position'] = bbox
942 name['page'] = pno 975 name['page'] = pno
943 name['words'] = words 976 name['words'] = words
944 if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '证件号码' in text: 977 if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '证件号码' in text:
945 words = text.split(':')[-1] 978 words = text.split(':')[-1]
946 _id['position'] = bbox 979 _id['position'] = bbox
947 _id['page'] = pno 980 _id['page'] = pno
...@@ -951,6 +984,7 @@ class Finder: ...@@ -951,6 +984,7 @@ class Finder:
951 def get_dyrpo_name_id(self): 984 def get_dyrpo_name_id(self):
952 name = self.item.copy() 985 name = self.item.copy()
953 _id = self.item.copy() 986 _id = self.item.copy()
987
954 key_box = None 988 key_box = None
955 for pno in self.pdf_info: 989 for pno in self.pdf_info:
956 for block in self.pdf_info[pno]['blocks']: 990 for block in self.pdf_info[pno]['blocks']:
...@@ -961,8 +995,9 @@ class Finder: ...@@ -961,8 +995,9 @@ class Finder:
961 bbox, text = span['bbox'], span['text'] 995 bbox, text = span['bbox'], span['text']
962 if text == '抵押人配偶(如适': 996 if text == '抵押人配偶(如适':
963 key_box = bbox 997 key_box = bbox
998
964 if key_box is not None: 999 if key_box is not None:
965 rh = abs(key_box[1] - key_box[3]) 1000 rh = abs(key_box[1]-key_box[3])
966 for pno in self.pdf_info: 1001 for pno in self.pdf_info:
967 for block in self.pdf_info[pno]['blocks']: 1002 for block in self.pdf_info[pno]['blocks']:
968 if block['type'] != 0: 1003 if block['type'] != 0:
...@@ -970,13 +1005,13 @@ class Finder: ...@@ -970,13 +1005,13 @@ class Finder:
970 for line in block['lines']: 1005 for line in block['lines']:
971 for span in line['spans']: 1006 for span in line['spans']:
972 bbox, text = span['bbox'], span['text'] 1007 bbox, text = span['bbox'], span['text']
973 if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '姓名' in text: 1008 if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '姓名' in text:
974 words = text.split(':')[-1] 1009 words = text.split(':')[-1]
975 name['position'] = bbox 1010 name['position'] = bbox
976 name['page'] = pno 1011 name['page'] = pno
977 name['words'] = words 1012 name['words'] = words
978 if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '证件号码' in text: 1013 if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '证件号码' in text:
979 words = text.split(':')[-1] 1014 words = text.split(':')[-1].strip()
980 _id['position'] = bbox 1015 _id['position'] = bbox
981 _id['page'] = pno 1016 _id['page'] = pno
982 _id['words'] = words 1017 _id['words'] = words
...@@ -984,6 +1019,7 @@ class Finder: ...@@ -984,6 +1019,7 @@ class Finder:
984 1019
985 def get_key_value_position(self, key): 1020 def get_key_value_position(self, key):
986 value = self.item.copy() 1021 value = self.item.copy()
1022
987 key_box = None 1023 key_box = None
988 for pno in self.pdf_info: 1024 for pno in self.pdf_info:
989 for block in self.pdf_info[pno]['blocks']: 1025 for block in self.pdf_info[pno]['blocks']:
...@@ -994,8 +1030,9 @@ class Finder: ...@@ -994,8 +1030,9 @@ class Finder:
994 bbox, text = span['bbox'], span['text'] 1030 bbox, text = span['bbox'], span['text']
995 if text == key: 1031 if text == key:
996 key_box = bbox 1032 key_box = bbox
1033
997 if key_box is not None: 1034 if key_box is not None:
998 rh = abs(key_box[1] - key_box[3]) 1035 rh = abs(key_box[1]-key_box[3])
999 for pno in self.pdf_info: 1036 for pno in self.pdf_info:
1000 for block in self.pdf_info[pno]['blocks']: 1037 for block in self.pdf_info[pno]['blocks']:
1001 if block['type'] != 0: 1038 if block['type'] != 0:
...@@ -1003,9 +1040,8 @@ class Finder: ...@@ -1003,9 +1040,8 @@ class Finder:
1003 for line in block['lines']: 1040 for line in block['lines']:
1004 for span in line['spans']: 1041 for span in line['spans']:
1005 bbox, text = span['bbox'], span['text'] 1042 bbox, text = span['bbox'], span['text']
1006 if key_box[1] < np.mean(bbox[1::2]) < key_box[3] and key_box[0] < bbox[0] and abs( 1043 if key_box[1] < np.mean(bbox[1::2]) < key_box[3] and key_box[0] < bbox[0] and abs(key_box[2]-bbox[0]) < rh*10:
1007 key_box[2] - bbox[0]) < rh * 10: 1044 words = text.strip()
1008 words = text
1009 value['position'] = bbox 1045 value['position'] = bbox
1010 value['page'] = pno 1046 value['page'] = pno
1011 value['words'] = words 1047 value['words'] = words
...@@ -1015,6 +1051,7 @@ class Finder: ...@@ -1015,6 +1051,7 @@ class Finder:
1015 name = self.item.copy() 1051 name = self.item.copy()
1016 id_num = self.item.copy() 1052 id_num = self.item.copy()
1017 representative = self.item.copy() 1053 representative = self.item.copy()
1054
1018 # 以保证人2 的左上角为定位点 1055 # 以保证人2 的左上角为定位点
1019 anchor = None 1056 anchor = None
1020 for block in self.pdf_info[page_num]['blocks']: 1057 for block in self.pdf_info[page_num]['blocks']:
...@@ -1026,6 +1063,7 @@ class Finder: ...@@ -1026,6 +1063,7 @@ class Finder:
1026 # 找到角色姓名 1063 # 找到角色姓名
1027 if re.match('保证人2', text) is not None: 1064 if re.match('保证人2', text) is not None:
1028 anchor = [bbox[0], bbox[1]] 1065 anchor = [bbox[0], bbox[1]]
1066
1029 if anchor is not None: 1067 if anchor is not None:
1030 for block in self.pdf_info[page_num]['blocks']: 1068 for block in self.pdf_info[page_num]['blocks']:
1031 if block['type'] != 0: 1069 if block['type'] != 0:
...@@ -1041,60 +1079,52 @@ class Finder: ...@@ -1041,60 +1079,52 @@ class Finder:
1041 name['position'] = bbox 1079 name['position'] = bbox
1042 if role_key == '承租人一:': 1080 if role_key == '承租人一:':
1043 # 找到证件号码且确定位置 1081 # 找到证件号码且确定位置
1044 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( 1082 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
1045 bbox[1::2]) < anchor[1]:
1046 words = text.split(':')[-1] 1083 words = text.split(':')[-1]
1047 id_num['words'] = words 1084 id_num['words'] = words
1048 id_num['page'] = page_num 1085 id_num['page'] = page_num
1049 id_num['position'] = bbox 1086 id_num['position'] = bbox
1050 # 找到法人代表且确定位置 1087 # 找到法人代表且确定位置
1051 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( 1088 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
1052 bbox[1::2]) < anchor[1]:
1053 words = text.split(':')[-1] 1089 words = text.split(':')[-1]
1054 representative['words'] = words 1090 representative['words'] = words
1055 representative['page'] = page_num 1091 representative['page'] = page_num
1056 representative['position'] = bbox 1092 representative['position'] = bbox
1057 if role_key == '共同承租人:': 1093 if role_key == '共同承租人:':
1058 # 找到证件号码且确定位置 1094 # 找到证件号码且确定位置
1059 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( 1095 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
1060 bbox[1::2]) > anchor[1]:
1061 words = text.split(':')[-1] 1096 words = text.split(':')[-1]
1062 id_num['words'] = words 1097 id_num['words'] = words
1063 id_num['page'] = page_num 1098 id_num['page'] = page_num
1064 id_num['position'] = bbox 1099 id_num['position'] = bbox
1065 # 找到法人代表且确定位置 1100 # 找到法人代表且确定位置
1066 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( 1101 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
1067 bbox[1::2]) > anchor[1]:
1068 words = text.split(':')[-1] 1102 words = text.split(':')[-1]
1069 representative['words'] = words 1103 representative['words'] = words
1070 representative['page'] = page_num 1104 representative['page'] = page_num
1071 representative['position'] = bbox 1105 representative['position'] = bbox
1072 if role_key == '保证人1:': 1106 if role_key == '保证人1:':
1073 # 找到证件号码且确定位置 1107 # 找到证件号码且确定位置
1074 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( 1108 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
1075 bbox[1::2]) < anchor[1]:
1076 words = text.split(':')[-1] 1109 words = text.split(':')[-1]
1077 id_num['words'] = words 1110 id_num['words'] = words
1078 id_num['page'] = page_num 1111 id_num['page'] = page_num
1079 id_num['position'] = bbox 1112 id_num['position'] = bbox
1080 # 找到法人代表且确定位置 1113 # 找到法人代表且确定位置
1081 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( 1114 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
1082 bbox[1::2]) < anchor[1]:
1083 words = text.split(':')[-1] 1115 words = text.split(':')[-1]
1084 representative['words'] = words 1116 representative['words'] = words
1085 representative['page'] = page_num 1117 representative['page'] = page_num
1086 representative['position'] = bbox 1118 representative['position'] = bbox
1087 if role_key == '保证人2:': 1119 if role_key == '保证人2:':
1088 # 找到证件号码且确定位置 1120 # 找到证件号码且确定位置
1089 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( 1121 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
1090 bbox[1::2]) > anchor[1]:
1091 words = text.split(':')[-1] 1122 words = text.split(':')[-1]
1092 id_num['words'] = words 1123 id_num['words'] = words
1093 id_num['page'] = page_num 1124 id_num['page'] = page_num
1094 id_num['position'] = bbox 1125 id_num['position'] = bbox
1095 # 找到法人代表且确定位置 1126 # 找到法人代表且确定位置
1096 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( 1127 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
1097 bbox[1::2]) > anchor[1]:
1098 words = text.split(':')[-1] 1128 words = text.split(':')[-1]
1099 representative['words'] = words 1129 representative['words'] = words
1100 representative['page'] = page_num 1130 representative['page'] = page_num
...@@ -1137,6 +1167,7 @@ class Finder: ...@@ -1137,6 +1167,7 @@ class Finder:
1137 # 取 Page 1 上的合同编号 1167 # 取 Page 1 上的合同编号
1138 contract_no = self.get_contract_no(page_num='0') 1168 contract_no = self.get_contract_no(page_num='0')
1139 self.init_result['合同编号'] = contract_no 1169 self.init_result['合同编号'] = contract_no
1170
1140 # 粗略判断是否是 ‘车贷分离版本’ 的合同 1171 # 粗略判断是否是 ‘车贷分离版本’ 的合同
1141 is_cdfl = False 1172 is_cdfl = False
1142 for block in self.pdf_info['0']['blocks']: 1173 for block in self.pdf_info['0']['blocks']:
...@@ -1147,60 +1178,81 @@ class Finder: ...@@ -1147,60 +1178,81 @@ class Finder:
1147 bbox, text = span['bbox'], span['text'] 1178 bbox, text = span['bbox'], span['text']
1148 if '共同承租人:' in text: 1179 if '共同承租人:' in text:
1149 is_cdfl = True 1180 is_cdfl = True
1181
1150 if is_cdfl == False: 1182 if is_cdfl == False:
1151 # 从第一页上取四个角色的姓名和证件号码 1183 # 从第一页上取四个角色的姓名和证件号码
1152 name, id_num, representative = self.get_role_info(role_key='承租人:', page_num='0') 1184 name, id_num, representative = self.get_role_info(
1185 role_key='承租人:', page_num='0')
1186
1153 if name["words"] == None: 1187 if name["words"] == None:
1154 name, id_num, representative = self.get_role_info_3_3(role_key='承租人一:', page_num='0') 1188 name, id_num, representative = self.get_role_info_3_3(
1189 role_key='承租人一:', page_num='0')
1155 self.init_result['承租人-姓名'] = name 1190 self.init_result['承租人-姓名'] = name
1156 self.init_result['承租人-证件号码'] = id_num 1191 self.init_result['承租人-证件号码'] = id_num
1157 self.init_result['承租人-法定代表人或授权代表'] = representative 1192 self.init_result['承租人-法定代表人或授权代表'] = representative
1158 name, id_num, representative = self.get_role_info(role_key='保证人1:', page_num='0') 1193
1194 name, id_num, representative = self.get_role_info(
1195 role_key='保证人1:', page_num='0')
1159 self.init_result['保证人1-姓名'] = name 1196 self.init_result['保证人1-姓名'] = name
1160 self.init_result['保证人1-证件号码'] = id_num 1197 self.init_result['保证人1-证件号码'] = id_num
1161 self.init_result['保证人1-法定代表人或授权代表'] = representative 1198 self.init_result['保证人1-法定代表人或授权代表'] = representative
1162 # if条件判别 对应3_3版本 1199 # if条件判别 对应3_3版本
1163 if name["words"] == None: 1200 if name["words"] == None:
1164 name, id_num, representative = self.get_role_info_3_3(role_key='共同承租人:', page_num='0') 1201 name, id_num, representative = self.get_role_info_3_3(
1202 role_key='共同承租人:', page_num='0')
1165 self.init_result['共同承租人-姓名'] = name 1203 self.init_result['共同承租人-姓名'] = name
1166 self.init_result['共同承租人-证件号码'] = id_num 1204 self.init_result['共同承租人-证件号码'] = id_num
1167 self.init_result['共同承租人-法定代表人或授权代表'] = representative 1205 self.init_result['共同承租人-法定代表人或授权代表'] = representative
1168 name, id_num, representative = self.get_role_info(role_key='保证人2:', page_num='0') 1206
1207 name, id_num, representative = self.get_role_info(
1208 role_key='保证人2:', page_num='0')
1169 self.init_result['保证人2-姓名'] = name 1209 self.init_result['保证人2-姓名'] = name
1170 self.init_result['保证人2-证件号码'] = id_num 1210 self.init_result['保证人2-证件号码'] = id_num
1171 self.init_result['保证人2-法定代表人或授权代表'] = representative 1211 self.init_result['保证人2-法定代表人或授权代表'] = representative
1172 # if条件判别 对应3_3版本 1212 # if条件判别 对应3_3版本
1173 if name["words"] == None: 1213 if name["words"] == None:
1174 name, id_num, representative = self.get_role_info_3_3(role_key='保证人1:', page_num='0') 1214 name, id_num, representative = self.get_role_info_3_3(
1215 role_key='保证人1:', page_num='0')
1175 self.init_result['保证人2-姓名'] = name 1216 self.init_result['保证人2-姓名'] = name
1176 self.init_result['保证人2-证件号码'] = id_num 1217 self.init_result['保证人2-证件号码'] = id_num
1177 self.init_result['保证人2-法定代表人或授权代表'] = representative 1218 self.init_result['保证人2-法定代表人或授权代表'] = representative
1178 name, id_num, representative = self.get_role_info(role_key='保证人3:', page_num='0') 1219
1220 name, id_num, representative = self.get_role_info(
1221 role_key='保证人3:', page_num='0')
1179 self.init_result['保证人3-姓名'] = name 1222 self.init_result['保证人3-姓名'] = name
1180 self.init_result['保证人3-证件号码'] = id_num 1223 self.init_result['保证人3-证件号码'] = id_num
1181 self.init_result['保证人3-法定代表人或授权代表'] = representative 1224 self.init_result['保证人3-法定代表人或授权代表'] = representative
1182 if name["words"] == None: 1225 if name["words"] == None:
1183 name, id_num, representative = self.get_role_info_3_3(role_key='保证人2:', page_num='0') 1226 name, id_num, representative = self.get_role_info_3_3(
1227 role_key='保证人2:', page_num='0')
1184 self.init_result['保证人3-姓名'] = name 1228 self.init_result['保证人3-姓名'] = name
1185 self.init_result['保证人3-证件号码'] = id_num 1229 self.init_result['保证人3-证件号码'] = id_num
1186 self.init_result['保证人3-法定代表人或授权代表'] = representative 1230 self.init_result['保证人3-法定代表人或授权代表'] = representative
1187 else: 1231 else:
1188 name, id_num, representative = self.get_role_info_3_3(role_key='承租人一:', page_num='0') 1232 name, id_num, representative = self.get_role_info_3_3(
1233 role_key='承租人一:', page_num='0')
1189 self.init_result['承租人-姓名'] = name 1234 self.init_result['承租人-姓名'] = name
1190 self.init_result['承租人-证件号码'] = id_num 1235 self.init_result['承租人-证件号码'] = id_num
1191 self.init_result['承租人-法定代表人或授权代表'] = representative 1236 self.init_result['承租人-法定代表人或授权代表'] = representative
1192 name, id_num, representative = self.get_role_info_3_3(role_key='共同承租人:', page_num='0') 1237
1238 name, id_num, representative = self.get_role_info_3_3(
1239 role_key='共同承租人:', page_num='0')
1193 self.init_result['共同承租人-姓名'] = name 1240 self.init_result['共同承租人-姓名'] = name
1194 self.init_result['共同承租人-证件号码'] = id_num 1241 self.init_result['共同承租人-证件号码'] = id_num
1195 self.init_result['共同承租人-法定代表人或授权代表'] = representative 1242 self.init_result['共同承租人-法定代表人或授权代表'] = representative
1196 name, id_num, representative = self.get_role_info_3_3(role_key='保证人1:', page_num='0') 1243
1244 name, id_num, representative = self.get_role_info_3_3(
1245 role_key='保证人1:', page_num='0')
1197 self.init_result['保证人1-姓名'] = name 1246 self.init_result['保证人1-姓名'] = name
1198 self.init_result['保证人1-证件号码'] = id_num 1247 self.init_result['保证人1-证件号码'] = id_num
1199 self.init_result['保证人1-法定代表人或授权代表'] = representative 1248 self.init_result['保证人1-法定代表人或授权代表'] = representative
1200 name, id_num, representative = self.get_role_info_3_3(role_key='保证人2:', page_num='0') 1249
1250 name, id_num, representative = self.get_role_info_3_3(
1251 role_key='保证人2:', page_num='0')
1201 self.init_result['保证人2-姓名'] = name 1252 self.init_result['保证人2-姓名'] = name
1202 self.init_result['保证人2-证件号码'] = id_num 1253 self.init_result['保证人2-证件号码'] = id_num
1203 self.init_result['保证人2-法定代表人或授权代表'] = representative 1254 self.init_result['保证人2-法定代表人或授权代表'] = representative
1255
1204 # 在所有页面中找正文中(第二部分 融资租赁主要条款及付款计划)的那个编号,因为存在换行的情况所以暂时不带位置输出 1256 # 在所有页面中找正文中(第二部分 融资租赁主要条款及付款计划)的那个编号,因为存在换行的情况所以暂时不带位置输出
1205 contract_no = self.get_contract_no_one() 1257 contract_no = self.get_contract_no_one()
1206 self.init_result['合同编号(正文)'] = contract_no 1258 self.init_result['合同编号(正文)'] = contract_no
...@@ -1211,7 +1263,8 @@ class Finder: ...@@ -1211,7 +1263,8 @@ class Finder:
1211 seller = self.get_key_value(key='车辆卖方(经销商):') 1263 seller = self.get_key_value(key='车辆卖方(经销商):')
1212 self.init_result['车辆卖方(经销商)'] = seller 1264 self.init_result['车辆卖方(经销商)'] = seller
1213 # 找到 —— 车辆原始销售价格 1265 # 找到 —— 车辆原始销售价格
1214 vehicle_price = self.get_key_value(key='车辆原始销售价格(《机动车销售统一发票》所列金额):') 1266 vehicle_price = self.get_key_value(
1267 key='车辆原始销售价格(《机动车销售统一发票》所列金额):')
1215 self.init_result['车辆原始销售价格(《机动车销售统一发票》所列金额)'] = vehicle_price 1268 self.init_result['车辆原始销售价格(《机动车销售统一发票》所列金额)'] = vehicle_price
1216 # 找车辆附加产品明细(表) 1269 # 找车辆附加产品明细(表)
1217 table_add_product = self.get_table_add_product() 1270 table_add_product = self.get_table_add_product()
...@@ -1232,66 +1285,85 @@ class Finder: ...@@ -1232,66 +1285,85 @@ class Finder:
1232 self.init_result['银行账户-银行账号'] = account 1285 self.init_result['银行账户-银行账号'] = account
1233 bank = self.get_key_value(key='开户银行:') 1286 bank = self.get_key_value(key='开户银行:')
1234 self.init_result['银行账户-开户行'] = bank 1287 self.init_result['银行账户-开户行'] = bank
1288
1235 # 找签字页上的系列信息 1289 # 找签字页上的系列信息
1236 # 承租人姓名、签章 1290 # 承租人姓名、签章
1237 if is_cdfl == False: 1291 if is_cdfl == False:
1238 name = self.get_key_value(key='承租人姓名:') 1292 name = self.get_key_value(key='承租人姓名:')
1239 electronic_signature = self.get_electronic_signature(top='承租人姓名:', bottom='保证人1姓名:') 1293 electronic_signature = self.get_electronic_signature(
1294 top='承租人姓名:', bottom='保证人1姓名:')
1295
1240 if name["words"] == None: 1296 if name["words"] == None:
1241 name = self.get_key_value(key='承租人一姓名:') 1297 name = self.get_key_value(key='承租人一姓名:')
1242 electronic_signature = self.get_electronic_signature(top='承租人一姓名:', bottom='共同承租人名称:') 1298 electronic_signature = self.get_electronic_signature(
1299 top='承租人一姓名:', bottom='共同承租人名称:')
1300
1243 self.init_result['签字页-承租人姓名'] = name 1301 self.init_result['签字页-承租人姓名'] = name
1244 self.init_result['签字页-承租人签章'] = electronic_signature 1302 self.init_result['签字页-承租人签章'] = electronic_signature
1245 # 保证人1姓名、签章 1303 # 保证人1姓名、签章
1246 name = self.get_key_value(key='保证人1姓名:') 1304 name = self.get_key_value(key='保证人1姓名:')
1247 electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:') 1305 electronic_signature = self.get_electronic_signature(
1306 top='保证人1姓名:', bottom='保证人2姓名:')
1248 self.init_result['签字页-保证人1姓名'] = name 1307 self.init_result['签字页-保证人1姓名'] = name
1249 self.init_result['签字页-保证人1签章'] = electronic_signature 1308 self.init_result['签字页-保证人1签章'] = electronic_signature
1250 # 这里用的是 name["words"] == "" 1309 # 这里用的是 name["words"] == ""
1251 if name["words"] == "": 1310 if name["words"] == "":
1252 name = self.get_key_value(key='共同承租人名称:') 1311 name = self.get_key_value(key='共同承租人名称:')
1253 electronic_signature = self.get_electronic_signature(top='共同承租人名称:', bottom='保证人1姓名:') 1312 electronic_signature = self.get_electronic_signature(
1313 top='共同承租人名称:', bottom='保证人1姓名:')
1254 self.init_result['签字页-共同承租人姓名'] = name 1314 self.init_result['签字页-共同承租人姓名'] = name
1255 self.init_result['签字页-共同承租人签章'] = electronic_signature 1315 self.init_result['签字页-共同承租人签章'] = electronic_signature
1256 # 保证人2姓名、签章 1316 # 保证人2姓名、签章
1257 name = self.get_key_value(key='保证人2姓名:') 1317 name = self.get_key_value(key='保证人2姓名:')
1258 electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='保证人3姓名:') 1318 electronic_signature = self.get_electronic_signature(
1319 top='保证人2姓名:', bottom='保证人3姓名:')
1259 self.init_result['签字页-保证人2姓名'] = name 1320 self.init_result['签字页-保证人2姓名'] = name
1260 self.init_result['签字页-保证人2签章'] = electronic_signature 1321 self.init_result['签字页-保证人2签章'] = electronic_signature
1261 # if判断条件对应3_3版本 1322 # if判断条件对应3_3版本
1262 if name["words"] == "": 1323 if name["words"] == "":
1263 name = self.get_key_value(key='保证人1姓名:') 1324 name = self.get_key_value(key='保证人1姓名:')
1264 electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:') 1325 electronic_signature = self.get_electronic_signature(
1326 top='保证人1姓名:', bottom='保证人2姓名:')
1265 self.init_result['签字页-保证人1姓名'] = name 1327 self.init_result['签字页-保证人1姓名'] = name
1266 self.init_result['签字页-保证人1签章'] = electronic_signature 1328 self.init_result['签字页-保证人1签章'] = electronic_signature
1267 # 保证人3姓名、签章 1329 # 保证人3姓名、签章
1268 name = self.get_key_value(key='保证人3姓名:') 1330 name = self.get_key_value(key='保证人3姓名:')
1269 electronic_signature = self.get_electronic_signature(top='保证人3姓名:', bottom='日期:') 1331 electronic_signature = self.get_electronic_signature(
1332 top='保证人3姓名:', bottom='日期:')
1270 self.init_result['签字页-保证人3姓名'] = name 1333 self.init_result['签字页-保证人3姓名'] = name
1271 self.init_result['签字页-保证人3签章'] = electronic_signature 1334 self.init_result['签字页-保证人3签章'] = electronic_signature
1272 # if判断条件对应3_3版本 1335 # if判断条件对应3_3版本
1273 if name["words"] == None: 1336 if name["words"] == None:
1274 name = self.get_key_value(key='保证人2姓名:') 1337 name = self.get_key_value(key='保证人2姓名:')
1275 electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='日期:') 1338 electronic_signature = self.get_electronic_signature(
1339 top='保证人2姓名:', bottom='日期:')
1276 self.init_result['签字页-保证人2姓名'] = name 1340 self.init_result['签字页-保证人2姓名'] = name
1277 self.init_result['签字页-保证人2签章'] = electronic_signature 1341 self.init_result['签字页-保证人2签章'] = electronic_signature
1278 else: 1342 else:
1279 name = self.get_key_value(key='承租人一姓名:') 1343 name = self.get_key_value(key='承租人一姓名:')
1280 electronic_signature = self.get_electronic_signature(top='承租人一姓名:', bottom='共同承租人名称:') 1344 electronic_signature = self.get_electronic_signature(
1345 top='承租人一姓名:', bottom='共同承租人名称:')
1281 self.init_result['签字页-承租人姓名'] = name 1346 self.init_result['签字页-承租人姓名'] = name
1282 self.init_result['签字页-承租人签章'] = electronic_signature 1347 self.init_result['签字页-承租人签章'] = electronic_signature
1348
1283 name = self.get_key_value(key='共同承租人名称:') 1349 name = self.get_key_value(key='共同承租人名称:')
1284 electronic_signature = self.get_electronic_signature(top='共同承租人名称:', bottom='保证人1姓名:') 1350 electronic_signature = self.get_electronic_signature(
1351 top='共同承租人名称:', bottom='保证人1姓名:')
1285 self.init_result['签字页-共同承租人姓名'] = name 1352 self.init_result['签字页-共同承租人姓名'] = name
1286 self.init_result['签字页-共同承租人签章'] = electronic_signature 1353 self.init_result['签字页-共同承租人签章'] = electronic_signature
1354
1287 name = self.get_key_value(key='保证人1姓名:') 1355 name = self.get_key_value(key='保证人1姓名:')
1288 electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:') 1356 electronic_signature = self.get_electronic_signature(
1357 top='保证人1姓名:', bottom='保证人2姓名:')
1289 self.init_result['签字页-保证人1姓名'] = name 1358 self.init_result['签字页-保证人1姓名'] = name
1290 self.init_result['签字页-保证人1签章'] = electronic_signature 1359 self.init_result['签字页-保证人1签章'] = electronic_signature
1360
1291 name = self.get_key_value(key='保证人2姓名:') 1361 name = self.get_key_value(key='保证人2姓名:')
1292 electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='保证人3姓名:') 1362 electronic_signature = self.get_electronic_signature(
1363 top='保证人2姓名:', bottom='保证人3姓名:')
1293 self.init_result['签字页-保证人2姓名'] = name 1364 self.init_result['签字页-保证人2姓名'] = name
1294 self.init_result['签字页-保证人2签章'] = electronic_signature 1365 self.init_result['签字页-保证人2签章'] = electronic_signature
1366
1295 return self.init_result 1367 return self.init_result
1296 1368
1297 def get_info_1(self): 1369 def get_info_1(self):
...@@ -1307,7 +1379,8 @@ class Finder: ...@@ -1307,7 +1379,8 @@ class Finder:
1307 # 销售经销商 1379 # 销售经销商
1308 seller = self.get_key_value(key='销售经销商:', page_num='0') 1380 seller = self.get_key_value(key='销售经销商:', page_num='0')
1309 if seller['words'] == "": 1381 if seller['words'] == "":
1310 seller = self.get_value_by_findall('销售经销商:', '地址:', page_num='0') 1382 seller = self.get_value_by_findall(
1383 '销售经销商:', '地址:', page_num='0')
1311 self.init_result_1['销售经销商'] = seller 1384 self.init_result_1['销售经销商'] = seller
1312 # 合同编号(正文) 1385 # 合同编号(正文)
1313 contract_no = self.get_contract_no_one() 1386 contract_no = self.get_contract_no_one()
...@@ -1325,7 +1398,8 @@ class Finder: ...@@ -1325,7 +1398,8 @@ class Finder:
1325 seller = self.get_key_value(key='销售经销商:') 1398 seller = self.get_key_value(key='销售经销商:')
1326 if seller['words'] == "": 1399 if seller['words'] == "":
1327 # 销售经销商:深圳市宝创汽车贸易有限公司南山分公司(请授权代表签字并请盖章) 1400 # 销售经销商:深圳市宝创汽车贸易有限公司南山分公司(请授权代表签字并请盖章)
1328 seller = self.get_value_by_findall('销售经销商:', '(请授权代表签字并请盖章)', page_num='3') 1401 seller = self.get_value_by_findall(
1402 '销售经销商:', '(请授权代表签字并请盖章)', page_num='3')
1329 self.init_result_1['签字页-销售经销商'] = seller 1403 self.init_result_1['签字页-销售经销商'] = seller
1330 # 经销商签章 1404 # 经销商签章
1331 pass 1405 pass
...@@ -1357,12 +1431,14 @@ class Finder: ...@@ -1357,12 +1431,14 @@ class Finder:
1357 self.init_result_2['融资租赁期限'] = lease_term 1431 self.init_result_2['融资租赁期限'] = lease_term
1358 # 签字页抵押人姓名和签章 1432 # 签字页抵押人姓名和签章
1359 name = self.get_key_value(key='抵押人姓名:') 1433 name = self.get_key_value(key='抵押人姓名:')
1360 electronic_signature = self.get_electronic_signature(top='抵押权人盖章', bottom='抵押人配偶姓名:') 1434 electronic_signature = self.get_electronic_signature(
1435 top='抵押权人盖章', bottom='抵押人配偶姓名:')
1361 self.init_result_2['签字页-抵押人姓名'] = name 1436 self.init_result_2['签字页-抵押人姓名'] = name
1362 self.init_result_2['签字页-抵押人签章'] = electronic_signature 1437 self.init_result_2['签字页-抵押人签章'] = electronic_signature
1363 # 签字页抵押人配偶姓名和签章 1438 # 签字页抵押人配偶姓名和签章
1364 name = self.get_key_value(key='抵押人配偶姓名:') 1439 name = self.get_key_value(key='抵押人配偶姓名:')
1365 electronic_signature = self.get_electronic_signature(top='抵押人配偶姓名:', bottom='日期') 1440 electronic_signature = self.get_electronic_signature(
1441 top='抵押人配偶姓名:', bottom='日期')
1366 self.init_result_2['签字页-抵押人配偶姓名'] = name 1442 self.init_result_2['签字页-抵押人配偶姓名'] = name
1367 self.init_result_2['签字页-抵押人配偶签章'] = electronic_signature 1443 self.init_result_2['签字页-抵押人配偶签章'] = electronic_signature
1368 return self.init_result_2 1444 return self.init_result_2
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!