fix HIL contract
Showing
2 changed files
with
199 additions
and
76 deletions
| ... | @@ -6,14 +6,11 @@ | ... | @@ -6,14 +6,11 @@ |
| 6 | # @Description : | 6 | # @Description : |
| 7 | 7 | ||
| 8 | import re | 8 | import re |
| 9 | import cv2 | ||
| 10 | import base64 | ||
| 11 | import numpy as np | 9 | import numpy as np |
| 12 | from fuzzywuzzy import fuzz | 10 | from fuzzywuzzy import fuzz |
| 13 | 11 | ||
| 14 | 12 | ||
| 15 | class Finder: | 13 | class Finder: |
| 16 | |||
| 17 | def __init__(self, pdf_info): | 14 | def __init__(self, pdf_info): |
| 18 | self.pdf_info = pdf_info | 15 | self.pdf_info = pdf_info |
| 19 | self.item = {"words": None, | 16 | self.item = {"words": None, |
| ... | @@ -25,6 +22,9 @@ class Finder: | ... | @@ -25,6 +22,9 @@ class Finder: |
| 25 | "承租人-姓名": self.item, | 22 | "承租人-姓名": self.item, |
| 26 | "承租人-证件号码": self.item, | 23 | "承租人-证件号码": self.item, |
| 27 | "承租人-法定代表人或授权代表": self.item, | 24 | "承租人-法定代表人或授权代表": self.item, |
| 25 | "共同承租人-姓名": self.item, | ||
| 26 | "共同承租人-证件号码": self.item, | ||
| 27 | "共同承租人-法定代表人或授权代表": self.item, | ||
| 28 | "保证人1-姓名": self.item, | 28 | "保证人1-姓名": self.item, |
| 29 | "保证人1-证件号码": self.item, | 29 | "保证人1-证件号码": self.item, |
| 30 | "保证人1-法定代表人或授权代表": self.item, | 30 | "保证人1-法定代表人或授权代表": self.item, |
| ... | @@ -47,6 +47,8 @@ class Finder: | ... | @@ -47,6 +47,8 @@ class Finder: |
| 47 | "银行账户-开户行": self.item, | 47 | "银行账户-开户行": self.item, |
| 48 | "签字页-承租人姓名": self.item, | 48 | "签字页-承租人姓名": self.item, |
| 49 | "签字页-承租人签章": self.item, | 49 | "签字页-承租人签章": self.item, |
| 50 | "签字页-共同承租人姓名": self.item, | ||
| 51 | "签字页-共同承租人签章": self.item, | ||
| 50 | "签字页-保证人1姓名": self.item, | 52 | "签字页-保证人1姓名": self.item, |
| 51 | "签字页-保证人1签章": self.item, | 53 | "签字页-保证人1签章": self.item, |
| 52 | "签字页-保证人2姓名": self.item, | 54 | "签字页-保证人2姓名": self.item, |
| ... | @@ -54,7 +56,6 @@ class Finder: | ... | @@ -54,7 +56,6 @@ class Finder: |
| 54 | "签字页-保证人3姓名": self.item, | 56 | "签字页-保证人3姓名": self.item, |
| 55 | "签字页-保证人3签章": self.item, | 57 | "签字页-保证人3签章": self.item, |
| 56 | } | 58 | } |
| 57 | |||
| 58 | # 格式化输出 车辆处置协议 要是别的字段 | 59 | # 格式化输出 车辆处置协议 要是别的字段 |
| 59 | self.init_result_1 = {"合同编号": self.item, | 60 | self.init_result_1 = {"合同编号": self.item, |
| 60 | "承租人-姓名": self.item, | 61 | "承租人-姓名": self.item, |
| ... | @@ -66,9 +67,7 @@ class Finder: | ... | @@ -66,9 +67,7 @@ class Finder: |
| 66 | "签字页-承租人签章": self.item, | 67 | "签字页-承租人签章": self.item, |
| 67 | "签字页-销售经销商": self.item, | 68 | "签字页-销售经销商": self.item, |
| 68 | "签字页-销售经销商签章": self.item, | 69 | "签字页-销售经销商签章": self.item, |
| 69 | |||
| 70 | } | 70 | } |
| 71 | |||
| 72 | # 格式化输出 车辆租赁抵押合同 | 71 | # 格式化输出 车辆租赁抵押合同 |
| 73 | self.init_result_2 = {"合同编号": self.item, | 72 | self.init_result_2 = {"合同编号": self.item, |
| 74 | "合同编号(正文)": self.item, | 73 | "合同编号(正文)": self.item, |
| ... | @@ -150,23 +149,24 @@ class Finder: | ... | @@ -150,23 +149,24 @@ class Finder: |
| 150 | words = matchObj.group(1) | 149 | words = matchObj.group(1) |
| 151 | contract_no['position'] = None | 150 | contract_no['position'] = None |
| 152 | contract_no['page'] = pno | 151 | contract_no['page'] = pno |
| 153 | contract_no['words'] = words | 152 | # contract_no['words'] = words |
| 153 | contract_no['words'] = re.sub("\s", "", words).replace(")", "") | ||
| 154 | return contract_no | 154 | return contract_no |
| 155 | |||
| 156 | matchObj = re.search(r'编号为(.*?)的', all_text) | 155 | matchObj = re.search(r'编号为(.*?)的', all_text) |
| 157 | if matchObj: | 156 | if matchObj: |
| 158 | words = matchObj.group(1).strip() | 157 | words = matchObj.group(1).strip() |
| 159 | contract_no['position'] = None | 158 | contract_no['position'] = None |
| 160 | contract_no['page'] = pno | 159 | contract_no['page'] = pno |
| 161 | contract_no['words'] = words | 160 | # contract_no['words'] = words |
| 161 | contract_no['words'] = re.sub("\s", "", words).replace(")", "") | ||
| 162 | return contract_no | 162 | return contract_no |
| 163 | |||
| 164 | matchObj = re.search(r'编号为(.*?))的', all_text) | 163 | matchObj = re.search(r'编号为(.*?))的', all_text) |
| 165 | if matchObj: | 164 | if matchObj: |
| 166 | words = matchObj.group(1).strip() | 165 | words = matchObj.group(1).strip() |
| 167 | contract_no['position'] = None | 166 | contract_no['position'] = None |
| 168 | contract_no['page'] = pno | 167 | contract_no['page'] = pno |
| 169 | contract_no['words'] = words | 168 | # contract_no['words'] = words |
| 169 | contract_no['words'] = re.sub("\s", "", words) | ||
| 170 | return contract_no | 170 | return contract_no |
| 171 | 171 | ||
| 172 | def get_key_value(self, key, page_num=None): | 172 | def get_key_value(self, key, page_num=None): |
| ... | @@ -180,10 +180,11 @@ class Finder: | ... | @@ -180,10 +180,11 @@ class Finder: |
| 180 | for span in line['spans']: | 180 | for span in line['spans']: |
| 181 | bbox, text = span['bbox'], span['text'] | 181 | bbox, text = span['bbox'], span['text'] |
| 182 | if key in text: | 182 | if key in text: |
| 183 | words = text.split(':')[-1] | 183 | words = text.split(':')[-1].replace("。", "") |
| 184 | value['position'] = bbox | 184 | value['position'] = bbox |
| 185 | value['page'] = pno | 185 | value['page'] = pno |
| 186 | value['words'] = words | 186 | # value['words'] = words |
| 187 | value['words'] = re.sub("\s", "", words) | ||
| 187 | else: | 188 | else: |
| 188 | for pno in self.pdf_info: | 189 | for pno in self.pdf_info: |
| 189 | for block in self.pdf_info[pno]['blocks']: | 190 | for block in self.pdf_info[pno]['blocks']: |
| ... | @@ -194,10 +195,11 @@ class Finder: | ... | @@ -194,10 +195,11 @@ class Finder: |
| 194 | bbox, text = span['bbox'], span['text'] | 195 | bbox, text = span['bbox'], span['text'] |
| 195 | if key in text: | 196 | if key in text: |
| 196 | # print(self.pdf_info[pno]) | 197 | # print(self.pdf_info[pno]) |
| 197 | words = text.split(':')[-1] | 198 | words = text.split(':')[-1].replace("。", "") |
| 198 | value['position'] = bbox | 199 | value['position'] = bbox |
| 199 | value['page'] = pno | 200 | value['page'] = pno |
| 200 | value['words'] = words | 201 | # value['words'] = words |
| 202 | value['words'] = re.sub("\s", "", words) | ||
| 201 | return value | 203 | return value |
| 202 | 204 | ||
| 203 | def get_loan_principal(self, page_num='0'): | 205 | def get_loan_principal(self, page_num='0'): |
| ... | @@ -267,7 +269,6 @@ class Finder: | ... | @@ -267,7 +269,6 @@ class Finder: |
| 267 | 269 | ||
| 268 | def get_asp_details(self, page_num): | 270 | def get_asp_details(self, page_num): |
| 269 | asp_details_table_term = self.item.copy() | 271 | asp_details_table_term = self.item.copy() |
| 270 | |||
| 271 | asp_details_table = [] | 272 | asp_details_table = [] |
| 272 | asp_details_text_list = [] | 273 | asp_details_text_list = [] |
| 273 | table = False | 274 | table = False |
| ... | @@ -283,25 +284,20 @@ class Finder: | ... | @@ -283,25 +284,20 @@ class Finder: |
| 283 | table = False | 284 | table = False |
| 284 | if table == True: | 285 | if table == True: |
| 285 | asp_details_text_list.append(text) | 286 | asp_details_text_list.append(text) |
| 286 | 287 | for i in range((len(asp_details_text_list) + 2) // 3): | |
| 287 | for i in range((len(asp_details_text_list)+2)//3): | ||
| 288 | |||
| 289 | line = [] | 288 | line = [] |
| 290 | if i == 0: | 289 | if i == 0: |
| 291 | line = [asp_details_text_list[0]] | 290 | line = [asp_details_text_list[0]] |
| 292 | else: | 291 | else: |
| 293 | for j in range(3): | 292 | for j in range(3): |
| 294 | line.append(asp_details_text_list[i*3-2+j]) | 293 | line.append(asp_details_text_list[i * 3 - 2 + j]) |
| 295 | |||
| 296 | asp_details_table.append(line) | 294 | asp_details_table.append(line) |
| 297 | |||
| 298 | if len(asp_details_table) > 0: | 295 | if len(asp_details_table) > 0: |
| 299 | asp_details_table_term['words'] = asp_details_table | 296 | asp_details_table_term['words'] = asp_details_table |
| 300 | return asp_details_table_term | 297 | return asp_details_table_term |
| 301 | 298 | ||
| 302 | def get_signature(self): | 299 | def get_signature(self): |
| 303 | signature = self.item.copy() | 300 | signature = self.item.copy() |
| 304 | |||
| 305 | for block in self.pdf_info['0']['blocks']: | 301 | for block in self.pdf_info['0']['blocks']: |
| 306 | if block['type'] != 0: | 302 | if block['type'] != 0: |
| 307 | continue | 303 | continue |
| ... | @@ -369,8 +365,8 @@ class Finder: | ... | @@ -369,8 +365,8 @@ class Finder: |
| 369 | for line in block['lines']: | 365 | for line in block['lines']: |
| 370 | for span in line['spans']: | 366 | for span in line['spans']: |
| 371 | bbox, text = span['bbox'], span['text'] | 367 | bbox, text = span['bbox'], span['text'] |
| 372 | if anchor_bbox[2]<np.mean(bbox[::2])<half_width and \ | 368 | if anchor_bbox[2] < np.mean(bbox[::2]) < half_width and \ |
| 373 | anchor_bbox[1]<np.mean(bbox[1::2])<anchor_bbox[3]: | 369 | anchor_bbox[1] < np.mean(bbox[1::2]) < anchor_bbox[3]: |
| 374 | seller['position'] = bbox | 370 | seller['position'] = bbox |
| 375 | seller['words'] = text | 371 | seller['words'] = text |
| 376 | return seller | 372 | return seller |
| ... | @@ -430,7 +426,6 @@ class Finder: | ... | @@ -430,7 +426,6 @@ class Finder: |
| 430 | 426 | ||
| 431 | def get_repayment_schedule(self): | 427 | def get_repayment_schedule(self): |
| 432 | repayment_schedule = self.item.copy() | 428 | repayment_schedule = self.item.copy() |
| 433 | |||
| 434 | repayment_schedule_text_list = [] | 429 | repayment_schedule_text_list = [] |
| 435 | table = False | 430 | table = False |
| 436 | page = None | 431 | page = None |
| ... | @@ -444,20 +439,25 @@ class Finder: | ... | @@ -444,20 +439,25 @@ class Finder: |
| 444 | if '以上表格中所列序号' in text: | 439 | if '以上表格中所列序号' in text: |
| 445 | table = False | 440 | table = False |
| 446 | if table == True: | 441 | if table == True: |
| 442 | # 过滤汉字 | ||
| 443 | if re.compile(r'[\u4e00-\u9fff]').search(text): | ||
| 444 | continue | ||
| 445 | # 过滤 1. - 61. 这些标题 | ||
| 446 | if re.findall("\d+", text): | ||
| 447 | if len(re.findall("\d+", text)) == 1: | ||
| 448 | continue | ||
| 447 | repayment_schedule_text_list.append(text) | 449 | repayment_schedule_text_list.append(text) |
| 448 | if '61.' in text: | 450 | if '61.' in text: |
| 449 | page = pno | 451 | page = pno |
| 450 | table = True | 452 | table = True |
| 451 | 453 | # print("repayment_schedule_text_list = ", repayment_schedule_text_list) | |
| 452 | repayment_schedule_table = [['序号', '融资租赁成本', '融资租赁费用', '租金', '剩余融资租赁成本']] | 454 | repayment_schedule_table = [['序号', '融资租赁成本', '融资租赁费用', '租金', '剩余融资租赁成本']] |
| 453 | for i in range(len(repayment_schedule_text_list)//4): | 455 | for i in range(len(repayment_schedule_text_list) // 4): |
| 454 | line = [f'{i+1}.'] | 456 | line = [f'{i + 1}.'] |
| 455 | # 4表示4列的意思 | 457 | # 4表示4列的意思 |
| 456 | for j in range(4): | 458 | for j in range(4): |
| 457 | line.append(repayment_schedule_text_list[i*4+j]) | 459 | line.append(repayment_schedule_text_list[i * 4 + j]) |
| 458 | |||
| 459 | repayment_schedule_table.append(line) | 460 | repayment_schedule_table.append(line) |
| 460 | |||
| 461 | repayment_schedule['words'] = repayment_schedule_table | 461 | repayment_schedule['words'] = repayment_schedule_table |
| 462 | repayment_schedule['page'] = page | 462 | repayment_schedule['page'] = page |
| 463 | return repayment_schedule | 463 | return repayment_schedule |
| ... | @@ -506,7 +506,7 @@ class Finder: | ... | @@ -506,7 +506,7 @@ class Finder: |
| 506 | else: | 506 | else: |
| 507 | words = '无' | 507 | words = '无' |
| 508 | boxes = np.array(boxes).reshape((-1, 2)) | 508 | boxes = np.array(boxes).reshape((-1, 2)) |
| 509 | position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])] | 509 | position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])] |
| 510 | signature_role_2['page_num'] = page_num | 510 | signature_role_2['page_num'] = page_num |
| 511 | signature_role_2['position'] = position | 511 | signature_role_2['position'] = position |
| 512 | signature_role_2['words'] = words | 512 | signature_role_2['words'] = words |
| ... | @@ -541,7 +541,7 @@ class Finder: | ... | @@ -541,7 +541,7 @@ class Finder: |
| 541 | else: | 541 | else: |
| 542 | words = '无' | 542 | words = '无' |
| 543 | boxes = np.array(boxes).reshape((-1, 2)) | 543 | boxes = np.array(boxes).reshape((-1, 2)) |
| 544 | position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])] | 544 | position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])] |
| 545 | signature_role_3['page_num'] = page_num | 545 | signature_role_3['page_num'] = page_num |
| 546 | signature_role_3['position'] = position | 546 | signature_role_3['position'] = position |
| 547 | signature_role_3['words'] = words | 547 | signature_role_3['words'] = words |
| ... | @@ -576,7 +576,7 @@ class Finder: | ... | @@ -576,7 +576,7 @@ class Finder: |
| 576 | else: | 576 | else: |
| 577 | words = '无' | 577 | words = '无' |
| 578 | boxes = np.array(boxes).reshape((-1, 2)) | 578 | boxes = np.array(boxes).reshape((-1, 2)) |
| 579 | position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])] | 579 | position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])] |
| 580 | signature_role_4['page_num'] = page_num | 580 | signature_role_4['page_num'] = page_num |
| 581 | signature_role_4['position'] = position | 581 | signature_role_4['position'] = position |
| 582 | signature_role_4['words'] = words | 582 | signature_role_4['words'] = words |
| ... | @@ -612,7 +612,7 @@ class Finder: | ... | @@ -612,7 +612,7 @@ class Finder: |
| 612 | else: | 612 | else: |
| 613 | words = '无' | 613 | words = '无' |
| 614 | boxes = np.array(boxes).reshape((-1, 2)) | 614 | boxes = np.array(boxes).reshape((-1, 2)) |
| 615 | position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])] | 615 | position = [min(boxes[:, 0]), min(boxes[:, 1]), max(boxes[:, 0]), max(boxes[:, 1])] |
| 616 | signature_role_5['page_num'] = page_num | 616 | signature_role_5['page_num'] = page_num |
| 617 | signature_role_5['position'] = position | 617 | signature_role_5['position'] = position |
| 618 | signature_role_5['words'] = words | 618 | signature_role_5['words'] = words |
| ... | @@ -640,7 +640,7 @@ class Finder: | ... | @@ -640,7 +640,7 @@ class Finder: |
| 640 | for line in block['lines']: | 640 | for line in block['lines']: |
| 641 | for span in line['spans']: | 641 | for span in line['spans']: |
| 642 | bbox, text = span['bbox'], span['text'] | 642 | bbox, text = span['bbox'], span['text'] |
| 643 | if '签署日期' in text and int(anchor_top)<np.mean(bbox[1::2])<int(anchor_bottom): | 643 | if '签署日期' in text and int(anchor_top) < np.mean(bbox[1::2]) < int(anchor_bottom): |
| 644 | name = text.split(' ')[0] | 644 | name = text.split(' ')[0] |
| 645 | date = text.split(':')[-1] | 645 | date = text.split(':')[-1] |
| 646 | signature_name['words'] = name | 646 | signature_name['words'] = name |
| ... | @@ -663,7 +663,7 @@ class Finder: | ... | @@ -663,7 +663,7 @@ class Finder: |
| 663 | if top in text: | 663 | if top in text: |
| 664 | anchor_top = bbox[1] | 664 | anchor_top = bbox[1] |
| 665 | if bottom in text: | 665 | if bottom in text: |
| 666 | anchor_bottom = bbox[1] | 666 | anchor_bottom = bbox[3] |
| 667 | if anchor_top is not None and anchor_bottom is not None: | 667 | if anchor_top is not None and anchor_bottom is not None: |
| 668 | for pno in self.pdf_info: | 668 | for pno in self.pdf_info: |
| 669 | for block in self.pdf_info[pno]['blocks']: | 669 | for block in self.pdf_info[pno]['blocks']: |
| ... | @@ -672,7 +672,9 @@ class Finder: | ... | @@ -672,7 +672,9 @@ class Finder: |
| 672 | for line in block['lines']: | 672 | for line in block['lines']: |
| 673 | for span in line['spans']: | 673 | for span in line['spans']: |
| 674 | bbox, text = span['bbox'], span['text'] | 674 | bbox, text = span['bbox'], span['text'] |
| 675 | if '签署日期' in text and int(anchor_top)<np.mean(bbox[1::2])<int(anchor_bottom): | 675 | # ------------ # |
| 676 | # print("--text = ", text) | ||
| 677 | if '签署日期' in text and int(anchor_top) < np.mean(bbox[1::2]) < int(anchor_bottom): | ||
| 676 | words = text | 678 | words = text |
| 677 | signature['words'] = words | 679 | signature['words'] = words |
| 678 | signature['page'] = pno | 680 | signature['page'] = pno |
| ... | @@ -683,7 +685,6 @@ class Finder: | ... | @@ -683,7 +685,6 @@ class Finder: |
| 683 | name = self.item.copy() | 685 | name = self.item.copy() |
| 684 | id_num = self.item.copy() | 686 | id_num = self.item.copy() |
| 685 | representative = self.item.copy() | 687 | representative = self.item.copy() |
| 686 | |||
| 687 | # 以保证人3 的左上角为定位点 | 688 | # 以保证人3 的左上角为定位点 |
| 688 | anchor = None | 689 | anchor = None |
| 689 | for block in self.pdf_info[page_num]['blocks']: | 690 | for block in self.pdf_info[page_num]['blocks']: |
| ... | @@ -695,7 +696,6 @@ class Finder: | ... | @@ -695,7 +696,6 @@ class Finder: |
| 695 | # 找到角色姓名 | 696 | # 找到角色姓名 |
| 696 | if re.match('保证人3', text) is not None: | 697 | if re.match('保证人3', text) is not None: |
| 697 | anchor = [bbox[0], bbox[1]] | 698 | anchor = [bbox[0], bbox[1]] |
| 698 | |||
| 699 | if anchor is not None: | 699 | if anchor is not None: |
| 700 | for block in self.pdf_info[page_num]['blocks']: | 700 | for block in self.pdf_info[page_num]['blocks']: |
| 701 | if block['type'] != 0: | 701 | if block['type'] != 0: |
| ... | @@ -711,52 +711,60 @@ class Finder: | ... | @@ -711,52 +711,60 @@ class Finder: |
| 711 | name['position'] = bbox | 711 | name['position'] = bbox |
| 712 | if role_key == '承租人:': | 712 | if role_key == '承租人:': |
| 713 | # 找到证件号码且确定位置 | 713 | # 找到证件号码且确定位置 |
| 714 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]: | 714 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( |
| 715 | bbox[1::2]) < anchor[1]: | ||
| 715 | words = text.split(':')[-1] | 716 | words = text.split(':')[-1] |
| 716 | id_num['words'] = words | 717 | id_num['words'] = words |
| 717 | id_num['page'] = page_num | 718 | id_num['page'] = page_num |
| 718 | id_num['position'] = bbox | 719 | id_num['position'] = bbox |
| 719 | # 找到法人代表且确定位置 | 720 | # 找到法人代表且确定位置 |
| 720 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]: | 721 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( |
| 722 | bbox[1::2]) < anchor[1]: | ||
| 721 | words = text.split(':')[-1] | 723 | words = text.split(':')[-1] |
| 722 | representative['words'] = words | 724 | representative['words'] = words |
| 723 | representative['page'] = page_num | 725 | representative['page'] = page_num |
| 724 | representative['position'] = bbox | 726 | representative['position'] = bbox |
| 725 | if role_key == '保证人1:': | 727 | if role_key == '保证人1:': |
| 726 | # 找到证件号码且确定位置 | 728 | # 找到证件号码且确定位置 |
| 727 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]: | 729 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( |
| 730 | bbox[1::2]) > anchor[1]: | ||
| 728 | words = text.split(':')[-1] | 731 | words = text.split(':')[-1] |
| 729 | id_num['words'] = words | 732 | id_num['words'] = words |
| 730 | id_num['page'] = page_num | 733 | id_num['page'] = page_num |
| 731 | id_num['position'] = bbox | 734 | id_num['position'] = bbox |
| 732 | # 找到法人代表且确定位置 | 735 | # 找到法人代表且确定位置 |
| 733 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]: | 736 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( |
| 737 | bbox[1::2]) > anchor[1]: | ||
| 734 | words = text.split(':')[-1] | 738 | words = text.split(':')[-1] |
| 735 | representative['words'] = words | 739 | representative['words'] = words |
| 736 | representative['page'] = page_num | 740 | representative['page'] = page_num |
| 737 | representative['position'] = bbox | 741 | representative['position'] = bbox |
| 738 | if role_key == '保证人2:': | 742 | if role_key == '保证人2:': |
| 739 | # 找到证件号码且确定位置 | 743 | # 找到证件号码且确定位置 |
| 740 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]: | 744 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( |
| 745 | bbox[1::2]) < anchor[1]: | ||
| 741 | words = text.split(':')[-1] | 746 | words = text.split(':')[-1] |
| 742 | id_num['words'] = words | 747 | id_num['words'] = words |
| 743 | id_num['page'] = page_num | 748 | id_num['page'] = page_num |
| 744 | id_num['position'] = bbox | 749 | id_num['position'] = bbox |
| 745 | # 找到法人代表且确定位置 | 750 | # 找到法人代表且确定位置 |
| 746 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]: | 751 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( |
| 752 | bbox[1::2]) < anchor[1]: | ||
| 747 | words = text.split(':')[-1] | 753 | words = text.split(':')[-1] |
| 748 | representative['words'] = words | 754 | representative['words'] = words |
| 749 | representative['page'] = page_num | 755 | representative['page'] = page_num |
| 750 | representative['position'] = bbox | 756 | representative['position'] = bbox |
| 751 | if role_key == '保证人3:': | 757 | if role_key == '保证人3:': |
| 752 | # 找到证件号码且确定位置 | 758 | # 找到证件号码且确定位置 |
| 753 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]: | 759 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( |
| 760 | bbox[1::2]) > anchor[1]: | ||
| 754 | words = text.split(':')[-1] | 761 | words = text.split(':')[-1] |
| 755 | id_num['words'] = words | 762 | id_num['words'] = words |
| 756 | id_num['page'] = page_num | 763 | id_num['page'] = page_num |
| 757 | id_num['position'] = bbox | 764 | id_num['position'] = bbox |
| 758 | # 找到法人代表且确定位置 | 765 | # 找到法人代表且确定位置 |
| 759 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]: | 766 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( |
| 767 | bbox[1::2]) > anchor[1]: | ||
| 760 | words = text.split(':')[-1] | 768 | words = text.split(':')[-1] |
| 761 | representative['words'] = words | 769 | representative['words'] = words |
| 762 | representative['page'] = page_num | 770 | representative['page'] = page_num |
| ... | @@ -783,12 +791,10 @@ class Finder: | ... | @@ -783,12 +791,10 @@ class Finder: |
| 783 | start = False | 791 | start = False |
| 784 | if start == True: | 792 | if start == True: |
| 785 | items.append(text) | 793 | items.append(text) |
| 786 | |||
| 787 | lines = [['项目', '购买价格', '实际融资金额']] | 794 | lines = [['项目', '购买价格', '实际融资金额']] |
| 788 | for i in range(len(items)//3): | 795 | for i in range(len(items) // 3): |
| 789 | line = [items[2+i*3+0], items[2+i*3+1], items[2+i*3+2]] | 796 | line = [items[2 + i * 3 + 0], items[2 + i * 3 + 1], items[2 + i * 3 + 2]] |
| 790 | lines.append(line) | 797 | lines.append(line) |
| 791 | |||
| 792 | if len(items) > 0: | 798 | if len(items) > 0: |
| 793 | lines.append([items[0], '', items[1]]) | 799 | lines.append([items[0], '', items[1]]) |
| 794 | 800 | ||
| ... | @@ -800,7 +806,6 @@ class Finder: | ... | @@ -800,7 +806,6 @@ class Finder: |
| 800 | def get_contract_no_dy(self): | 806 | def get_contract_no_dy(self): |
| 801 | # 查找抵押合同编号 | 807 | # 查找抵押合同编号 |
| 802 | contract_no = self.item.copy() | 808 | contract_no = self.item.copy() |
| 803 | |||
| 804 | key_box = None | 809 | key_box = None |
| 805 | for pno in self.pdf_info: | 810 | for pno in self.pdf_info: |
| 806 | for block in self.pdf_info[pno]['blocks']: | 811 | for block in self.pdf_info[pno]['blocks']: |
| ... | @@ -811,7 +816,6 @@ class Finder: | ... | @@ -811,7 +816,6 @@ class Finder: |
| 811 | bbox, text = span['bbox'], span['text'] | 816 | bbox, text = span['bbox'], span['text'] |
| 812 | if '抵押合同编号' in text: | 817 | if '抵押合同编号' in text: |
| 813 | key_box = bbox | 818 | key_box = bbox |
| 814 | |||
| 815 | if key_box is not None: | 819 | if key_box is not None: |
| 816 | for pno in self.pdf_info: | 820 | for pno in self.pdf_info: |
| 817 | for block in self.pdf_info[pno]['blocks']: | 821 | for block in self.pdf_info[pno]['blocks']: |
| ... | @@ -829,7 +833,6 @@ class Finder: | ... | @@ -829,7 +833,6 @@ class Finder: |
| 829 | def get_dyr_name_id(self): | 833 | def get_dyr_name_id(self): |
| 830 | name = self.item.copy() | 834 | name = self.item.copy() |
| 831 | _id = self.item.copy() | 835 | _id = self.item.copy() |
| 832 | |||
| 833 | key_box = None | 836 | key_box = None |
| 834 | for pno in self.pdf_info: | 837 | for pno in self.pdf_info: |
| 835 | for block in self.pdf_info[pno]['blocks']: | 838 | for block in self.pdf_info[pno]['blocks']: |
| ... | @@ -842,7 +845,7 @@ class Finder: | ... | @@ -842,7 +845,7 @@ class Finder: |
| 842 | key_box = bbox | 845 | key_box = bbox |
| 843 | 846 | ||
| 844 | if key_box is not None: | 847 | if key_box is not None: |
| 845 | rh = abs(key_box[1]-key_box[3]) | 848 | rh = abs(key_box[1] - key_box[3]) |
| 846 | for pno in self.pdf_info: | 849 | for pno in self.pdf_info: |
| 847 | for block in self.pdf_info[pno]['blocks']: | 850 | for block in self.pdf_info[pno]['blocks']: |
| 848 | if block['type'] != 0: | 851 | if block['type'] != 0: |
| ... | @@ -850,12 +853,12 @@ class Finder: | ... | @@ -850,12 +853,12 @@ class Finder: |
| 850 | for line in block['lines']: | 853 | for line in block['lines']: |
| 851 | for span in line['spans']: | 854 | for span in line['spans']: |
| 852 | bbox, text = span['bbox'], span['text'] | 855 | bbox, text = span['bbox'], span['text'] |
| 853 | if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '姓名' in text: | 856 | if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '姓名' in text: |
| 854 | words = text.split(':')[-1] | 857 | words = text.split(':')[-1] |
| 855 | name['position'] = bbox | 858 | name['position'] = bbox |
| 856 | name['page'] = pno | 859 | name['page'] = pno |
| 857 | name['words'] = words | 860 | name['words'] = words |
| 858 | if key_box[1] < np.mean(bbox[1::2]) < key_box[3]+rh*3 and '证件号码' in text: | 861 | if key_box[1] < np.mean(bbox[1::2]) < key_box[3] + rh * 3 and '证件号码' in text: |
| 859 | words = text.split(':')[-1] | 862 | words = text.split(':')[-1] |
| 860 | _id['position'] = bbox | 863 | _id['position'] = bbox |
| 861 | _id['page'] = pno | 864 | _id['page'] = pno |
| ... | @@ -864,7 +867,6 @@ class Finder: | ... | @@ -864,7 +867,6 @@ class Finder: |
| 864 | 867 | ||
| 865 | def get_key_value_position(self, key): | 868 | def get_key_value_position(self, key): |
| 866 | value = self.item.copy() | 869 | value = self.item.copy() |
| 867 | |||
| 868 | key_box = None | 870 | key_box = None |
| 869 | for pno in self.pdf_info: | 871 | for pno in self.pdf_info: |
| 870 | for block in self.pdf_info[pno]['blocks']: | 872 | for block in self.pdf_info[pno]['blocks']: |
| ... | @@ -875,9 +877,8 @@ class Finder: | ... | @@ -875,9 +877,8 @@ class Finder: |
| 875 | bbox, text = span['bbox'], span['text'] | 877 | bbox, text = span['bbox'], span['text'] |
| 876 | if text == key: | 878 | if text == key: |
| 877 | key_box = bbox | 879 | key_box = bbox |
| 878 | |||
| 879 | if key_box is not None: | 880 | if key_box is not None: |
| 880 | rh = abs(key_box[1]-key_box[3]) | 881 | rh = abs(key_box[1] - key_box[3]) |
| 881 | for pno in self.pdf_info: | 882 | for pno in self.pdf_info: |
| 882 | for block in self.pdf_info[pno]['blocks']: | 883 | for block in self.pdf_info[pno]['blocks']: |
| 883 | if block['type'] != 0: | 884 | if block['type'] != 0: |
| ... | @@ -885,13 +886,104 @@ class Finder: | ... | @@ -885,13 +886,104 @@ class Finder: |
| 885 | for line in block['lines']: | 886 | for line in block['lines']: |
| 886 | for span in line['spans']: | 887 | for span in line['spans']: |
| 887 | bbox, text = span['bbox'], span['text'] | 888 | bbox, text = span['bbox'], span['text'] |
| 888 | if key_box[1] < np.mean(bbox[1::2]) < key_box[3] and key_box[0] < bbox[0] and abs(key_box[2]-bbox[0]) < rh*10: | 889 | if key_box[1] < np.mean(bbox[1::2]) < key_box[3] and key_box[0] < bbox[0] and abs( |
| 890 | key_box[2] - bbox[0]) < rh * 10: | ||
| 889 | words = text | 891 | words = text |
| 890 | value['position'] = bbox | 892 | value['position'] = bbox |
| 891 | value['page'] = pno | 893 | value['page'] = pno |
| 892 | value['words'] = words | 894 | value['words'] = words |
| 893 | return value | 895 | return value |
| 894 | 896 | ||
| 897 | def get_role_info_3_3(self, role_key, page_num='0'): | ||
| 898 | name = self.item.copy() | ||
| 899 | id_num = self.item.copy() | ||
| 900 | representative = self.item.copy() | ||
| 901 | # 以保证人2 的左上角为定位点 | ||
| 902 | anchor = None | ||
| 903 | for block in self.pdf_info[page_num]['blocks']: | ||
| 904 | if block['type'] != 0: | ||
| 905 | continue | ||
| 906 | for line in block['lines']: | ||
| 907 | for span in line['spans']: | ||
| 908 | bbox, text = span['bbox'], span['text'] | ||
| 909 | # 找到角色姓名 | ||
| 910 | if re.match('保证人2', text) is not None: | ||
| 911 | anchor = [bbox[0], bbox[1]] | ||
| 912 | if anchor is not None: | ||
| 913 | for block in self.pdf_info[page_num]['blocks']: | ||
| 914 | if block['type'] != 0: | ||
| 915 | continue | ||
| 916 | for line in block['lines']: | ||
| 917 | for span in line['spans']: | ||
| 918 | bbox, text = span['bbox'], span['text'] | ||
| 919 | # 找到角色姓名 | ||
| 920 | if re.match(role_key, text) is not None: | ||
| 921 | words = text.split(':')[-1] | ||
| 922 | name['words'] = words | ||
| 923 | name['page'] = page_num | ||
| 924 | name['position'] = bbox | ||
| 925 | if role_key == '承租人一:': | ||
| 926 | # 找到证件号码且确定位置 | ||
| 927 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( | ||
| 928 | bbox[1::2]) < anchor[1]: | ||
| 929 | words = text.split(':')[-1] | ||
| 930 | id_num['words'] = words | ||
| 931 | id_num['page'] = page_num | ||
| 932 | id_num['position'] = bbox | ||
| 933 | # 找到法人代表且确定位置 | ||
| 934 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( | ||
| 935 | bbox[1::2]) < anchor[1]: | ||
| 936 | words = text.split(':')[-1] | ||
| 937 | representative['words'] = words | ||
| 938 | representative['page'] = page_num | ||
| 939 | representative['position'] = bbox | ||
| 940 | if role_key == '共同承租人:': | ||
| 941 | # 找到证件号码且确定位置 | ||
| 942 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( | ||
| 943 | bbox[1::2]) > anchor[1]: | ||
| 944 | words = text.split(':')[-1] | ||
| 945 | id_num['words'] = words | ||
| 946 | id_num['page'] = page_num | ||
| 947 | id_num['position'] = bbox | ||
| 948 | # 找到法人代表且确定位置 | ||
| 949 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean( | ||
| 950 | bbox[1::2]) > anchor[1]: | ||
| 951 | words = text.split(':')[-1] | ||
| 952 | representative['words'] = words | ||
| 953 | representative['page'] = page_num | ||
| 954 | representative['position'] = bbox | ||
| 955 | if role_key == '保证人1:': | ||
| 956 | # 找到证件号码且确定位置 | ||
| 957 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( | ||
| 958 | bbox[1::2]) < anchor[1]: | ||
| 959 | words = text.split(':')[-1] | ||
| 960 | id_num['words'] = words | ||
| 961 | id_num['page'] = page_num | ||
| 962 | id_num['position'] = bbox | ||
| 963 | # 找到法人代表且确定位置 | ||
| 964 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( | ||
| 965 | bbox[1::2]) < anchor[1]: | ||
| 966 | words = text.split(':')[-1] | ||
| 967 | representative['words'] = words | ||
| 968 | representative['page'] = page_num | ||
| 969 | representative['position'] = bbox | ||
| 970 | if role_key == '保证人2:': | ||
| 971 | # 找到证件号码且确定位置 | ||
| 972 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( | ||
| 973 | bbox[1::2]) > anchor[1]: | ||
| 974 | words = text.split(':')[-1] | ||
| 975 | id_num['words'] = words | ||
| 976 | id_num['page'] = page_num | ||
| 977 | id_num['position'] = bbox | ||
| 978 | # 找到法人代表且确定位置 | ||
| 979 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean( | ||
| 980 | bbox[1::2]) > anchor[1]: | ||
| 981 | words = text.split(':')[-1] | ||
| 982 | representative['words'] = words | ||
| 983 | representative['page'] = page_num | ||
| 984 | representative['position'] = bbox | ||
| 985 | return name, id_num, representative | ||
| 986 | |||
| 895 | def get_info(self): | 987 | def get_info(self): |
| 896 | """ | 988 | """ |
| 897 | block['type'] == 0 : 表示该元素为图片 | 989 | block['type'] == 0 : 表示该元素为图片 |
| ... | @@ -905,6 +997,8 @@ class Finder: | ... | @@ -905,6 +997,8 @@ class Finder: |
| 905 | self.init_result['合同编号'] = contract_no | 997 | self.init_result['合同编号'] = contract_no |
| 906 | # 从第一页上取四个角色的姓名和证件号码 | 998 | # 从第一页上取四个角色的姓名和证件号码 |
| 907 | name, id_num, representative = self.get_role_info(role_key='承租人:', page_num='0') | 999 | name, id_num, representative = self.get_role_info(role_key='承租人:', page_num='0') |
| 1000 | if name["words"] == None: | ||
| 1001 | name, id_num, representative = self.get_role_info_3_3(role_key='承租人一:', page_num='0') | ||
| 908 | self.init_result['承租人-姓名'] = name | 1002 | self.init_result['承租人-姓名'] = name |
| 909 | self.init_result['承租人-证件号码'] = id_num | 1003 | self.init_result['承租人-证件号码'] = id_num |
| 910 | self.init_result['承租人-法定代表人或授权代表'] = representative | 1004 | self.init_result['承租人-法定代表人或授权代表'] = representative |
| ... | @@ -912,14 +1006,31 @@ class Finder: | ... | @@ -912,14 +1006,31 @@ class Finder: |
| 912 | self.init_result['保证人1-姓名'] = name | 1006 | self.init_result['保证人1-姓名'] = name |
| 913 | self.init_result['保证人1-证件号码'] = id_num | 1007 | self.init_result['保证人1-证件号码'] = id_num |
| 914 | self.init_result['保证人1-法定代表人或授权代表'] = representative | 1008 | self.init_result['保证人1-法定代表人或授权代表'] = representative |
| 1009 | # if条件判别 对应3_3版本 | ||
| 1010 | if name["words"] == None: | ||
| 1011 | name, id_num, representative = self.get_role_info_3_3(role_key='共同承租人:', page_num='0') | ||
| 1012 | self.init_result['共同承租人-姓名'] = name | ||
| 1013 | self.init_result['共同承租人-证件号码'] = id_num | ||
| 1014 | self.init_result['共同承租人-法定代表人或授权代表'] = representative | ||
| 915 | name, id_num, representative = self.get_role_info(role_key='保证人2:', page_num='0') | 1015 | name, id_num, representative = self.get_role_info(role_key='保证人2:', page_num='0') |
| 916 | self.init_result['保证人2-姓名'] = name | 1016 | self.init_result['保证人2-姓名'] = name |
| 917 | self.init_result['保证人2-证件号码'] = id_num | 1017 | self.init_result['保证人2-证件号码'] = id_num |
| 918 | self.init_result['保证人2-法定代表人或授权代表'] = representative | 1018 | self.init_result['保证人2-法定代表人或授权代表'] = representative |
| 1019 | # if条件判别 对应3_3版本 | ||
| 1020 | if name["words"] == None: | ||
| 1021 | name, id_num, representative = self.get_role_info_3_3(role_key='保证人1:', page_num='0') | ||
| 1022 | self.init_result['保证人2-姓名'] = name | ||
| 1023 | self.init_result['保证人2-证件号码'] = id_num | ||
| 1024 | self.init_result['保证人2-法定代表人或授权代表'] = representative | ||
| 919 | name, id_num, representative = self.get_role_info(role_key='保证人3:', page_num='0') | 1025 | name, id_num, representative = self.get_role_info(role_key='保证人3:', page_num='0') |
| 920 | self.init_result['保证人3-姓名'] = name | 1026 | self.init_result['保证人3-姓名'] = name |
| 921 | self.init_result['保证人3-证件号码'] = id_num | 1027 | self.init_result['保证人3-证件号码'] = id_num |
| 922 | self.init_result['保证人3-法定代表人或授权代表'] = representative | 1028 | self.init_result['保证人3-法定代表人或授权代表'] = representative |
| 1029 | if name["words"] == None: | ||
| 1030 | name, id_num, representative = self.get_role_info_3_3(role_key='保证人2:', page_num='0') | ||
| 1031 | self.init_result['保证人3-姓名'] = name | ||
| 1032 | self.init_result['保证人3-证件号码'] = id_num | ||
| 1033 | self.init_result['保证人3-法定代表人或授权代表'] = representative | ||
| 923 | # 在所有页面中找正文中(第二部分 融资租赁主要条款及付款计划)的那个编号,因为存在换行的情况所以暂时不带位置输出 | 1034 | # 在所有页面中找正文中(第二部分 融资租赁主要条款及付款计划)的那个编号,因为存在换行的情况所以暂时不带位置输出 |
| 924 | contract_no = self.get_contract_no_one() | 1035 | contract_no = self.get_contract_no_one() |
| 925 | self.init_result['合同编号(正文)'] = contract_no | 1036 | self.init_result['合同编号(正文)'] = contract_no |
| ... | @@ -955,6 +1066,9 @@ class Finder: | ... | @@ -955,6 +1066,9 @@ class Finder: |
| 955 | # 承租人姓名、签章 | 1066 | # 承租人姓名、签章 |
| 956 | name = self.get_key_value(key='承租人姓名:') | 1067 | name = self.get_key_value(key='承租人姓名:') |
| 957 | electronic_signature = self.get_electronic_signature(top='承租人姓名:', bottom='保证人1姓名:') | 1068 | electronic_signature = self.get_electronic_signature(top='承租人姓名:', bottom='保证人1姓名:') |
| 1069 | if name["words"] == None: | ||
| 1070 | name = self.get_key_value(key='承租人一姓名:') | ||
| 1071 | electronic_signature = self.get_electronic_signature(top='承租人一姓名:', bottom='共同承租人名称:') | ||
| 958 | self.init_result['签字页-承租人姓名'] = name | 1072 | self.init_result['签字页-承租人姓名'] = name |
| 959 | self.init_result['签字页-承租人签章'] = electronic_signature | 1073 | self.init_result['签字页-承租人签章'] = electronic_signature |
| 960 | # 保证人1姓名、签章 | 1074 | # 保证人1姓名、签章 |
| ... | @@ -962,19 +1076,35 @@ class Finder: | ... | @@ -962,19 +1076,35 @@ class Finder: |
| 962 | electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:') | 1076 | electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:') |
| 963 | self.init_result['签字页-保证人1姓名'] = name | 1077 | self.init_result['签字页-保证人1姓名'] = name |
| 964 | self.init_result['签字页-保证人1签章'] = electronic_signature | 1078 | self.init_result['签字页-保证人1签章'] = electronic_signature |
| 1079 | # 这里用的是 name["words"] == "" | ||
| 1080 | if name["words"] == "": | ||
| 1081 | name = self.get_key_value(key='共同承租人名称:') | ||
| 1082 | electronic_signature = self.get_electronic_signature(top='共同承租人名称:', bottom='保证人1姓名:') | ||
| 1083 | self.init_result['签字页-共同承租人姓名'] = name | ||
| 1084 | self.init_result['签字页-共同承租人签章'] = electronic_signature | ||
| 965 | # 保证人2姓名、签章 | 1085 | # 保证人2姓名、签章 |
| 966 | name = self.get_key_value(key='保证人2姓名:') | 1086 | name = self.get_key_value(key='保证人2姓名:') |
| 967 | electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='保证人3姓名:') | 1087 | electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='保证人3姓名:') |
| 968 | self.init_result['签字页-保证人2姓名'] = name | 1088 | self.init_result['签字页-保证人2姓名'] = name |
| 969 | self.init_result['签字页-保证人2签章'] = electronic_signature | 1089 | self.init_result['签字页-保证人2签章'] = electronic_signature |
| 970 | # 保证人2姓名、签章 | 1090 | # if判断条件对应3_3版本 |
| 1091 | if name["words"] == "": | ||
| 1092 | name = self.get_key_value(key='保证人1姓名:') | ||
| 1093 | electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:') | ||
| 1094 | self.init_result['签字页-保证人1姓名'] = name | ||
| 1095 | self.init_result['签字页-保证人1签章'] = electronic_signature | ||
| 1096 | # 保证人3姓名、签章 | ||
| 971 | name = self.get_key_value(key='保证人3姓名:') | 1097 | name = self.get_key_value(key='保证人3姓名:') |
| 972 | electronic_signature = self.get_electronic_signature(top='保证人3姓名:', bottom='日期:') | 1098 | electronic_signature = self.get_electronic_signature(top='保证人3姓名:', bottom='日期:') |
| 973 | self.init_result['签字页-保证人3姓名'] = name | 1099 | self.init_result['签字页-保证人3姓名'] = name |
| 974 | self.init_result['签字页-保证人3签章'] = electronic_signature | 1100 | self.init_result['签字页-保证人3签章'] = electronic_signature |
| 975 | 1101 | # if判断条件对应3_3版本 | |
| 1102 | if name["words"] == None: | ||
| 1103 | name = self.get_key_value(key='保证人2姓名:') | ||
| 1104 | electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='日期:') | ||
| 1105 | self.init_result['签字页-保证人2姓名'] = name | ||
| 1106 | self.init_result['签字页-保证人2签章'] = electronic_signature | ||
| 976 | return self.init_result | 1107 | return self.init_result |
| 977 | |||
| 978 | # results['is_shhz_contract'] = True | 1108 | # results['is_shhz_contract'] = True |
| 979 | # results['pdf_info'] = self.init_result | 1109 | # results['pdf_info'] = self.init_result |
| 980 | 1110 | ... | ... |
| ... | @@ -18,7 +18,6 @@ def predict(pdf_info, file_cls): | ... | @@ -18,7 +18,6 @@ def predict(pdf_info, file_cls): |
| 18 | Returns: | 18 | Returns: |
| 19 | TYPE: Description | 19 | TYPE: Description |
| 20 | """ | 20 | """ |
| 21 | |||
| 22 | # 0: 售后回租合同 | 21 | # 0: 售后回租合同 |
| 23 | pdf_info_0 = [] | 22 | pdf_info_0 = [] |
| 24 | for pno in pdf_info: | 23 | for pno in pdf_info: |
| ... | @@ -30,7 +29,6 @@ def predict(pdf_info, file_cls): | ... | @@ -30,7 +29,6 @@ def predict(pdf_info, file_cls): |
| 30 | bbox, text = span['bbox'], span['text'] | 29 | bbox, text = span['bbox'], span['text'] |
| 31 | if '售后回租合同_' in text: | 30 | if '售后回租合同_' in text: |
| 32 | pdf_info_0.append(pdf_info[pno]) | 31 | pdf_info_0.append(pdf_info[pno]) |
| 33 | |||
| 34 | # 1: 车辆处置协议 | 32 | # 1: 车辆处置协议 |
| 35 | pdf_info_1 = [] | 33 | pdf_info_1 = [] |
| 36 | for pno in pdf_info: | 34 | for pno in pdf_info: |
| ... | @@ -42,7 +40,6 @@ def predict(pdf_info, file_cls): | ... | @@ -42,7 +40,6 @@ def predict(pdf_info, file_cls): |
| 42 | bbox, text = span['bbox'], span['text'] | 40 | bbox, text = span['bbox'], span['text'] |
| 43 | if '售后回租合同附件一' in text: | 41 | if '售后回租合同附件一' in text: |
| 44 | pdf_info_1.append(pdf_info[pno]) | 42 | pdf_info_1.append(pdf_info[pno]) |
| 45 | |||
| 46 | # 2: 车辆租赁抵押合同 | 43 | # 2: 车辆租赁抵押合同 |
| 47 | pdf_info_2 = [] | 44 | pdf_info_2 = [] |
| 48 | for pno in pdf_info: | 45 | for pno in pdf_info: |
| ... | @@ -54,7 +51,6 @@ def predict(pdf_info, file_cls): | ... | @@ -54,7 +51,6 @@ def predict(pdf_info, file_cls): |
| 54 | bbox, text = span['bbox'], span['text'] | 51 | bbox, text = span['bbox'], span['text'] |
| 55 | if '车辆租赁抵押合同_' in text: | 52 | if '车辆租赁抵押合同_' in text: |
| 56 | pdf_info_2.append(pdf_info[pno]) | 53 | pdf_info_2.append(pdf_info[pno]) |
| 57 | |||
| 58 | is_clczxy = False | 54 | is_clczxy = False |
| 59 | # 如果 pdf_info_1 == 4 页,则说明此时输入包含了车辆处置协议 | 55 | # 如果 pdf_info_1 == 4 页,则说明此时输入包含了车辆处置协议 |
| 60 | if len(pdf_info_1) == 4 and file_cls == 1 and len(pdf_info_0) != 0: | 56 | if len(pdf_info_1) == 4 and file_cls == 1 and len(pdf_info_0) != 0: |
| ... | @@ -62,7 +58,6 @@ def predict(pdf_info, file_cls): | ... | @@ -62,7 +58,6 @@ def predict(pdf_info, file_cls): |
| 62 | pdf_info = dict() | 58 | pdf_info = dict() |
| 63 | for pno, page_info in enumerate(pdf_info_1): | 59 | for pno, page_info in enumerate(pdf_info_1): |
| 64 | pdf_info[str(pno)] = page_info | 60 | pdf_info[str(pno)] = page_info |
| 65 | |||
| 66 | f = Finder(pdf_info) | 61 | f = Finder(pdf_info) |
| 67 | if file_cls == 0: | 62 | if file_cls == 0: |
| 68 | results = f.get_info() | 63 | results = f.get_info() |
| ... | @@ -72,13 +67,11 @@ def predict(pdf_info, file_cls): | ... | @@ -72,13 +67,11 @@ def predict(pdf_info, file_cls): |
| 72 | if file_cls == 2: | 67 | if file_cls == 2: |
| 73 | # 提取信息 ———— 车辆租赁抵押合同 | 68 | # 提取信息 ———— 车辆租赁抵押合同 |
| 74 | results = f.get_info_2() | 69 | results = f.get_info_2() |
| 75 | 70 | if is_clczxy is True: | |
| 76 | if is_clczxy == True: | ||
| 77 | for key in results: | 71 | for key in results: |
| 78 | if results[key]['page'] is not None: | 72 | if results[key]['page'] is not None: |
| 79 | results[key]['page'] = str(int(results[key]['page'])+6) | 73 | results[key]['page'] = str(int(results[key]['page']) + 6) |
| 80 | |||
| 81 | for key in results: | 74 | for key in results: |
| 82 | if results[key]['page'] is not None: | 75 | if results[key]['page'] is not None: |
| 83 | results[key]['page'] = 'page_' + str(int(results[key]['page'])+1) | 76 | results[key]['page'] = 'page_' + str(int(results[key]['page']) + 1) |
| 84 | return results | 77 | return results | ... | ... |
-
Please register or sign in to post a comment