fdb7ca98 by 周伟奇

fix format

1 parent 87525e99
...@@ -18,6 +18,7 @@ class Finder: ...@@ -18,6 +18,7 @@ class Finder:
18 self.item = {"words": None, 18 self.item = {"words": None,
19 "position": None, 19 "position": None,
20 } 20 }
21
21 def gen_init_result(self, is_asp): 22 def gen_init_result(self, is_asp):
22 # 格式化算法输出 23 # 格式化算法输出
23 self.init_result = {"page_1": {"合同编号": self.item, 24 self.init_result = {"page_1": {"合同编号": self.item,
...@@ -108,10 +109,12 @@ class Finder: ...@@ -108,10 +109,12 @@ class Finder:
108 "日期": self.item, 109 "日期": self.item,
109 }, 110 },
110 } 111 }
112
111 def poly_to_rectangle(self, poly): 113 def poly_to_rectangle(self, poly):
112 xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax = poly 114 xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax = poly
113 bbox = [xmin, ymin, xmax, ymax] 115 bbox = [xmin, ymin, xmax, ymax]
114 return bbox 116 return bbox
117
115 def get_contract_no(self, page_num): 118 def get_contract_no(self, page_num):
116 """传入页码,查看该页码右上角的编号 119 """传入页码,查看该页码右上角的编号
117 120
...@@ -133,6 +136,7 @@ class Finder: ...@@ -133,6 +136,7 @@ class Finder:
133 contract_no['words'] = words 136 contract_no['words'] = words
134 contract_no['position'] = location 137 contract_no['position'] = location
135 return contract_no 138 return contract_no
139
136 def get_vehicle_price(self, page_num='0'): 140 def get_vehicle_price(self, page_num='0'):
137 vehicle_price = self.item.copy() 141 vehicle_price = self.item.copy()
138 # vehicle_price['words'] = '' 142 # vehicle_price['words'] = ''
...@@ -145,6 +149,7 @@ class Finder: ...@@ -145,6 +149,7 @@ class Finder:
145 vehicle_price['words'] = words 149 vehicle_price['words'] = words
146 vehicle_price['position'] = location 150 vehicle_price['position'] = location
147 return vehicle_price 151 return vehicle_price
152
148 def get_vin(self, page_num='0'): 153 def get_vin(self, page_num='0'):
149 vin = self.item.copy() 154 vin = self.item.copy()
150 # vin['words'] = '' 155 # vin['words'] = ''
...@@ -157,6 +162,7 @@ class Finder: ...@@ -157,6 +162,7 @@ class Finder:
157 vin['words'] = words 162 vin['words'] = words
158 vin['position'] = location 163 vin['position'] = location
159 return vin 164 return vin
165
160 def get_loan_principal(self, page_num='0'): 166 def get_loan_principal(self, page_num='0'):
161 chinese_keywords = ['壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖', '拾', 167 chinese_keywords = ['壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖', '拾',
162 '佰', '仟', '万', '亿', '元', '角', '分', '零', '整'] 168 '佰', '仟', '万', '亿', '元', '角', '分', '零', '整']
...@@ -197,6 +203,7 @@ class Finder: ...@@ -197,6 +203,7 @@ class Finder:
197 asp_2['position'] = bbox 203 asp_2['position'] = bbox
198 asp_2['words'] = words 204 asp_2['words'] = words
199 return upper, lower, asp_1, asp_2 205 return upper, lower, asp_1, asp_2
206
200 def get_loan_term(self, page_num='0'): 207 def get_loan_term(self, page_num='0'):
201 loan_term = self.item.copy() 208 loan_term = self.item.copy()
202 all_text = '' 209 all_text = ''
...@@ -220,6 +227,7 @@ class Finder: ...@@ -220,6 +227,7 @@ class Finder:
220 loan_term['position'] = bbox 227 loan_term['position'] = bbox
221 loan_term['words'] = words 228 loan_term['words'] = words
222 return loan_term 229 return loan_term
230
223 def mergelist(self, text_list): 231 def mergelist(self, text_list):
224 pattern = re.compile("[^\u4e00-\u9fa5]") # 匹配不是中文的其他字符 232 pattern = re.compile("[^\u4e00-\u9fa5]") # 匹配不是中文的其他字符
225 mergeindex = -1 233 mergeindex = -1
...@@ -230,8 +238,10 @@ class Finder: ...@@ -230,8 +238,10 @@ class Finder:
230 if mergeindex == -1: 238 if mergeindex == -1:
231 return text_list 239 return text_list
232 else: 240 else:
233 new_text_list = text_list[:mergeindex] + [text_list[mergeindex] + text_list[mergeindex+1]] + text_list[mergeindex+2:] 241 new_text_list = text_list[:mergeindex] + [
242 text_list[mergeindex] + text_list[mergeindex+1]] + text_list[mergeindex+2:]
234 return self.mergelist(new_text_list) 243 return self.mergelist(new_text_list)
244
235 def get_asp_details(self, page_num): 245 def get_asp_details(self, page_num):
236 asp_details_table_term = self.item.copy() 246 asp_details_table_term = self.item.copy()
237 asp_details_table = [] 247 asp_details_table = []
...@@ -262,6 +272,7 @@ class Finder: ...@@ -262,6 +272,7 @@ class Finder:
262 if len(asp_details_table) > 0: 272 if len(asp_details_table) > 0:
263 asp_details_table_term['words'] = asp_details_table 273 asp_details_table_term['words'] = asp_details_table
264 return asp_details_table_term 274 return asp_details_table_term
275
265 def get_signature(self): 276 def get_signature(self):
266 signature = self.item.copy() 277 signature = self.item.copy()
267 for block in self.pdf_info['0']['blocks']: 278 for block in self.pdf_info['0']['blocks']:
...@@ -275,6 +286,7 @@ class Finder: ...@@ -275,6 +286,7 @@ class Finder:
275 signature['words'] = words 286 signature['words'] = words
276 signature['position'] = bbox 287 signature['position'] = bbox
277 return signature 288 return signature
289
278 def get_somebody(self, top, bottom): 290 def get_somebody(self, top, bottom):
279 # 指定上下边界后,返回上下边界内的客户信息 291 # 指定上下边界后,返回上下边界内的客户信息
280 _name = self.item.copy() 292 _name = self.item.copy()
...@@ -309,6 +321,7 @@ class Finder: ...@@ -309,6 +321,7 @@ class Finder:
309 _id['position'] = bbox 321 _id['position'] = bbox
310 _id['words'] = words 322 _id['words'] = words
311 return _name, _id 323 return _name, _id
324
312 def get_seller(self): 325 def get_seller(self):
313 seller = self.item.copy() 326 seller = self.item.copy()
314 # 先找到 key 327 # 先找到 key
...@@ -330,11 +343,12 @@ class Finder: ...@@ -330,11 +343,12 @@ class Finder:
330 for line in block['lines']: 343 for line in block['lines']:
331 for span in line['spans']: 344 for span in line['spans']:
332 bbox, text = span['bbox'], span['text'] 345 bbox, text = span['bbox'], span['text']
333 if anchor_bbox[2]<np.mean(bbox[::2])<half_width and \ 346 if anchor_bbox[2] < np.mean(bbox[::2]) < half_width and \
334 anchor_bbox[1]<np.mean(bbox[1::2])<anchor_bbox[3]: 347 anchor_bbox[1] < np.mean(bbox[1::2]) < anchor_bbox[3]:
335 seller['position'] = bbox 348 seller['position'] = bbox
336 seller['words'] = text 349 seller['words'] = text
337 return seller 350 return seller
351
338 def get_payback_account(self): 352 def get_payback_account(self):
339 account = self.item.copy() 353 account = self.item.copy()
340 account_name = self.item.copy() 354 account_name = self.item.copy()
...@@ -387,6 +401,7 @@ class Finder: ...@@ -387,6 +401,7 @@ class Finder:
387 account_bank['position'] = bbox 401 account_bank['position'] = bbox
388 account_bank['words'] = words 402 account_bank['words'] = words
389 return account, account_name, account_bank 403 return account, account_name, account_bank
404
390 def get_repayment_schedule(self): 405 def get_repayment_schedule(self):
391 repayment_schedule = self.item.copy() 406 repayment_schedule = self.item.copy()
392 # 只看第二页 407 # 只看第二页
...@@ -416,6 +431,7 @@ class Finder: ...@@ -416,6 +431,7 @@ class Finder:
416 if len(repayment_schedule_table) > 0: 431 if len(repayment_schedule_table) > 0:
417 repayment_schedule['words'] = repayment_schedule_table 432 repayment_schedule['words'] = repayment_schedule_table
418 return repayment_schedule 433 return repayment_schedule
434
419 def get_signature_role_1(self): 435 def get_signature_role_1(self):
420 signature_role_1 = self.init_item.copy() 436 signature_role_1 = self.init_item.copy()
421 # 先定位签字区域 437 # 先定位签字区域
...@@ -445,11 +461,13 @@ class Finder: ...@@ -445,11 +461,13 @@ class Finder:
445 else: 461 else:
446 words = '无' 462 words = '无'
447 boxes = np.array(boxes).reshape((-1, 2)) 463 boxes = np.array(boxes).reshape((-1, 2))
448 position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])] 464 position = [min(boxes[:, 0]), min(boxes[:, 1]),
465 max(boxes[:, 0]), max(boxes[:, 1])]
449 signature_role_1['page_num'] = page_num 466 signature_role_1['page_num'] = page_num
450 signature_role_1['position'] = position 467 signature_role_1['position'] = position
451 signature_role_1['words'] = words 468 signature_role_1['words'] = words
452 return signature_role_1 469 return signature_role_1
470
453 def get_signature_role_2(self): 471 def get_signature_role_2(self):
454 signature_role_2 = self.init_item.copy() 472 signature_role_2 = self.init_item.copy()
455 # 先定位签字区域 473 # 先定位签字区域
...@@ -479,11 +497,13 @@ class Finder: ...@@ -479,11 +497,13 @@ class Finder:
479 else: 497 else:
480 words = '无' 498 words = '无'
481 boxes = np.array(boxes).reshape((-1, 2)) 499 boxes = np.array(boxes).reshape((-1, 2))
482 position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])] 500 position = [min(boxes[:, 0]), min(boxes[:, 1]),
501 max(boxes[:, 0]), max(boxes[:, 1])]
483 signature_role_2['page_num'] = page_num 502 signature_role_2['page_num'] = page_num
484 signature_role_2['position'] = position 503 signature_role_2['position'] = position
485 signature_role_2['words'] = words 504 signature_role_2['words'] = words
486 return signature_role_2 505 return signature_role_2
506
487 def get_signature_role_3(self): 507 def get_signature_role_3(self):
488 signature_role_3 = self.init_item.copy() 508 signature_role_3 = self.init_item.copy()
489 # 先定位签字区域 509 # 先定位签字区域
...@@ -513,11 +533,13 @@ class Finder: ...@@ -513,11 +533,13 @@ class Finder:
513 else: 533 else:
514 words = '无' 534 words = '无'
515 boxes = np.array(boxes).reshape((-1, 2)) 535 boxes = np.array(boxes).reshape((-1, 2))
516 position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])] 536 position = [min(boxes[:, 0]), min(boxes[:, 1]),
537 max(boxes[:, 0]), max(boxes[:, 1])]
517 signature_role_3['page_num'] = page_num 538 signature_role_3['page_num'] = page_num
518 signature_role_3['position'] = position 539 signature_role_3['position'] = position
519 signature_role_3['words'] = words 540 signature_role_3['words'] = words
520 return signature_role_3 541 return signature_role_3
542
521 def get_signature_role_4(self): 543 def get_signature_role_4(self):
522 signature_role_4 = self.init_item.copy() 544 signature_role_4 = self.init_item.copy()
523 # 先定位签字区域 545 # 先定位签字区域
...@@ -547,11 +569,13 @@ class Finder: ...@@ -547,11 +569,13 @@ class Finder:
547 else: 569 else:
548 words = '无' 570 words = '无'
549 boxes = np.array(boxes).reshape((-1, 2)) 571 boxes = np.array(boxes).reshape((-1, 2))
550 position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])] 572 position = [min(boxes[:, 0]), min(boxes[:, 1]),
573 max(boxes[:, 0]), max(boxes[:, 1])]
551 signature_role_4['page_num'] = page_num 574 signature_role_4['page_num'] = page_num
552 signature_role_4['position'] = position 575 signature_role_4['position'] = position
553 signature_role_4['words'] = words 576 signature_role_4['words'] = words
554 return signature_role_4 577 return signature_role_4
578
555 def get_signature_role_5(self): 579 def get_signature_role_5(self):
556 signature_role_5 = self.init_item.copy() 580 signature_role_5 = self.init_item.copy()
557 # 先定位签字区域 581 # 先定位签字区域
...@@ -582,11 +606,13 @@ class Finder: ...@@ -582,11 +606,13 @@ class Finder:
582 else: 606 else:
583 words = '无' 607 words = '无'
584 boxes = np.array(boxes).reshape((-1, 2)) 608 boxes = np.array(boxes).reshape((-1, 2))
585 position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])] 609 position = [min(boxes[:, 0]), min(boxes[:, 1]),
610 max(boxes[:, 0]), max(boxes[:, 1])]
586 signature_role_5['page_num'] = page_num 611 signature_role_5['page_num'] = page_num
587 signature_role_5['position'] = position 612 signature_role_5['position'] = position
588 signature_role_5['words'] = words 613 signature_role_5['words'] = words
589 return signature_role_5 614 return signature_role_5
615
590 def get_last_page_signature(self, page_num, top, bottom): 616 def get_last_page_signature(self, page_num, top, bottom):
591 signature_name = self.item.copy() 617 signature_name = self.item.copy()
592 signature_date = self.item.copy() 618 signature_date = self.item.copy()
...@@ -610,7 +636,7 @@ class Finder: ...@@ -610,7 +636,7 @@ class Finder:
610 for line in block['lines']: 636 for line in block['lines']:
611 for span in line['spans']: 637 for span in line['spans']:
612 bbox, text = span['bbox'], span['text'] 638 bbox, text = span['bbox'], span['text']
613 if '签署日期' in text and int(anchor_top)<np.mean(bbox[1::2])<int(anchor_bottom): 639 if '签署日期' in text and int(anchor_top) < np.mean(bbox[1::2]) < int(anchor_bottom):
614 name = text.split(' ')[0] 640 name = text.split(' ')[0]
615 date = text.split(':')[-1] 641 date = text.split(':')[-1]
616 signature_name['words'] = name 642 signature_name['words'] = name
...@@ -618,6 +644,7 @@ class Finder: ...@@ -618,6 +644,7 @@ class Finder:
618 signature_date['words'] = date 644 signature_date['words'] = date
619 signature_date['position'] = bbox 645 signature_date['position'] = bbox
620 return signature_name, signature_date 646 return signature_name, signature_date
647
621 def get_info(self): 648 def get_info(self):
622 """ 649 """
623 block['type'] == 0 : 表示该元素为图片 650 block['type'] == 0 : 表示该元素为图片
...@@ -672,22 +699,27 @@ class Finder: ...@@ -672,22 +699,27 @@ class Finder:
672 contract_no = self.get_contract_no(page_num='0') 699 contract_no = self.get_contract_no(page_num='0')
673 self.init_result['page_2']['合同编号'] = contract_no 700 self.init_result['page_2']['合同编号'] = contract_no
674 # 找借款人及抵押人(地址字段原本有空格) 701 # 找借款人及抵押人(地址字段原本有空格)
675 borrower_name, borrower_id = self.get_somebody(top='借款人及抵押人:', bottom='共同借款人:') 702 borrower_name, borrower_id = self.get_somebody(
703 top='借款人及抵押人:', bottom='共同借款人:')
676 # 这是为了同时兼容 8.1 版本 704 # 这是为了同时兼容 8.1 版本
677 if borrower_name['words'] == None: 705 if borrower_name['words'] == None:
678 borrower_name, borrower_id = self.get_somebody(top='借款人及抵押人:', bottom='共同借款人及共同抵押人:') 706 borrower_name, borrower_id = self.get_somebody(
707 top='借款人及抵押人:', bottom='共同借款人及共同抵押人:')
679 self.init_result['page_2']['借款人及抵押人']['name'] = borrower_name 708 self.init_result['page_2']['借款人及抵押人']['name'] = borrower_name
680 self.init_result['page_2']['借款人及抵押人']['id'] = borrower_id 709 self.init_result['page_2']['借款人及抵押人']['id'] = borrower_id
681 # 找共同借款人及共同抵押人 710 # 找共同借款人及共同抵押人
682 co_borrower_name, co_borrower_id = self.get_somebody(top='共同借款人:', bottom='保证人1:') 711 co_borrower_name, co_borrower_id = self.get_somebody(
712 top='共同借款人:', bottom='保证人1:')
683 self.init_result['page_2']['共同借款人及共同抵押人']['name'] = co_borrower_name 713 self.init_result['page_2']['共同借款人及共同抵押人']['name'] = co_borrower_name
684 self.init_result['page_2']['共同借款人及共同抵押人']['id'] = co_borrower_id 714 self.init_result['page_2']['共同借款人及共同抵押人']['id'] = co_borrower_id
685 # 保证人1 715 # 保证人1
686 first_guarantor_name, first_guarantor_id = self.get_somebody(top='保证人1:', bottom='保证人2:') 716 first_guarantor_name, first_guarantor_id = self.get_somebody(
717 top='保证人1:', bottom='保证人2:')
687 self.init_result['page_2']['保证人1']['name'] = first_guarantor_name 718 self.init_result['page_2']['保证人1']['name'] = first_guarantor_name
688 self.init_result['page_2']['保证人1']['id'] = first_guarantor_id 719 self.init_result['page_2']['保证人1']['id'] = first_guarantor_id
689 # 保证人2 720 # 保证人2
690 second_guarantor_name, second_guarantor_id = self.get_somebody(top='保证人2:', bottom='第一章') 721 second_guarantor_name, second_guarantor_id = self.get_somebody(
722 top='保证人2:', bottom='第一章')
691 self.init_result['page_2']['保证人2']['name'] = second_guarantor_name 723 self.init_result['page_2']['保证人2']['name'] = second_guarantor_name
692 self.init_result['page_2']['保证人2']['id'] = second_guarantor_id 724 self.init_result['page_2']['保证人2']['id'] = second_guarantor_id
693 # 所购车辆价格 725 # 所购车辆价格
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!