08b0fd9e by 周伟奇

e-contract fix

1 parent c878c21a
...@@ -295,9 +295,8 @@ class Finder: ...@@ -295,9 +295,8 @@ class Finder:
295 bbox_ytzje = bbox 295 bbox_ytzje = bbox
296 if text == '贷款本金3': 296 if text == '贷款本金3':
297 bbox_dkbj = bbox 297 bbox_dkbj = bbox
298 if text == '附加产品融资贷款本': 298 if text in ['附加产品融资贷款本', '附加产品融资贷款本金', '附加产品融资贷']:
299 bbox_total = bbox 299 bbox_total = bbox
300 # print(bbox_xm, bbox_ytzje, bbox_dkbj, bbox_total)
301 if bbox_xm: 300 if bbox_xm:
302 for i in range(10): 301 for i in range(10):
303 rh = abs(bbox_xm[1] - bbox_xm[-1]) 302 rh = abs(bbox_xm[1] - bbox_xm[-1])
...@@ -337,7 +336,7 @@ class Finder: ...@@ -337,7 +336,7 @@ class Finder:
337 _iou, _key = self.get_top_iou(poly=anchor, ocr_result=self.ocr_results[page_num]) 336 _iou, _key = self.get_top_iou(poly=anchor, ocr_result=self.ocr_results[page_num])
338 bbox, total_text = self.ocr_results[page_num][_key] 337 bbox, total_text = self.ocr_results[page_num][_key]
339 asp_details_table.append(['附加产品融资贷款本金总金额:', '', total_text]) 338 asp_details_table.append(['附加产品融资贷款本金总金额:', '', total_text])
340 asp_details_table_term['words'] = asp_details_table 339 asp_details_table_term['words'] = asp_details_table
341 return asp_details_table_term 340 return asp_details_table_term
342 341
343 def get_signature(self): 342 def get_signature(self):
...@@ -904,4 +903,4 @@ class Finder: ...@@ -904,4 +903,4 @@ class Finder:
904 new_results = {"is_asp": self.is_asp, 903 new_results = {"is_asp": self.is_asp,
905 "page_info": self.init_result 904 "page_info": self.init_result
906 } 905 }
907 return new_results
...\ No newline at end of file ...\ No newline at end of file
906 return new_results
......
...@@ -618,7 +618,7 @@ class Finder: ...@@ -618,7 +618,7 @@ class Finder:
618 page_num = i 618 page_num = i
619 texts.append(text) 619 texts.append(text)
620 boxes.append(bbox) 620 boxes.append(bbox)
621 print(texts) 621 # print(texts)
622 if len(texts) > 4: 622 if len(texts) > 4:
623 words = '有' 623 words = '有'
624 else: 624 else:
...@@ -785,41 +785,60 @@ class Finder: ...@@ -785,41 +785,60 @@ class Finder:
785 785
786 def get_table_add_product(self): 786 def get_table_add_product(self):
787 table_add_product = self.item.copy() 787 table_add_product = self.item.copy()
788 items = [] 788 add_product_page_num = None
789 start = False
790 page = None
791 greater_equal_v35 = False
792 for pno in self.pdf_info: 789 for pno in self.pdf_info:
793 condition = False
794 for block in self.pdf_info[f'{pno}']['blocks']: 790 for block in self.pdf_info[f'{pno}']['blocks']:
795 if block['type'] != 0: 791 if block['type'] != 0:
796 continue 792 continue
797 for line in block['lines']: 793 for line in block['lines']:
798 for span in line['spans']: 794 for span in line['spans']:
799 bbox, text = span['bbox'], span['text'] 795 bbox, text = span['bbox'], span['text']
800 if text == '租赁利率': 796 if '车辆附加产品(明细见下表)' in text:
801 greater_equal_v35 = True 797 add_product_page_num = pno
802 if '总计' in text: 798 ocr_results = []
803 start = True 799 for block in self.pdf_info[f'{add_product_page_num}']['blocks']:
804 if '注:出租人向承租人购买租赁车辆的对价' in text: 800 if block['type'] != 0:
805 page = pno 801 continue
806 start = False 802 for line in block['lines']:
807 if start == True: 803 for span in line['spans']:
808 items.append(text) 804 bbox, text = span['bbox'], span['text']
805 xmin, ymin, xmax, ymax = bbox
806 bbox = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]
807 ocr_results.append([bbox, text])
809 lines = [['项目', '购买价格', '实际融资金额']] 808 lines = [['项目', '购买价格', '实际融资金额']]
810 if greater_equal_v35: 809 key_xm = None
811 for i in range(len(items) // 4): 810 key_gmjg = None
812 line = [items[2 + i * 4 + 0], items[2 + i * 4 + 1], items[2 + i * 4 + 2]] 811 key_sjrzje = None
813 lines.append(line) 812 key_total = None
814 else: 813 for index, span in enumerate(ocr_results):
815 for i in range(len(items) // 3): 814 if span[1] == '项目':
816 line = [items[2 + i * 3 + 0], items[2 + i * 3 + 1], items[2 + i * 3 + 2]] 815 key_xm = index
817 lines.append(line) 816 if span[1] == '购买价格':
818 if len(items) > 0: 817 key_gmjg = index
819 lines.append([items[0], '', items[1]]) 818 if span[1] == '实际融资金额':
819 key_sjrzje = index
820 if span[1] == '总计':
821 key_total = index
822 bbox, text = ocr_results[key_xm]
823 rh = abs(bbox[1] - bbox[-1])
824 anchor = np.array(bbox).reshape((-1, 2))
825 anchor[:, 0] += 2 * rh
826 anchor[:, 1] += rh
827 for i in range(5):
828 for span in ocr_results:
829 iou = caculate_iou(anchor, span[0])
830 if iou > 0 and span[1].strip() != '所购':
831 x = get_table_info(span[0], ocr_results[key_gmjg][0], ocr_results)
832 y = get_table_info(span[0], ocr_results[key_sjrzje][0], ocr_results)
833 line = [span[1].replace('\u3000', ' '), x, y]
834 lines.append(line)
835 anchor = np.array(span[0]).reshape((-1, 2))
836 anchor[:, 1] += rh
837 total = get_table_info(ocr_results[key_total][0], ocr_results[key_sjrzje][0], ocr_results)
838 lines.append(['总计', '', total])
820 839
821 table_add_product['words'] = lines 840 table_add_product['words'] = lines
822 table_add_product['page'] = page 841 table_add_product['page'] = add_product_page_num
823 table_add_product['position'] = None 842 table_add_product['position'] = None
824 return table_add_product 843 return table_add_product
825 844
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!