e-contract fix
Showing 2 changed files with 48 additions and 30 deletions
... | @@ -295,9 +295,8 @@ class Finder: | ... | @@ -295,9 +295,8 @@ class Finder: |
295 | bbox_ytzje = bbox | 295 | bbox_ytzje = bbox |
296 | if text == '贷款本金3': | 296 | if text == '贷款本金3': |
297 | bbox_dkbj = bbox | 297 | bbox_dkbj = bbox |
298 | if text == '附加产品融资贷款本': | 298 | if text in ['附加产品融资贷款本', '附加产品融资贷款本金', '附加产品融资贷']: |
299 | bbox_total = bbox | 299 | bbox_total = bbox |
300 | # print(bbox_xm, bbox_ytzje, bbox_dkbj, bbox_total) | ||
301 | if bbox_xm: | 300 | if bbox_xm: |
302 | for i in range(10): | 301 | for i in range(10): |
303 | rh = abs(bbox_xm[1] - bbox_xm[-1]) | 302 | rh = abs(bbox_xm[1] - bbox_xm[-1]) |
... | @@ -337,7 +336,7 @@ class Finder: | ... | @@ -337,7 +336,7 @@ class Finder: |
337 | _iou, _key = self.get_top_iou(poly=anchor, ocr_result=self.ocr_results[page_num]) | 336 | _iou, _key = self.get_top_iou(poly=anchor, ocr_result=self.ocr_results[page_num]) |
338 | bbox, total_text = self.ocr_results[page_num][_key] | 337 | bbox, total_text = self.ocr_results[page_num][_key] |
339 | asp_details_table.append(['附加产品融资贷款本金总金额:', '', total_text]) | 338 | asp_details_table.append(['附加产品融资贷款本金总金额:', '', total_text]) |
340 | asp_details_table_term['words'] = asp_details_table | 339 | asp_details_table_term['words'] = asp_details_table |
341 | return asp_details_table_term | 340 | return asp_details_table_term |
342 | 341 | ||
343 | def get_signature(self): | 342 | def get_signature(self): |
... | @@ -904,4 +903,4 @@ class Finder: | ... | @@ -904,4 +903,4 @@ class Finder: |
904 | new_results = {"is_asp": self.is_asp, | 903 | new_results = {"is_asp": self.is_asp, |
905 | "page_info": self.init_result | 904 | "page_info": self.init_result |
906 | } | 905 | } |
907 | return new_results | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
906 | return new_results | ... | ... |
... | @@ -618,7 +618,7 @@ class Finder: | ... | @@ -618,7 +618,7 @@ class Finder: |
618 | page_num = i | 618 | page_num = i |
619 | texts.append(text) | 619 | texts.append(text) |
620 | boxes.append(bbox) | 620 | boxes.append(bbox) |
621 | print(texts) | 621 | # print(texts) |
622 | if len(texts) > 4: | 622 | if len(texts) > 4: |
623 | words = '有' | 623 | words = '有' |
624 | else: | 624 | else: |
... | @@ -785,41 +785,60 @@ class Finder: | ... | @@ -785,41 +785,60 @@ class Finder: |
785 | 785 | ||
786 | def get_table_add_product(self): | 786 | def get_table_add_product(self): |
787 | table_add_product = self.item.copy() | 787 | table_add_product = self.item.copy() |
788 | items = [] | 788 | add_product_page_num = None |
789 | start = False | ||
790 | page = None | ||
791 | greater_equal_v35 = False | ||
792 | for pno in self.pdf_info: | 789 | for pno in self.pdf_info: |
793 | condition = False | ||
794 | for block in self.pdf_info[f'{pno}']['blocks']: | 790 | for block in self.pdf_info[f'{pno}']['blocks']: |
795 | if block['type'] != 0: | 791 | if block['type'] != 0: |
796 | continue | 792 | continue |
797 | for line in block['lines']: | 793 | for line in block['lines']: |
798 | for span in line['spans']: | 794 | for span in line['spans']: |
799 | bbox, text = span['bbox'], span['text'] | 795 | bbox, text = span['bbox'], span['text'] |
800 | if text == '租赁利率': | 796 | if '车辆附加产品(明细见下表)' in text: |
801 | greater_equal_v35 = True | 797 | add_product_page_num = pno |
802 | if '总计' in text: | 798 | ocr_results = [] |
803 | start = True | 799 | for block in self.pdf_info[f'{add_product_page_num}']['blocks']: |
804 | if '注:出租人向承租人购买租赁车辆的对价' in text: | 800 | if block['type'] != 0: |
805 | page = pno | 801 | continue |
806 | start = False | 802 | for line in block['lines']: |
807 | if start == True: | 803 | for span in line['spans']: |
808 | items.append(text) | 804 | bbox, text = span['bbox'], span['text'] |
805 | xmin, ymin, xmax, ymax = bbox | ||
806 | bbox = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax] | ||
807 | ocr_results.append([bbox, text]) | ||
809 | lines = [['项目', '购买价格', '实际融资金额']] | 808 | lines = [['项目', '购买价格', '实际融资金额']] |
810 | if greater_equal_v35: | 809 | key_xm = None |
811 | for i in range(len(items) // 4): | 810 | key_gmjg = None |
812 | line = [items[2 + i * 4 + 0], items[2 + i * 4 + 1], items[2 + i * 4 + 2]] | 811 | key_sjrzje = None |
813 | lines.append(line) | 812 | key_total = None |
814 | else: | 813 | for index, span in enumerate(ocr_results): |
815 | for i in range(len(items) // 3): | 814 | if span[1] == '项目': |
816 | line = [items[2 + i * 3 + 0], items[2 + i * 3 + 1], items[2 + i * 3 + 2]] | 815 | key_xm = index |
817 | lines.append(line) | 816 | if span[1] == '购买价格': |
818 | if len(items) > 0: | 817 | key_gmjg = index |
819 | lines.append([items[0], '', items[1]]) | 818 | if span[1] == '实际融资金额': |
819 | key_sjrzje = index | ||
820 | if span[1] == '总计': | ||
821 | key_total = index | ||
822 | bbox, text = ocr_results[key_xm] | ||
823 | rh = abs(bbox[1] - bbox[-1]) | ||
824 | anchor = np.array(bbox).reshape((-1, 2)) | ||
825 | anchor[:, 0] += 2 * rh | ||
826 | anchor[:, 1] += rh | ||
827 | for i in range(5): | ||
828 | for span in ocr_results: | ||
829 | iou = caculate_iou(anchor, span[0]) | ||
830 | if iou > 0 and span[1].strip() != '所购': | ||
831 | x = get_table_info(span[0], ocr_results[key_gmjg][0], ocr_results) | ||
832 | y = get_table_info(span[0], ocr_results[key_sjrzje][0], ocr_results) | ||
833 | line = [span[1].replace('\u3000', ' '), x, y] | ||
834 | lines.append(line) | ||
835 | anchor = np.array(span[0]).reshape((-1, 2)) | ||
836 | anchor[:, 1] += rh | ||
837 | total = get_table_info(ocr_results[key_total][0], ocr_results[key_sjrzje][0], ocr_results) | ||
838 | lines.append(['总计', '', total]) | ||
820 | 839 | ||
821 | table_add_product['words'] = lines | 840 | table_add_product['words'] = lines |
822 | table_add_product['page'] = page | 841 | table_add_product['page'] = add_product_page_num |
823 | table_add_product['position'] = None | 842 | table_add_product['position'] = None |
824 | return table_add_product | 843 | return table_add_product |
825 | 844 | ... | ... |
-
Please register or sign in to post a comment