e-contract fix
Showing 2 changed files with 48 additions and 30 deletions
| ... | @@ -295,9 +295,8 @@ class Finder: | ... | @@ -295,9 +295,8 @@ class Finder: |
| 295 | bbox_ytzje = bbox | 295 | bbox_ytzje = bbox |
| 296 | if text == '贷款本金3': | 296 | if text == '贷款本金3': |
| 297 | bbox_dkbj = bbox | 297 | bbox_dkbj = bbox |
| 298 | if text == '附加产品融资贷款本': | 298 | if text in ['附加产品融资贷款本', '附加产品融资贷款本金', '附加产品融资贷']: |
| 299 | bbox_total = bbox | 299 | bbox_total = bbox |
| 300 | # print(bbox_xm, bbox_ytzje, bbox_dkbj, bbox_total) | ||
| 301 | if bbox_xm: | 300 | if bbox_xm: |
| 302 | for i in range(10): | 301 | for i in range(10): |
| 303 | rh = abs(bbox_xm[1] - bbox_xm[-1]) | 302 | rh = abs(bbox_xm[1] - bbox_xm[-1]) |
| ... | @@ -337,7 +336,7 @@ class Finder: | ... | @@ -337,7 +336,7 @@ class Finder: |
| 337 | _iou, _key = self.get_top_iou(poly=anchor, ocr_result=self.ocr_results[page_num]) | 336 | _iou, _key = self.get_top_iou(poly=anchor, ocr_result=self.ocr_results[page_num]) |
| 338 | bbox, total_text = self.ocr_results[page_num][_key] | 337 | bbox, total_text = self.ocr_results[page_num][_key] |
| 339 | asp_details_table.append(['附加产品融资贷款本金总金额:', '', total_text]) | 338 | asp_details_table.append(['附加产品融资贷款本金总金额:', '', total_text]) |
| 340 | asp_details_table_term['words'] = asp_details_table | 339 | asp_details_table_term['words'] = asp_details_table |
| 341 | return asp_details_table_term | 340 | return asp_details_table_term |
| 342 | 341 | ||
| 343 | def get_signature(self): | 342 | def get_signature(self): |
| ... | @@ -904,4 +903,4 @@ class Finder: | ... | @@ -904,4 +903,4 @@ class Finder: |
| 904 | new_results = {"is_asp": self.is_asp, | 903 | new_results = {"is_asp": self.is_asp, |
| 905 | "page_info": self.init_result | 904 | "page_info": self.init_result |
| 906 | } | 905 | } |
| 907 | return new_results | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file |
| 906 | return new_results | ... | ... |
| ... | @@ -618,7 +618,7 @@ class Finder: | ... | @@ -618,7 +618,7 @@ class Finder: |
| 618 | page_num = i | 618 | page_num = i |
| 619 | texts.append(text) | 619 | texts.append(text) |
| 620 | boxes.append(bbox) | 620 | boxes.append(bbox) |
| 621 | print(texts) | 621 | # print(texts) |
| 622 | if len(texts) > 4: | 622 | if len(texts) > 4: |
| 623 | words = '有' | 623 | words = '有' |
| 624 | else: | 624 | else: |
| ... | @@ -785,41 +785,60 @@ class Finder: | ... | @@ -785,41 +785,60 @@ class Finder: |
| 785 | 785 | ||
| 786 | def get_table_add_product(self): | 786 | def get_table_add_product(self): |
| 787 | table_add_product = self.item.copy() | 787 | table_add_product = self.item.copy() |
| 788 | items = [] | 788 | add_product_page_num = None |
| 789 | start = False | ||
| 790 | page = None | ||
| 791 | greater_equal_v35 = False | ||
| 792 | for pno in self.pdf_info: | 789 | for pno in self.pdf_info: |
| 793 | condition = False | ||
| 794 | for block in self.pdf_info[f'{pno}']['blocks']: | 790 | for block in self.pdf_info[f'{pno}']['blocks']: |
| 795 | if block['type'] != 0: | 791 | if block['type'] != 0: |
| 796 | continue | 792 | continue |
| 797 | for line in block['lines']: | 793 | for line in block['lines']: |
| 798 | for span in line['spans']: | 794 | for span in line['spans']: |
| 799 | bbox, text = span['bbox'], span['text'] | 795 | bbox, text = span['bbox'], span['text'] |
| 800 | if text == '租赁利率': | 796 | if '车辆附加产品(明细见下表)' in text: |
| 801 | greater_equal_v35 = True | 797 | add_product_page_num = pno |
| 802 | if '总计' in text: | 798 | ocr_results = [] |
| 803 | start = True | 799 | for block in self.pdf_info[f'{add_product_page_num}']['blocks']: |
| 804 | if '注:出租人向承租人购买租赁车辆的对价' in text: | 800 | if block['type'] != 0: |
| 805 | page = pno | 801 | continue |
| 806 | start = False | 802 | for line in block['lines']: |
| 807 | if start == True: | 803 | for span in line['spans']: |
| 808 | items.append(text) | 804 | bbox, text = span['bbox'], span['text'] |
| 805 | xmin, ymin, xmax, ymax = bbox | ||
| 806 | bbox = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax] | ||
| 807 | ocr_results.append([bbox, text]) | ||
| 809 | lines = [['项目', '购买价格', '实际融资金额']] | 808 | lines = [['项目', '购买价格', '实际融资金额']] |
| 810 | if greater_equal_v35: | 809 | key_xm = None |
| 811 | for i in range(len(items) // 4): | 810 | key_gmjg = None |
| 812 | line = [items[2 + i * 4 + 0], items[2 + i * 4 + 1], items[2 + i * 4 + 2]] | 811 | key_sjrzje = None |
| 813 | lines.append(line) | 812 | key_total = None |
| 814 | else: | 813 | for index, span in enumerate(ocr_results): |
| 815 | for i in range(len(items) // 3): | 814 | if span[1] == '项目': |
| 816 | line = [items[2 + i * 3 + 0], items[2 + i * 3 + 1], items[2 + i * 3 + 2]] | 815 | key_xm = index |
| 817 | lines.append(line) | 816 | if span[1] == '购买价格': |
| 818 | if len(items) > 0: | 817 | key_gmjg = index |
| 819 | lines.append([items[0], '', items[1]]) | 818 | if span[1] == '实际融资金额': |
| 819 | key_sjrzje = index | ||
| 820 | if span[1] == '总计': | ||
| 821 | key_total = index | ||
| 822 | bbox, text = ocr_results[key_xm] | ||
| 823 | rh = abs(bbox[1] - bbox[-1]) | ||
| 824 | anchor = np.array(bbox).reshape((-1, 2)) | ||
| 825 | anchor[:, 0] += 2 * rh | ||
| 826 | anchor[:, 1] += rh | ||
| 827 | for i in range(5): | ||
| 828 | for span in ocr_results: | ||
| 829 | iou = caculate_iou(anchor, span[0]) | ||
| 830 | if iou > 0 and span[1].strip() != '所购': | ||
| 831 | x = get_table_info(span[0], ocr_results[key_gmjg][0], ocr_results) | ||
| 832 | y = get_table_info(span[0], ocr_results[key_sjrzje][0], ocr_results) | ||
| 833 | line = [span[1].replace('\u3000', ' '), x, y] | ||
| 834 | lines.append(line) | ||
| 835 | anchor = np.array(span[0]).reshape((-1, 2)) | ||
| 836 | anchor[:, 1] += rh | ||
| 837 | total = get_table_info(ocr_results[key_total][0], ocr_results[key_sjrzje][0], ocr_results) | ||
| 838 | lines.append(['总计', '', total]) | ||
| 820 | 839 | ||
| 821 | table_add_product['words'] = lines | 840 | table_add_product['words'] = lines |
| 822 | table_add_product['page'] = page | 841 | table_add_product['page'] = add_product_page_num |
| 823 | table_add_product['position'] = None | 842 | table_add_product['position'] = None |
| 824 | return table_add_product | 843 | return table_add_product |
| 825 | 844 | ... | ... |
-
Please register or sign in to post a comment