fix new hil contract
Showing 1 changed file with 104 additions and 29 deletions
| ... | @@ -753,71 +753,146 @@ class Finder: | ... | @@ -753,71 +753,146 @@ class Finder: |
| 753 | if re.match('保证人3', text) is not None: | 753 | if re.match('保证人3', text) is not None: |
| 754 | anchor = [bbox[0], bbox[1]] | 754 | anchor = [bbox[0], bbox[1]] |
| 755 | 755 | ||
| 756 | need_bbox_find_keys_bbox = [None, None, None] | ||
| 756 | if anchor is not None: | 757 | if anchor is not None: |
| 757 | for block in self.pdf_info[page_num]['blocks']: | 758 | for block in self.pdf_info[page_num]['blocks']: |
| 759 | if all(need_bbox_find_keys_bbox): | ||
| 760 | break | ||
| 758 | if block['type'] != 0: | 761 | if block['type'] != 0: |
| 759 | continue | 762 | continue |
| 760 | for line in block['lines']: | 763 | for line in block['lines']: |
| 764 | if all(need_bbox_find_keys_bbox): | ||
| 765 | break | ||
| 761 | for span in line['spans']: | 766 | for span in line['spans']: |
| 767 | if all(need_bbox_find_keys_bbox): | ||
| 768 | break | ||
| 769 | |||
| 762 | bbox, text = span['bbox'], span['text'] | 770 | bbox, text = span['bbox'], span['text'] |
| 763 | # 找到角色姓名 | 771 | # 找到角色姓名 |
| 764 | if re.match(role_key, text) is not None: | 772 | if re.match(role_key, text) is not None: |
| 765 | words = text.split(':')[-1] | 773 | words = text.split(':')[-1] |
| 766 | name['words'] = words | 774 | if len(words) == 0: |
| 767 | name['page'] = page_num | 775 | need_bbox_find_keys_bbox[0] = bbox |
| 768 | name['position'] = bbox | 776 | else: |
| 777 | name['words'] = words | ||
| 778 | name['page'] = page_num | ||
| 779 | name['position'] = bbox | ||
| 780 | continue | ||
| 769 | if role_key == '承租人:': | 781 | if role_key == '承租人:': |
| 770 | # 找到证件号码且确定位置 | 782 | # 找到证件号码且确定位置 |
| 771 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]: | 783 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]: |
| 772 | words = text.split(':')[-1] | 784 | words = text.split(':')[-1] |
| 773 | id_num['words'] = words | 785 | if len(words) == 0: |
| 774 | id_num['page'] = page_num | 786 | need_bbox_find_keys_bbox[1] = bbox |
| 775 | id_num['position'] = bbox | 787 | else: |
| 788 | id_num['words'] = words | ||
| 789 | id_num['page'] = page_num | ||
| 790 | id_num['position'] = bbox | ||
| 776 | # 找到法人代表且确定位置 | 791 | # 找到法人代表且确定位置 |
| 777 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]: | 792 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]: |
| 778 | words = text.split(':')[-1] | 793 | words = text.split(':')[-1] |
| 779 | representative['words'] = words | 794 | if len(words) == 0: |
| 780 | representative['page'] = page_num | 795 | need_bbox_find_keys_bbox[2] = bbox |
| 781 | representative['position'] = bbox | 796 | else: |
| 782 | if role_key == '保证人1:': | 797 | representative['words'] = words |
| 798 | representative['page'] = page_num | ||
| 799 | representative['position'] = bbox | ||
| 800 | elif role_key == '保证人1:': | ||
| 783 | # 找到证件号码且确定位置 | 801 | # 找到证件号码且确定位置 |
| 784 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]: | 802 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]: |
| 785 | words = text.split(':')[-1] | 803 | words = text.split(':')[-1] |
| 786 | id_num['words'] = words | 804 | if len(words) == 0: |
| 787 | id_num['page'] = page_num | 805 | need_bbox_find_keys_bbox[1] = bbox |
| 788 | id_num['position'] = bbox | 806 | else: |
| 807 | id_num['words'] = words | ||
| 808 | id_num['page'] = page_num | ||
| 809 | id_num['position'] = bbox | ||
| 789 | # 找到法人代表且确定位置 | 810 | # 找到法人代表且确定位置 |
| 790 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]: | 811 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]: |
| 791 | words = text.split(':')[-1] | 812 | words = text.split(':')[-1] |
| 792 | representative['words'] = words | 813 | if len(words) == 0: |
| 793 | representative['page'] = page_num | 814 | need_bbox_find_keys_bbox[2] = bbox |
| 794 | representative['position'] = bbox | 815 | else: |
| 795 | if role_key == '保证人2:': | 816 | representative['words'] = words |
| 817 | representative['page'] = page_num | ||
| 818 | representative['position'] = bbox | ||
| 819 | elif role_key == '保证人2:': | ||
| 796 | # 找到证件号码且确定位置 | 820 | # 找到证件号码且确定位置 |
| 797 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]: | 821 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]: |
| 798 | words = text.split(':')[-1] | 822 | words = text.split(':')[-1] |
| 799 | id_num['words'] = words | 823 | if len(words) == 0: |
| 800 | id_num['page'] = page_num | 824 | need_bbox_find_keys_bbox[1] = bbox |
| 801 | id_num['position'] = bbox | 825 | else: |
| 826 | id_num['words'] = words | ||
| 827 | id_num['page'] = page_num | ||
| 828 | id_num['position'] = bbox | ||
| 802 | # 找到法人代表且确定位置 | 829 | # 找到法人代表且确定位置 |
| 803 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]: | 830 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]: |
| 804 | words = text.split(':')[-1] | 831 | words = text.split(':')[-1] |
| 805 | representative['words'] = words | 832 | if len(words) == 0: |
| 806 | representative['page'] = page_num | 833 | need_bbox_find_keys_bbox[2] = bbox |
| 807 | representative['position'] = bbox | 834 | else: |
| 808 | if role_key == '保证人3:': | 835 | representative['words'] = words |
| 836 | representative['page'] = page_num | ||
| 837 | representative['position'] = bbox | ||
| 838 | elif role_key == '保证人3:': | ||
| 809 | # 找到证件号码且确定位置 | 839 | # 找到证件号码且确定位置 |
| 810 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]: | 840 | if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]: |
| 811 | words = text.split(':')[-1] | 841 | words = text.split(':')[-1] |
| 812 | id_num['words'] = words | 842 | if len(words) == 0: |
| 813 | id_num['page'] = page_num | 843 | need_bbox_find_keys_bbox[1] = bbox |
| 814 | id_num['position'] = bbox | 844 | else: |
| 845 | id_num['words'] = words | ||
| 846 | id_num['page'] = page_num | ||
| 847 | id_num['position'] = bbox | ||
| 815 | # 找到法人代表且确定位置 | 848 | # 找到法人代表且确定位置 |
| 816 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]: | 849 | if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]: |
| 817 | words = text.split(':')[-1] | 850 | words = text.split(':')[-1] |
| 818 | representative['words'] = words | 851 | if len(words) == 0: |
| 852 | need_bbox_find_keys_bbox[2] = bbox | ||
| 853 | else: | ||
| 854 | representative['words'] = words | ||
| 855 | representative['page'] = page_num | ||
| 856 | representative['position'] = bbox | ||
| 857 | for idx, bbox in enumerate(need_bbox_find_keys_bbox): | ||
| 858 | if bbox is None: | ||
| 859 | continue | ||
| 860 | is_find = False | ||
| 861 | if idx == 1: | ||
| 862 | width_rate = 3 | ||
| 863 | else: | ||
| 864 | width_rate = 1 | ||
| 865 | minx = bbox[2] | ||
| 866 | maxx = bbox[2] + (width_rate * (bbox[2]-bbox[0])) | ||
| 867 | miny = bbox[1] | ||
| 868 | maxy = bbox[3] | ||
| 869 | for block in self.pdf_info[page_num]['blocks']: | ||
| 870 | if block['type'] != 0: | ||
| 871 | continue | ||
| 872 | if is_find: | ||
| 873 | break | ||
| 874 | for line in block['lines']: | ||
| 875 | if is_find: | ||
| 876 | break | ||
| 877 | for span in line['spans']: | ||
| 878 | if is_find: | ||
| 879 | break | ||
| 880 | value_bbox, text = span['bbox'], span['text'] | ||
| 881 | if minx < np.mean(value_bbox[::2]) < maxx and miny < np.mean(value_bbox[1::2]) < maxy: | ||
| 882 | if idx == 0: | ||
| 883 | name['words'] = text | ||
| 884 | name['page'] = page_num | ||
| 885 | name['position'] = value_bbox | ||
| 886 | elif idx == 1: | ||
| 887 | id_num['words'] = text | ||
| 888 | id_num['page'] = page_num | ||
| 889 | id_num['position'] = value_bbox | ||
| 890 | elif idx == 2: | ||
| 891 | representative['words'] = text | ||
| 819 | representative['page'] = page_num | 892 | representative['page'] = page_num |
| 820 | representative['position'] = bbox | 893 | representative['position'] = value_bbox |
| 894 | is_find = True | ||
| 895 | break | ||
| 821 | return name, id_num, representative | 896 | return name, id_num, representative |
| 822 | 897 | ||
| 823 | def get_table_add_product(self): | 898 | def get_table_add_product(self): | ... | ... |
-
Please register or sign in to post a comment