f2cd1c73 by 周伟奇

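Fix new hil contract parsing: when a key span (role name, ID number, representative) has no inline value after the colon, take the value from the span located to its right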

1 parent c80aa3cf
...@@ -753,71 +753,146 @@ class Finder: ...@@ -753,71 +753,146 @@ class Finder:
753 if re.match('保证人3', text) is not None: 753 if re.match('保证人3', text) is not None:
754 anchor = [bbox[0], bbox[1]] 754 anchor = [bbox[0], bbox[1]]
755 755
756 need_bbox_find_keys_bbox = [None, None, None]
756 if anchor is not None: 757 if anchor is not None:
757 for block in self.pdf_info[page_num]['blocks']: 758 for block in self.pdf_info[page_num]['blocks']:
759 if all(need_bbox_find_keys_bbox):
760 break
758 if block['type'] != 0: 761 if block['type'] != 0:
759 continue 762 continue
760 for line in block['lines']: 763 for line in block['lines']:
764 if all(need_bbox_find_keys_bbox):
765 break
761 for span in line['spans']: 766 for span in line['spans']:
767 if all(need_bbox_find_keys_bbox):
768 break
769
762 bbox, text = span['bbox'], span['text'] 770 bbox, text = span['bbox'], span['text']
763 # 找到角色姓名 771 # 找到角色姓名
764 if re.match(role_key, text) is not None: 772 if re.match(role_key, text) is not None:
765 words = text.split(':')[-1] 773 words = text.split(':')[-1]
774 if len(words) == 0:
775 need_bbox_find_keys_bbox[0] = bbox
776 else:
766 name['words'] = words 777 name['words'] = words
767 name['page'] = page_num 778 name['page'] = page_num
768 name['position'] = bbox 779 name['position'] = bbox
780 continue
769 if role_key == '承租人:': 781 if role_key == '承租人:':
770 # 找到证件号码且确定位置 782 # 找到证件号码且确定位置
771 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]: 783 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
772 words = text.split(':')[-1] 784 words = text.split(':')[-1]
785 if len(words) == 0:
786 need_bbox_find_keys_bbox[1] = bbox
787 else:
773 id_num['words'] = words 788 id_num['words'] = words
774 id_num['page'] = page_num 789 id_num['page'] = page_num
775 id_num['position'] = bbox 790 id_num['position'] = bbox
776 # 找到法人代表且确定位置 791 # 找到法人代表且确定位置
777 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]: 792 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
778 words = text.split(':')[-1] 793 words = text.split(':')[-1]
794 if len(words) == 0:
795 need_bbox_find_keys_bbox[2] = bbox
796 else:
779 representative['words'] = words 797 representative['words'] = words
780 representative['page'] = page_num 798 representative['page'] = page_num
781 representative['position'] = bbox 799 representative['position'] = bbox
782 if role_key == '保证人1:': 800 elif role_key == '保证人1:':
783 # 找到证件号码且确定位置 801 # 找到证件号码且确定位置
784 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]: 802 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
785 words = text.split(':')[-1] 803 words = text.split(':')[-1]
804 if len(words) == 0:
805 need_bbox_find_keys_bbox[1] = bbox
806 else:
786 id_num['words'] = words 807 id_num['words'] = words
787 id_num['page'] = page_num 808 id_num['page'] = page_num
788 id_num['position'] = bbox 809 id_num['position'] = bbox
789 # 找到法人代表且确定位置 810 # 找到法人代表且确定位置
790 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]: 811 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
791 words = text.split(':')[-1] 812 words = text.split(':')[-1]
813 if len(words) == 0:
814 need_bbox_find_keys_bbox[2] = bbox
815 else:
792 representative['words'] = words 816 representative['words'] = words
793 representative['page'] = page_num 817 representative['page'] = page_num
794 representative['position'] = bbox 818 representative['position'] = bbox
795 if role_key == '保证人2:': 819 elif role_key == '保证人2:':
796 # 找到证件号码且确定位置 820 # 找到证件号码且确定位置
797 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]: 821 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
798 words = text.split(':')[-1] 822 words = text.split(':')[-1]
823 if len(words) == 0:
824 need_bbox_find_keys_bbox[1] = bbox
825 else:
799 id_num['words'] = words 826 id_num['words'] = words
800 id_num['page'] = page_num 827 id_num['page'] = page_num
801 id_num['position'] = bbox 828 id_num['position'] = bbox
802 # 找到法人代表且确定位置 829 # 找到法人代表且确定位置
803 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]: 830 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
804 words = text.split(':')[-1] 831 words = text.split(':')[-1]
832 if len(words) == 0:
833 need_bbox_find_keys_bbox[2] = bbox
834 else:
805 representative['words'] = words 835 representative['words'] = words
806 representative['page'] = page_num 836 representative['page'] = page_num
807 representative['position'] = bbox 837 representative['position'] = bbox
808 if role_key == '保证人3:': 838 elif role_key == '保证人3:':
809 # 找到证件号码且确定位置 839 # 找到证件号码且确定位置
810 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]: 840 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
811 words = text.split(':')[-1] 841 words = text.split(':')[-1]
842 if len(words) == 0:
843 need_bbox_find_keys_bbox[1] = bbox
844 else:
812 id_num['words'] = words 845 id_num['words'] = words
813 id_num['page'] = page_num 846 id_num['page'] = page_num
814 id_num['position'] = bbox 847 id_num['position'] = bbox
815 # 找到法人代表且确定位置 848 # 找到法人代表且确定位置
816 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]: 849 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
817 words = text.split(':')[-1] 850 words = text.split(':')[-1]
851 if len(words) == 0:
852 need_bbox_find_keys_bbox[2] = bbox
853 else:
818 representative['words'] = words 854 representative['words'] = words
819 representative['page'] = page_num 855 representative['page'] = page_num
820 representative['position'] = bbox 856 representative['position'] = bbox
857 for idx, bbox in enumerate(need_bbox_find_keys_bbox):
858 if bbox is None:
859 continue
860 is_find = False
861 if idx == 1:
862 width_rate = 3
863 else:
864 width_rate = 1
865 minx = bbox[2]
866 maxx = bbox[2] + (width_rate * (bbox[2]-bbox[0]))
867 miny = bbox[1]
868 maxy = bbox[3]
869 for block in self.pdf_info[page_num]['blocks']:
870 if block['type'] != 0:
871 continue
872 if is_find:
873 break
874 for line in block['lines']:
875 if is_find:
876 break
877 for span in line['spans']:
878 if is_find:
879 break
880 value_bbox, text = span['bbox'], span['text']
881 if minx < np.mean(value_bbox[::2]) < maxx and miny < np.mean(value_bbox[1::2]) < maxy:
882 if idx == 0:
883 name['words'] = text
884 name['page'] = page_num
885 name['position'] = value_bbox
886 elif idx == 1:
887 id_num['words'] = text
888 id_num['page'] = page_num
889 id_num['position'] = value_bbox
890 elif idx == 2:
891 representative['words'] = text
892 representative['page'] = page_num
893 representative['position'] = value_bbox
894 is_find = True
895 break
821 return name, id_num, representative 896 return name, id_num, representative
822 897
823 def get_table_add_product(self): 898 def get_table_add_product(self):
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!