f2cd1c73 by 周伟奇

fix new hil contract

1 parent c80aa3cf
...@@ -753,71 +753,146 @@ class Finder: ...@@ -753,71 +753,146 @@ class Finder:
753 if re.match('保证人3', text) is not None: 753 if re.match('保证人3', text) is not None:
754 anchor = [bbox[0], bbox[1]] 754 anchor = [bbox[0], bbox[1]]
755 755
756 need_bbox_find_keys_bbox = [None, None, None]
756 if anchor is not None: 757 if anchor is not None:
757 for block in self.pdf_info[page_num]['blocks']: 758 for block in self.pdf_info[page_num]['blocks']:
759 if all(need_bbox_find_keys_bbox):
760 break
758 if block['type'] != 0: 761 if block['type'] != 0:
759 continue 762 continue
760 for line in block['lines']: 763 for line in block['lines']:
764 if all(need_bbox_find_keys_bbox):
765 break
761 for span in line['spans']: 766 for span in line['spans']:
767 if all(need_bbox_find_keys_bbox):
768 break
769
762 bbox, text = span['bbox'], span['text'] 770 bbox, text = span['bbox'], span['text']
763 # 找到角色姓名 771 # 找到角色姓名
764 if re.match(role_key, text) is not None: 772 if re.match(role_key, text) is not None:
765 words = text.split(':')[-1] 773 words = text.split(':')[-1]
766 name['words'] = words 774 if len(words) == 0:
767 name['page'] = page_num 775 need_bbox_find_keys_bbox[0] = bbox
768 name['position'] = bbox 776 else:
777 name['words'] = words
778 name['page'] = page_num
779 name['position'] = bbox
780 continue
769 if role_key == '承租人:': 781 if role_key == '承租人:':
770 # 找到证件号码且确定位置 782 # 找到证件号码且确定位置
771 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]: 783 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
772 words = text.split(':')[-1] 784 words = text.split(':')[-1]
773 id_num['words'] = words 785 if len(words) == 0:
774 id_num['page'] = page_num 786 need_bbox_find_keys_bbox[1] = bbox
775 id_num['position'] = bbox 787 else:
788 id_num['words'] = words
789 id_num['page'] = page_num
790 id_num['position'] = bbox
776 # 找到法人代表且确定位置 791 # 找到法人代表且确定位置
777 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]: 792 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
778 words = text.split(':')[-1] 793 words = text.split(':')[-1]
779 representative['words'] = words 794 if len(words) == 0:
780 representative['page'] = page_num 795 need_bbox_find_keys_bbox[2] = bbox
781 representative['position'] = bbox 796 else:
782 if role_key == '保证人1:': 797 representative['words'] = words
798 representative['page'] = page_num
799 representative['position'] = bbox
800 elif role_key == '保证人1:':
783 # 找到证件号码且确定位置 801 # 找到证件号码且确定位置
784 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]: 802 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
785 words = text.split(':')[-1] 803 words = text.split(':')[-1]
786 id_num['words'] = words 804 if len(words) == 0:
787 id_num['page'] = page_num 805 need_bbox_find_keys_bbox[1] = bbox
788 id_num['position'] = bbox 806 else:
807 id_num['words'] = words
808 id_num['page'] = page_num
809 id_num['position'] = bbox
789 # 找到法人代表且确定位置 810 # 找到法人代表且确定位置
790 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]: 811 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
791 words = text.split(':')[-1] 812 words = text.split(':')[-1]
792 representative['words'] = words 813 if len(words) == 0:
793 representative['page'] = page_num 814 need_bbox_find_keys_bbox[2] = bbox
794 representative['position'] = bbox 815 else:
795 if role_key == '保证人2:': 816 representative['words'] = words
817 representative['page'] = page_num
818 representative['position'] = bbox
819 elif role_key == '保证人2:':
796 # 找到证件号码且确定位置 820 # 找到证件号码且确定位置
797 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]: 821 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
798 words = text.split(':')[-1] 822 words = text.split(':')[-1]
799 id_num['words'] = words 823 if len(words) == 0:
800 id_num['page'] = page_num 824 need_bbox_find_keys_bbox[1] = bbox
801 id_num['position'] = bbox 825 else:
826 id_num['words'] = words
827 id_num['page'] = page_num
828 id_num['position'] = bbox
802 # 找到法人代表且确定位置 829 # 找到法人代表且确定位置
803 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]: 830 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
804 words = text.split(':')[-1] 831 words = text.split(':')[-1]
805 representative['words'] = words 832 if len(words) == 0:
806 representative['page'] = page_num 833 need_bbox_find_keys_bbox[2] = bbox
807 representative['position'] = bbox 834 else:
808 if role_key == '保证人3:': 835 representative['words'] = words
836 representative['page'] = page_num
837 representative['position'] = bbox
838 elif role_key == '保证人3:':
809 # 找到证件号码且确定位置 839 # 找到证件号码且确定位置
810 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]: 840 if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
811 words = text.split(':')[-1] 841 words = text.split(':')[-1]
812 id_num['words'] = words 842 if len(words) == 0:
813 id_num['page'] = page_num 843 need_bbox_find_keys_bbox[1] = bbox
814 id_num['position'] = bbox 844 else:
845 id_num['words'] = words
846 id_num['page'] = page_num
847 id_num['position'] = bbox
815 # 找到法人代表且确定位置 848 # 找到法人代表且确定位置
816 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]: 849 if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
817 words = text.split(':')[-1] 850 words = text.split(':')[-1]
818 representative['words'] = words 851 if len(words) == 0:
852 need_bbox_find_keys_bbox[2] = bbox
853 else:
854 representative['words'] = words
855 representative['page'] = page_num
856 representative['position'] = bbox
857 for idx, bbox in enumerate(need_bbox_find_keys_bbox):
858 if bbox is None:
859 continue
860 is_find = False
861 if idx == 1:
862 width_rate = 3
863 else:
864 width_rate = 1
865 minx = bbox[2]
866 maxx = bbox[2] + (width_rate * (bbox[2]-bbox[0]))
867 miny = bbox[1]
868 maxy = bbox[3]
869 for block in self.pdf_info[page_num]['blocks']:
870 if block['type'] != 0:
871 continue
872 if is_find:
873 break
874 for line in block['lines']:
875 if is_find:
876 break
877 for span in line['spans']:
878 if is_find:
879 break
880 value_bbox, text = span['bbox'], span['text']
881 if minx < np.mean(value_bbox[::2]) < maxx and miny < np.mean(value_bbox[1::2]) < maxy:
882 if idx == 0:
883 name['words'] = text
884 name['page'] = page_num
885 name['position'] = value_bbox
886 elif idx == 1:
887 id_num['words'] = text
888 id_num['page'] = page_num
889 id_num['position'] = value_bbox
890 elif idx == 2:
891 representative['words'] = text
819 representative['page'] = page_num 892 representative['page'] = page_num
820 representative['position'] = bbox 893 representative['position'] = value_bbox
894 is_find = True
895 break
821 return name, id_num, representative 896 return name, id_num, representative
822 897
823 def get_table_add_product(self): 898 def get_table_add_product(self):
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!