f2cd1c73 by 周伟奇

fix new hil contract

1 parent c80aa3cf
......@@ -753,71 +753,146 @@ class Finder:
if re.match('保证人3', text) is not None:
anchor = [bbox[0], bbox[1]]
need_bbox_find_keys_bbox = [None, None, None]
if anchor is not None:
for block in self.pdf_info[page_num]['blocks']:
if all(need_bbox_find_keys_bbox):
break
if block['type'] != 0:
continue
for line in block['lines']:
if all(need_bbox_find_keys_bbox):
break
for span in line['spans']:
if all(need_bbox_find_keys_bbox):
break
bbox, text = span['bbox'], span['text']
# 找到角色姓名
if re.match(role_key, text) is not None:
words = text.split(':')[-1]
if len(words) == 0:
need_bbox_find_keys_bbox[0] = bbox
else:
name['words'] = words
name['page'] = page_num
name['position'] = bbox
continue
if role_key == '承租人:':
# 找到证件号码且确定位置
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
words = text.split(':')[-1]
if len(words) == 0:
need_bbox_find_keys_bbox[1] = bbox
else:
id_num['words'] = words
id_num['page'] = page_num
id_num['position'] = bbox
# 找到法人代表且确定位置
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
words = text.split(':')[-1]
if len(words) == 0:
need_bbox_find_keys_bbox[2] = bbox
else:
representative['words'] = words
representative['page'] = page_num
representative['position'] = bbox
if role_key == '保证人1:':
elif role_key == '保证人1:':
# 找到证件号码且确定位置
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
words = text.split(':')[-1]
if len(words) == 0:
need_bbox_find_keys_bbox[1] = bbox
else:
id_num['words'] = words
id_num['page'] = page_num
id_num['position'] = bbox
# 找到法人代表且确定位置
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) < anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
words = text.split(':')[-1]
if len(words) == 0:
need_bbox_find_keys_bbox[2] = bbox
else:
representative['words'] = words
representative['page'] = page_num
representative['position'] = bbox
if role_key == '保证人2:':
elif role_key == '保证人2:':
# 找到证件号码且确定位置
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
words = text.split(':')[-1]
if len(words) == 0:
need_bbox_find_keys_bbox[1] = bbox
else:
id_num['words'] = words
id_num['page'] = page_num
id_num['position'] = bbox
# 找到法人代表且确定位置
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) < anchor[1]:
words = text.split(':')[-1]
if len(words) == 0:
need_bbox_find_keys_bbox[2] = bbox
else:
representative['words'] = words
representative['page'] = page_num
representative['position'] = bbox
if role_key == '保证人3:':
elif role_key == '保证人3:':
# 找到证件号码且确定位置
if re.match('证件号码:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
words = text.split(':')[-1]
if len(words) == 0:
need_bbox_find_keys_bbox[1] = bbox
else:
id_num['words'] = words
id_num['page'] = page_num
id_num['position'] = bbox
# 找到法人代表且确定位置
if re.match('法定代表人或授权代表:', text) is not None and np.mean(bbox[::2]) > anchor[0] and np.mean(bbox[1::2]) > anchor[1]:
words = text.split(':')[-1]
if len(words) == 0:
need_bbox_find_keys_bbox[2] = bbox
else:
representative['words'] = words
representative['page'] = page_num
representative['position'] = bbox
for idx, bbox in enumerate(need_bbox_find_keys_bbox):
if bbox is None:
continue
is_find = False
if idx == 1:
width_rate = 3
else:
width_rate = 1
minx = bbox[2]
maxx = bbox[2] + (width_rate * (bbox[2]-bbox[0]))
miny = bbox[1]
maxy = bbox[3]
for block in self.pdf_info[page_num]['blocks']:
if block['type'] != 0:
continue
if is_find:
break
for line in block['lines']:
if is_find:
break
for span in line['spans']:
if is_find:
break
value_bbox, text = span['bbox'], span['text']
if minx < np.mean(value_bbox[::2]) < maxx and miny < np.mean(value_bbox[1::2]) < maxy:
if idx == 0:
name['words'] = text
name['page'] = page_num
name['position'] = value_bbox
elif idx == 1:
id_num['words'] = text
id_num['page'] = page_num
id_num['position'] = value_bbox
elif idx == 2:
representative['words'] = text
representative['page'] = page_num
representative['position'] = value_bbox
is_find = True
break
return name, id_num, representative
def get_table_add_product(self):
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!