7ab1add6 by 周伟奇

modify result

1 parent d1752a9f
......@@ -20,8 +20,21 @@ from authorization_from import retriever_individuals, retriever_companies
# 个人授权书
res = retriever_companies.get_target_fields(go_res, signature_res)
print(res)
# {'words_result':
# {'姓名': {'words': '李子君', 'score': 1, 'location': {'left': 105, 'top': 277, 'width': 60, 'height': 28}},
# '个人身份证件号码': {'words': '321001198601050010', 'score': 1, 'location': {'left': 320, 'top': 278, 'widtght': 24}},
# '签字': {'words': '有', 'score': 1, 'location': {'left': 540, 'top': 1293, 'width': 143, 'height': 91}}}
# }
# 公司授权书
# res = retriever_individuals.get_target_fields(go_res, signature_res)
# print(res)
# {'words_result': {
# '经销商名称': {'words': 'xxx', 'score': 1, 'location': {'left': 473, 'top': 440, 'width': 331, 'height': 27}},
# '经销商代码-宝马中国': {'words': '42330', 'score': 1, 'location': {'left': 1285, 'top': 414, 'width': 65, 'height': 26}},
# '管理人员姓名-总经理': {'words': '胡开', 'score': 1, 'location': {'left': 703, 'top': 815, 'width': 44, 'height': 24}},
# '公司公章': {'words': '有', 'score': 1, 'location': {'left': 239, 'top': 1548, 'width': 323, 'height': 329}},
# '法定代表人签章': {'words': '有', 'score': 1, 'location': {'left': 621, 'top': 1657, 'width': 161, 'height': 164}}}
# }
```
......
......@@ -52,12 +52,14 @@ class Retriever:
x_min = None
value = None
coordinates = None
for (x0, y0, _, _, x1, y1, _, _), text in go_res.values():
if y0 > y_min and y1 < y_max and x0 > x:
if x_min is None or x0 < x_min:
x_min = x0
value = text
return value
coordinates = (x0, y0, x1, y1)
return value, coordinates
@staticmethod
def value_under(go_res, key_coordinates, left_padding, right_padding):
......@@ -69,12 +71,38 @@ class Retriever:
y_min = None
value = None
coordinates = None
for (x0, y0, _, _, x1, y1, _, _), text in go_res.values():
if x0 > x_min and x1 < x_max and y0 > y:
if y_min is None or y0 < y_min:
y_min = y0
value = text
return value
coordinates = (x0, y0, x1, y1)
return value, coordinates
@staticmethod
def rebuild_res(value_res, coordinates_res, is_signature=False):
    """Merge field values and their coordinates into the ``words_result`` layout.

    Args:
        value_res: Mapping of field name to the value reported for that field.
        coordinates_res: Mapping of field name to its location. When
            ``is_signature`` is false each entry is unpacked as
            ``(x0, y0, x1, y1)``; when it is true each entry is read as a
            mapping with the keys ``xmin``, ``ymin``, ``xmax`` and ``ymax``.
        is_signature: Selects which of the two coordinate layouts is read.

    Returns:
        A dict with one entry per key of ``value_res``. Each entry holds
        ``words`` (the value), ``score`` and ``location`` (``left``, ``top``,
        ``width``, ``height``). Coordinates that are not available are
        reported as ``-1``, which yields a width and height of ``0``.
    """
    missing = -1
    words_result = {}
    for key, value in value_res.items():
        # An absent key and an explicitly stored None are handled the same
        # way, so a field without usable coordinates cannot break the
        # tuple unpacking or the ``.get`` lookups below.
        found = coordinates_res.get(key)
        if is_signature:
            box = {} if found is None else found
            x0 = box.get('xmin', missing)
            y0 = box.get('ymin', missing)
            x1 = box.get('xmax', missing)
            y1 = box.get('ymax', missing)
        elif found is None:
            x0 = y0 = x1 = y1 = missing
        else:
            x0, y0, x1, y1 = found

        # Only a non-signature field without a left coordinate is scored -1;
        # signature entries always keep a score of 1.
        unlocated_text_field = not is_signature and x0 == missing
        words_result[key] = {
            'words': value,
            'score': -1 if unlocated_text_field else 1,
            'location': {
                'left': x0,
                'top': y0,
                'width': x1 - x0,
                'height': y1 - y0,
            },
        }
    return words_result
def get_target_fields(self, go_res, signature_res_list):
# 搜索关键词
......@@ -101,35 +129,41 @@ class Retriever:
key_coordinates_info[field] = pre_key_coordinates
# 搜索字段值
res = dict()
value_res = dict()
coordinates_res = dict()
for field, (direction, kwargs, default_value) in self.target_fields[self.value_str].items():
if not isinstance(key_coordinates_info.get(field), tuple):
res[field] = default_value
break
value = getattr(self, 'value_{0}'.format(direction))(
value_res[field] = default_value
continue
value, coordinates = getattr(self, 'value_{0}'.format(direction))(
go_res,
key_coordinates_info[field],
**kwargs
)
if not isinstance(value, str):
res[field] = default_value
value_res[field] = default_value
else:
res[field] = value
value_res[field] = value
coordinates_res[field] = coordinates
# 搜索签章
tmp_signature_count = dict()
tmp_signature_info = dict()
signature_coordinates_res = dict()
signature_value_res = dict()
for signature_dict in signature_res_list:
if signature_dict['label'] in tmp_signature_count:
tmp_signature_count[signature_dict['label']] += 1
else:
tmp_signature_count[signature_dict['label']] = 1
tmp_signature_info.setdefault(signature_dict['label'], list()).append(signature_dict['location'])
for field, signature_type_set in self.target_fields[self.signature_str].items():
for signature_type in signature_type_set:
if tmp_signature_count.get(signature_type, 0) > 0:
res[field] = self.signature_have_str
tmp_signature_count[signature_type] -= 1
if len(tmp_signature_info.get(signature_type, [])) > 0:
signature_value_res[field] = self.signature_have_str
signature_coordinates_res[field] = tmp_signature_info[signature_type].pop(0)
break
else:
res[field] = self.signature_have_not_str
signature_value_res[field] = self.signature_have_not_str
words_result = self.rebuild_res(value_res, coordinates_res)
words_result_signature = self.rebuild_res(signature_value_res, signature_coordinates_res, True)
words_result.update(words_result_signature)
return res
return {'words_result': words_result}
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!