7ab1add6 by 周伟奇

modify result

1 parent d1752a9f
...@@ -20,8 +20,21 @@ from authorization_from import retriever_individuals, retriever_companies ...@@ -20,8 +20,21 @@ from authorization_from import retriever_individuals, retriever_companies
20 # 个人授权书 20 # 个人授权书
21 res = retriever_companies.get_target_fields(go_res, signature_res) 21 res = retriever_companies.get_target_fields(go_res, signature_res)
22 print(res) 22 print(res)
23 # {'words_result':
24 # {'姓名': {'words': '李子君', 'score': 1, 'location': {'left': 105, 'top': 277, 'width': 60, 'height': 28}},
25 # '个人身份证件号码': {'words': '321001198601050010', 'score': 1, 'location': {'left': 320, 'top': 278, 'width': ..., 'height': 24}},
26 # '签字': {'words': '有', 'score': 1, 'location': {'left': 540, 'top': 1293, 'width': 143, 'height': 91}}}
27 # }
23 28
24 # 公司授权书 29 # 公司授权书
25 # res = retriever_individuals.get_target_fields(go_res, signature_res) 30 # res = retriever_individuals.get_target_fields(go_res, signature_res)
26 # print(res) 31 # print(res)
32
33 # {'words_result': {
34 # '经销商名称': {'words': 'xxx', 'score': 1, 'location': {'left': 473, 'top': 440, 'width': 331, 'height': 27}},
35 # '经销商代码-宝马中国': {'words': '42330', 'score': 1, 'location': {'left': 1285, 'top': 414, 'width': 65, 'height': 26}},
36 # '管理人员姓名-总经理': {'words': '胡开', 'score': 1, 'location': {'left': 703, 'top': 815, 'width': 44, 'height': 24}},
37 # '公司公章': {'words': '有', 'score': 1, 'location': {'left': 239, 'top': 1548, 'width': 323, 'height': 329}},
38 # '法定代表人签章': {'words': '有', 'score': 1, 'location': {'left': 621, 'top': 1657, 'width': 161, 'height': 164}}}
39 # }
27 ``` 40 ```
......
...@@ -52,12 +52,14 @@ class Retriever: ...@@ -52,12 +52,14 @@ class Retriever:
52 52
53 x_min = None 53 x_min = None
54 value = None 54 value = None
55 coordinates = None
55 for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): 56 for (x0, y0, _, _, x1, y1, _, _), text in go_res.values():
56 if y0 > y_min and y1 < y_max and x0 > x: 57 if y0 > y_min and y1 < y_max and x0 > x:
57 if x_min is None or x0 < x_min: 58 if x_min is None or x0 < x_min:
58 x_min = x0 59 x_min = x0
59 value = text 60 value = text
60 return value 61 coordinates = (x0, y0, x1, y1)
62 return value, coordinates
61 63
62 @staticmethod 64 @staticmethod
63 def value_under(go_res, key_coordinates, left_padding, right_padding): 65 def value_under(go_res, key_coordinates, left_padding, right_padding):
...@@ -69,12 +71,38 @@ class Retriever: ...@@ -69,12 +71,38 @@ class Retriever:
69 71
70 y_min = None 72 y_min = None
71 value = None 73 value = None
74 coordinates = None
72 for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): 75 for (x0, y0, _, _, x1, y1, _, _), text in go_res.values():
73 if x0 > x_min and x1 < x_max and y0 > y: 76 if x0 > x_min and x1 < x_max and y0 > y:
74 if y_min is None or y0 < y_min: 77 if y_min is None or y0 < y_min:
75 y_min = y0 78 y_min = y0
76 value = text 79 value = text
77 return value 80 coordinates = (x0, y0, x1, y1)
81 return value, coordinates
82
83 @staticmethod
84 def rebuild_res(value_res, coordinates_res, is_signature=False):
85 words_result = dict()
86 for key, value in value_res.items():
87 if is_signature:
88 coordinates_dict = coordinates_res.get(key, dict())
89 x0 = coordinates_dict.get('xmin', -1)
90 y0 = coordinates_dict.get('ymin', -1)
91 x1 = coordinates_dict.get('xmax', -1)
92 y1 = coordinates_dict.get('ymax', -1)
93 else:
94 x0, y0, x1, y1 = coordinates_res.get(key, (-1, -1, -1, -1))
95 words_result[key] = {
96 'words': value,
97 'score': -1 if not is_signature and x0 == -1 else 1,
98 'location': {
99 'left': x0,
100 'top': y0,
101 'width': x1-x0,
102 'height': y1-y0,
103 }
104 }
105 return words_result
78 106
79 def get_target_fields(self, go_res, signature_res_list): 107 def get_target_fields(self, go_res, signature_res_list):
80 # 搜索关键词 108 # 搜索关键词
...@@ -101,35 +129,41 @@ class Retriever: ...@@ -101,35 +129,41 @@ class Retriever:
101 key_coordinates_info[field] = pre_key_coordinates 129 key_coordinates_info[field] = pre_key_coordinates
102 130
103 # 搜索字段值 131 # 搜索字段值
104 res = dict() 132 value_res = dict()
133 coordinates_res = dict()
105 for field, (direction, kwargs, default_value) in self.target_fields[self.value_str].items(): 134 for field, (direction, kwargs, default_value) in self.target_fields[self.value_str].items():
106 if not isinstance(key_coordinates_info.get(field), tuple): 135 if not isinstance(key_coordinates_info.get(field), tuple):
107 res[field] = default_value 136 value_res[field] = default_value
108 break 137 continue
109 value = getattr(self, 'value_{0}'.format(direction))( 138 value, coordinates = getattr(self, 'value_{0}'.format(direction))(
110 go_res, 139 go_res,
111 key_coordinates_info[field], 140 key_coordinates_info[field],
112 **kwargs 141 **kwargs
113 ) 142 )
114 if not isinstance(value, str): 143 if not isinstance(value, str):
115 res[field] = default_value 144 value_res[field] = default_value
116 else: 145 else:
117 res[field] = value 146 value_res[field] = value
147 coordinates_res[field] = coordinates
118 148
119 # 搜索签章 149 # 搜索签章
120 tmp_signature_count = dict() 150 tmp_signature_info = dict()
151 signature_coordinates_res = dict()
152 signature_value_res = dict()
121 for signature_dict in signature_res_list: 153 for signature_dict in signature_res_list:
122 if signature_dict['label'] in tmp_signature_count: 154 tmp_signature_info.setdefault(signature_dict['label'], list()).append(signature_dict['location'])
123 tmp_signature_count[signature_dict['label']] += 1 155
124 else:
125 tmp_signature_count[signature_dict['label']] = 1
126 for field, signature_type_set in self.target_fields[self.signature_str].items(): 156 for field, signature_type_set in self.target_fields[self.signature_str].items():
127 for signature_type in signature_type_set: 157 for signature_type in signature_type_set:
128 if tmp_signature_count.get(signature_type, 0) > 0: 158 if len(tmp_signature_info.get(signature_type, [])) > 0:
129 res[field] = self.signature_have_str 159 signature_value_res[field] = self.signature_have_str
130 tmp_signature_count[signature_type] -= 1 160 signature_coordinates_res[field] = tmp_signature_info[signature_type].pop(0)
131 break 161 break
132 else: 162 else:
133 res[field] = self.signature_have_not_str 163 signature_value_res[field] = self.signature_have_not_str
164
165 words_result = self.rebuild_res(value_res, coordinates_res)
166 words_result_signature = self.rebuild_res(signature_value_res, signature_coordinates_res, True)
167 words_result.update(words_result_signature)
134 168
135 return res 169 return {'words_result': words_result}
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!