modify result
Showing
2 changed files
with
65 additions
and
18 deletions
... | @@ -20,8 +20,21 @@ from authorization_from import retriever_individuals, retriever_companies | ... | @@ -20,8 +20,21 @@ from authorization_from import retriever_individuals, retriever_companies |
20 | # 个人授权书 | 20 | # 个人授权书 |
21 | res = retriever_companies.get_target_fields(go_res, signature_res) | 21 | res = retriever_companies.get_target_fields(go_res, signature_res) |
22 | print(res) | 22 | print(res) |
23 | # {'words_result': | ||
24 | # {'姓名': {'words': '李子君', 'score': 1, 'location': {'left': 105, 'top': 277, 'width': 60, 'height': 28}}, | ||
25 | # '个人身份证件号码': {'words': '321001198601050010', 'score': 1, 'location': {'left': 320, 'top': 278, 'width': ..., 'height': 24}}, | ||
26 | # '签字': {'words': '有', 'score': 1, 'location': {'left': 540, 'top': 1293, 'width': 143, 'height': 91}}} | ||
27 | # } | ||
23 | 28 | ||
24 | # 公司授权书 | 29 | # 公司授权书 |
25 | # res = retriever_individuals.get_target_fields(go_res, signature_res) | 30 | # res = retriever_individuals.get_target_fields(go_res, signature_res) |
26 | # print(res) | 31 | # print(res) |
32 | |||
33 | # {'words_result': { | ||
34 | # '经销商名称': {'words': 'xxx', 'score': 1, 'location': {'left': 473, 'top': 440, 'width': 331, 'height': 27}}, | ||
35 | # '经销商代码-宝马中国': {'words': '42330', 'score': 1, 'location': {'left': 1285, 'top': 414, 'width': 65, 'height': 26}}, | ||
36 | # '管理人员姓名-总经理': {'words': '胡开', 'score': 1, 'location': {'left': 703, 'top': 815, 'width': 44, 'height': 24}}, | ||
37 | # '公司公章': {'words': '有', 'score': 1, 'location': {'left': 239, 'top': 1548, 'width': 323, 'height': 329}}, | ||
38 | # '法定代表人签章': {'words': '有', 'score': 1, 'location': {'left': 621, 'top': 1657, 'width': 161, 'height': 164}}} | ||
39 | # } | ||
27 | ``` | 40 | ``` | ... | ... |
... | @@ -52,12 +52,14 @@ class Retriever: | ... | @@ -52,12 +52,14 @@ class Retriever: |
52 | 52 | ||
53 | x_min = None | 53 | x_min = None |
54 | value = None | 54 | value = None |
55 | coordinates = None | ||
55 | for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): | 56 | for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): |
56 | if y0 > y_min and y1 < y_max and x0 > x: | 57 | if y0 > y_min and y1 < y_max and x0 > x: |
57 | if x_min is None or x0 < x_min: | 58 | if x_min is None or x0 < x_min: |
58 | x_min = x0 | 59 | x_min = x0 |
59 | value = text | 60 | value = text |
60 | return value | 61 | coordinates = (x0, y0, x1, y1) |
62 | return value, coordinates | ||
61 | 63 | ||
62 | @staticmethod | 64 | @staticmethod |
63 | def value_under(go_res, key_coordinates, left_padding, right_padding): | 65 | def value_under(go_res, key_coordinates, left_padding, right_padding): |
... | @@ -69,12 +71,38 @@ class Retriever: | ... | @@ -69,12 +71,38 @@ class Retriever: |
69 | 71 | ||
70 | y_min = None | 72 | y_min = None |
71 | value = None | 73 | value = None |
74 | coordinates = None | ||
72 | for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): | 75 | for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): |
73 | if x0 > x_min and x1 < x_max and y0 > y: | 76 | if x0 > x_min and x1 < x_max and y0 > y: |
74 | if y_min is None or y0 < y_min: | 77 | if y_min is None or y0 < y_min: |
75 | y_min = y0 | 78 | y_min = y0 |
76 | value = text | 79 | value = text |
77 | return value | 80 | coordinates = (x0, y0, x1, y1) |
81 | return value, coordinates | ||
82 | |||
83 | @staticmethod | ||
84 | def rebuild_res(value_res, coordinates_res, is_signature=False): | ||
85 | words_result = dict() | ||
86 | for key, value in value_res.items(): | ||
87 | if is_signature: | ||
88 | coordinates_dict = coordinates_res.get(key, dict()) | ||
89 | x0 = coordinates_dict.get('xmin', -1) | ||
90 | y0 = coordinates_dict.get('ymin', -1) | ||
91 | x1 = coordinates_dict.get('xmax', -1) | ||
92 | y1 = coordinates_dict.get('ymax', -1) | ||
93 | else: | ||
94 | x0, y0, x1, y1 = coordinates_res.get(key, (-1, -1, -1, -1)) | ||
95 | words_result[key] = { | ||
96 | 'words': value, | ||
97 | 'score': -1 if not is_signature and x0 == -1 else 1, | ||
98 | 'location': { | ||
99 | 'left': x0, | ||
100 | 'top': y0, | ||
101 | 'width': x1-x0, | ||
102 | 'height': y1-y0, | ||
103 | } | ||
104 | } | ||
105 | return words_result | ||
78 | 106 | ||
79 | def get_target_fields(self, go_res, signature_res_list): | 107 | def get_target_fields(self, go_res, signature_res_list): |
80 | # 搜索关键词 | 108 | # 搜索关键词 |
... | @@ -101,35 +129,41 @@ class Retriever: | ... | @@ -101,35 +129,41 @@ class Retriever: |
101 | key_coordinates_info[field] = pre_key_coordinates | 129 | key_coordinates_info[field] = pre_key_coordinates |
102 | 130 | ||
103 | # 搜索字段值 | 131 | # 搜索字段值 |
104 | res = dict() | 132 | value_res = dict() |
133 | coordinates_res = dict() | ||
105 | for field, (direction, kwargs, default_value) in self.target_fields[self.value_str].items(): | 134 | for field, (direction, kwargs, default_value) in self.target_fields[self.value_str].items(): |
106 | if not isinstance(key_coordinates_info.get(field), tuple): | 135 | if not isinstance(key_coordinates_info.get(field), tuple): |
107 | res[field] = default_value | 136 | value_res[field] = default_value |
108 | break | 137 | continue |
109 | value = getattr(self, 'value_{0}'.format(direction))( | 138 | value, coordinates = getattr(self, 'value_{0}'.format(direction))( |
110 | go_res, | 139 | go_res, |
111 | key_coordinates_info[field], | 140 | key_coordinates_info[field], |
112 | **kwargs | 141 | **kwargs |
113 | ) | 142 | ) |
114 | if not isinstance(value, str): | 143 | if not isinstance(value, str): |
115 | res[field] = default_value | 144 | value_res[field] = default_value |
116 | else: | 145 | else: |
117 | res[field] = value | 146 | value_res[field] = value |
147 | coordinates_res[field] = coordinates | ||
118 | 148 | ||
119 | # 搜索签章 | 149 | # 搜索签章 |
120 | tmp_signature_count = dict() | 150 | tmp_signature_info = dict() |
151 | signature_coordinates_res = dict() | ||
152 | signature_value_res = dict() | ||
121 | for signature_dict in signature_res_list: | 153 | for signature_dict in signature_res_list: |
122 | if signature_dict['label'] in tmp_signature_count: | 154 | tmp_signature_info.setdefault(signature_dict['label'], list()).append(signature_dict['location']) |
123 | tmp_signature_count[signature_dict['label']] += 1 | 155 | |
124 | else: | ||
125 | tmp_signature_count[signature_dict['label']] = 1 | ||
126 | for field, signature_type_set in self.target_fields[self.signature_str].items(): | 156 | for field, signature_type_set in self.target_fields[self.signature_str].items(): |
127 | for signature_type in signature_type_set: | 157 | for signature_type in signature_type_set: |
128 | if tmp_signature_count.get(signature_type, 0) > 0: | 158 | if len(tmp_signature_info.get(signature_type, [])) > 0: |
129 | res[field] = self.signature_have_str | 159 | signature_value_res[field] = self.signature_have_str |
130 | tmp_signature_count[signature_type] -= 1 | 160 | signature_coordinates_res[field] = tmp_signature_info[signature_type].pop(0) |
131 | break | 161 | break |
132 | else: | 162 | else: |
133 | res[field] = self.signature_have_not_str | 163 | signature_value_res[field] = self.signature_have_not_str |
164 | |||
165 | words_result = self.rebuild_res(value_res, coordinates_res) | ||
166 | words_result_signature = self.rebuild_res(signature_value_res, signature_coordinates_res, True) | ||
167 | words_result.update(words_result_signature) | ||
134 | 168 | ||
135 | return res | 169 | return {'words_result': words_result} | ... | ... |
-
Please register or sign in to post a comment