modify result
Showing
2 changed files
with
65 additions
and
18 deletions
| ... | @@ -20,8 +20,21 @@ from authorization_from import retriever_individuals, retriever_companies | ... | @@ -20,8 +20,21 @@ from authorization_from import retriever_individuals, retriever_companies |
| 20 | # 个人授权书 | 20 | # 个人授权书 |
| 21 | res = retriever_companies.get_target_fields(go_res, signature_res) | 21 | res = retriever_companies.get_target_fields(go_res, signature_res) |
| 22 | print(res) | 22 | print(res) |
| 23 | # {'words_result': | ||
| 24 | # {'姓名': {'words': '李子君', 'score': 1, 'location': {'left': 105, 'top': 277, 'width': 60, 'height': 28}}, | ||
| 25 | # '个人身份证件号码': {'words': '321001198601050010', 'score': 1, 'location': {'left': 320, 'top': 278, 'widtght': 24}}, | ||
| 26 | # '签字': {'words': '有', 'score': 1, 'location': {'left': 540, 'top': 1293, 'width': 143, 'height': 91}}} | ||
| 27 | # } | ||
| 23 | 28 | ||
| 24 | # 公司授权书 | 29 | # 公司授权书 |
| 25 | # res = retriever_individuals.get_target_fields(go_res, signature_res) | 30 | # res = retriever_individuals.get_target_fields(go_res, signature_res) |
| 26 | # print(res) | 31 | # print(res) |
| 32 | |||
| 33 | # {'words_result': { | ||
| 34 | # '经销商名称': {'words': 'xxx', 'score': 1, 'location': {'left': 473, 'top': 440, 'width': 331, 'height': 27}}, | ||
| 35 | # '经销商代码-宝马中国': {'words': '42330', 'score': 1, 'location': {'left': 1285, 'top': 414, 'width': 65, 'height': 26}}, | ||
| 36 | # '管理人员姓名-总经理': {'words': '胡开', 'score': 1, 'location': {'left': 703, 'top': 815, 'width': 44, 'height': 24}}, | ||
| 37 | # '公司公章': {'words': '有', 'score': 1, 'location': {'left': 239, 'top': 1548, 'width': 323, 'height': 329}}, | ||
| 38 | # '法定代表人签章': {'words': '有', 'score': 1, 'location': {'left': 621, 'top': 1657, 'width': 161, 'height': 164}}} | ||
| 39 | # } | ||
| 27 | ``` | 40 | ``` | ... | ... |
| ... | @@ -52,12 +52,14 @@ class Retriever: | ... | @@ -52,12 +52,14 @@ class Retriever: |
| 52 | 52 | ||
| 53 | x_min = None | 53 | x_min = None |
| 54 | value = None | 54 | value = None |
| 55 | coordinates = None | ||
| 55 | for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): | 56 | for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): |
| 56 | if y0 > y_min and y1 < y_max and x0 > x: | 57 | if y0 > y_min and y1 < y_max and x0 > x: |
| 57 | if x_min is None or x0 < x_min: | 58 | if x_min is None or x0 < x_min: |
| 58 | x_min = x0 | 59 | x_min = x0 |
| 59 | value = text | 60 | value = text |
| 60 | return value | 61 | coordinates = (x0, y0, x1, y1) |
| 62 | return value, coordinates | ||
| 61 | 63 | ||
| 62 | @staticmethod | 64 | @staticmethod |
| 63 | def value_under(go_res, key_coordinates, left_padding, right_padding): | 65 | def value_under(go_res, key_coordinates, left_padding, right_padding): |
| ... | @@ -69,12 +71,38 @@ class Retriever: | ... | @@ -69,12 +71,38 @@ class Retriever: |
| 69 | 71 | ||
| 70 | y_min = None | 72 | y_min = None |
| 71 | value = None | 73 | value = None |
| 74 | coordinates = None | ||
| 72 | for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): | 75 | for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): |
| 73 | if x0 > x_min and x1 < x_max and y0 > y: | 76 | if x0 > x_min and x1 < x_max and y0 > y: |
| 74 | if y_min is None or y0 < y_min: | 77 | if y_min is None or y0 < y_min: |
| 75 | y_min = y0 | 78 | y_min = y0 |
| 76 | value = text | 79 | value = text |
| 77 | return value | 80 | coordinates = (x0, y0, x1, y1) |
| 81 | return value, coordinates | ||
| 82 | |||
| 83 | @staticmethod | ||
| 84 | def rebuild_res(value_res, coordinates_res, is_signature=False): | ||
| 85 | words_result = dict() | ||
| 86 | for key, value in value_res.items(): | ||
| 87 | if is_signature: | ||
| 88 | coordinates_dict = coordinates_res.get(key, dict()) | ||
| 89 | x0 = coordinates_dict.get('xmin', -1) | ||
| 90 | y0 = coordinates_dict.get('ymin', -1) | ||
| 91 | x1 = coordinates_dict.get('xmax', -1) | ||
| 92 | y1 = coordinates_dict.get('ymax', -1) | ||
| 93 | else: | ||
| 94 | x0, y0, x1, y1 = coordinates_res.get(key, (-1, -1, -1, -1)) | ||
| 95 | words_result[key] = { | ||
| 96 | 'words': value, | ||
| 97 | 'score': -1 if not is_signature and x0 == -1 else 1, | ||
| 98 | 'location': { | ||
| 99 | 'left': x0, | ||
| 100 | 'top': y0, | ||
| 101 | 'width': x1-x0, | ||
| 102 | 'height': y1-y0, | ||
| 103 | } | ||
| 104 | } | ||
| 105 | return words_result | ||
| 78 | 106 | ||
| 79 | def get_target_fields(self, go_res, signature_res_list): | 107 | def get_target_fields(self, go_res, signature_res_list): |
| 80 | # 搜索关键词 | 108 | # 搜索关键词 |
| ... | @@ -101,35 +129,41 @@ class Retriever: | ... | @@ -101,35 +129,41 @@ class Retriever: |
| 101 | key_coordinates_info[field] = pre_key_coordinates | 129 | key_coordinates_info[field] = pre_key_coordinates |
| 102 | 130 | ||
| 103 | # 搜索字段值 | 131 | # 搜索字段值 |
| 104 | res = dict() | 132 | value_res = dict() |
| 133 | coordinates_res = dict() | ||
| 105 | for field, (direction, kwargs, default_value) in self.target_fields[self.value_str].items(): | 134 | for field, (direction, kwargs, default_value) in self.target_fields[self.value_str].items(): |
| 106 | if not isinstance(key_coordinates_info.get(field), tuple): | 135 | if not isinstance(key_coordinates_info.get(field), tuple): |
| 107 | res[field] = default_value | 136 | value_res[field] = default_value |
| 108 | break | 137 | continue |
| 109 | value = getattr(self, 'value_{0}'.format(direction))( | 138 | value, coordinates = getattr(self, 'value_{0}'.format(direction))( |
| 110 | go_res, | 139 | go_res, |
| 111 | key_coordinates_info[field], | 140 | key_coordinates_info[field], |
| 112 | **kwargs | 141 | **kwargs |
| 113 | ) | 142 | ) |
| 114 | if not isinstance(value, str): | 143 | if not isinstance(value, str): |
| 115 | res[field] = default_value | 144 | value_res[field] = default_value |
| 116 | else: | 145 | else: |
| 117 | res[field] = value | 146 | value_res[field] = value |
| 147 | coordinates_res[field] = coordinates | ||
| 118 | 148 | ||
| 119 | # 搜索签章 | 149 | # 搜索签章 |
| 120 | tmp_signature_count = dict() | 150 | tmp_signature_info = dict() |
| 151 | signature_coordinates_res = dict() | ||
| 152 | signature_value_res = dict() | ||
| 121 | for signature_dict in signature_res_list: | 153 | for signature_dict in signature_res_list: |
| 122 | if signature_dict['label'] in tmp_signature_count: | 154 | tmp_signature_info.setdefault(signature_dict['label'], list()).append(signature_dict['location']) |
| 123 | tmp_signature_count[signature_dict['label']] += 1 | 155 | |
| 124 | else: | ||
| 125 | tmp_signature_count[signature_dict['label']] = 1 | ||
| 126 | for field, signature_type_set in self.target_fields[self.signature_str].items(): | 156 | for field, signature_type_set in self.target_fields[self.signature_str].items(): |
| 127 | for signature_type in signature_type_set: | 157 | for signature_type in signature_type_set: |
| 128 | if tmp_signature_count.get(signature_type, 0) > 0: | 158 | if len(tmp_signature_info.get(signature_type, [])) > 0: |
| 129 | res[field] = self.signature_have_str | 159 | signature_value_res[field] = self.signature_have_str |
| 130 | tmp_signature_count[signature_type] -= 1 | 160 | signature_coordinates_res[field] = tmp_signature_info[signature_type].pop(0) |
| 131 | break | 161 | break |
| 132 | else: | 162 | else: |
| 133 | res[field] = self.signature_have_not_str | 163 | signature_value_res[field] = self.signature_have_not_str |
| 164 | |||
| 165 | words_result = self.rebuild_res(value_res, coordinates_res) | ||
| 166 | words_result_signature = self.rebuild_res(signature_value_res, signature_coordinates_res, True) | ||
| 167 | words_result.update(words_result_signature) | ||
| 134 | 168 | ||
| 135 | return res | 169 | return {'words_result': words_result} | ... | ... |
-
Please register or sign in to post a comment