fix auth from
Showing 3 changed files with 57 additions and 32 deletions
| ... | @@ -18,7 +18,7 @@ print(res) # {'label': '营业执照', 'confidence': 0.988462} | ... | @@ -18,7 +18,7 @@ print(res) # {'label': '营业执照', 'confidence': 0.988462} |
| 18 | from authorization_from import retriever_individuals, retriever_companies | 18 | from authorization_from import retriever_individuals, retriever_companies |
| 19 | 19 | ||
| 20 | # 个人授权书 | 20 | # 个人授权书 |
| 21 | res = retriever_companies.get_target_fields(go_res, signature_res) | 21 | res = retriever_individuals.get_target_fields(go_res, signature_res) |
| 22 | print(res) | 22 | print(res) |
| 23 | # {'words_result': | 23 | # {'words_result': |
| 24 | # {'姓名': {'words': 'xx', 'score': 1, 'location': {'left': 105, 'top': 277, 'width': 60, 'height': 28}}, | 24 | # {'姓名': {'words': 'xx', 'score': 1, 'location': {'left': 105, 'top': 277, 'width': 60, 'height': 28}}, |
| ... | @@ -27,7 +27,7 @@ print(res) | ... | @@ -27,7 +27,7 @@ print(res) |
| 27 | # } | 27 | # } |
| 28 | 28 | ||
| 29 | # 公司授权书 | 29 | # 公司授权书 |
| 30 | # res = retriever_individuals.get_target_fields(go_res, signature_res) | 30 | # res = retriever_companies.get_target_fields(go_res, signature_res) |
| 31 | # print(res) | 31 | # print(res) |
| 32 | 32 | ||
| 33 | # {'words_result': { | 33 | # {'words_result': { | ... | ... |
| ... | @@ -4,8 +4,8 @@ TARGET_FIELD_INDIVIDUALS = { | ... | @@ -4,8 +4,8 @@ TARGET_FIELD_INDIVIDUALS = { |
| 4 | '个人身份证件号码': [('个人身份证件号码', 'top1', {})], | 4 | '个人身份证件号码': [('个人身份证件号码', 'top1', {})], |
| 5 | }, | 5 | }, |
| 6 | 'value': { | 6 | 'value': { |
| 7 | '姓名': ('under', {'left_padding': 1, 'right_padding': 1}, ''), | 7 | '姓名': ('under', {'left_padding': 1, 'right_padding': 1, 'scope': 2}, ''), |
| 8 | '个人身份证件号码': ('under', {'left_padding': 0.5, 'right_padding': 0.5}, '') | 8 | '个人身份证件号码': ('under', {'left_padding': 0.5, 'right_padding': 0.5, 'scope': 2}, '') |
| 9 | }, | 9 | }, |
| 10 | 'signature': { | 10 | 'signature': { |
| 11 | '签字': {'signature', } | 11 | '签字': {'signature', } |
| ... | @@ -19,17 +19,17 @@ TARGET_FIELD_COMPANIES = { | ... | @@ -19,17 +19,17 @@ TARGET_FIELD_COMPANIES = { |
| 19 | ], | 19 | ], |
| 20 | '经销商代码-宝马中国': [ | 20 | '经销商代码-宝马中国': [ |
| 21 | ('经销商代码', 'top1', {}), | 21 | ('经销商代码', 'top1', {}), |
| 22 | ('宝马中国', 'right', {'top_padding': 1.5, 'bottom_padding': 0}) | 22 | ('宝马中国', 'right', {'top_padding': 1.5, 'bottom_padding': 0, 'scope': 2}) |
| 23 | ], | 23 | ], |
| 24 | '管理人员姓名-总经理': [ | 24 | '管理人员姓名-总经理': [ |
| 25 | ('管理人员姓名', 'top1', {}), | 25 | ('管理人员姓名', 'top1', {}), |
| 26 | ('总经理', 'right', {'top_padding': 1, 'bottom_padding': 0}) | 26 | ('总经理', 'right', {'top_padding': 1, 'bottom_padding': 0, 'scope': 2}) |
| 27 | ], | 27 | ], |
| 28 | }, | 28 | }, |
| 29 | 'value': { | 29 | 'value': { |
| 30 | '经销商名称': ('right', {'top_padding': 1, 'bottom_padding': 1}, ''), | 30 | '经销商名称': ('right', {'top_padding': 1, 'bottom_padding': 1, 'scope': 4}, ''), |
| 31 | '经销商代码-宝马中国': ('right', {'top_padding': 0.5, 'bottom_padding': 0.5}, ''), | 31 | '经销商代码-宝马中国': ('right', {'top_padding': 0.5, 'bottom_padding': 0.5, 'scope': 3, 'value_type': 'int'}, ''), |
| 32 | '管理人员姓名-总经理': ('right', {'top_padding': 0.5, 'bottom_padding': 0.5}, '') | 32 | '管理人员姓名-总经理': ('right', {'top_padding': 0.5, 'bottom_padding': 0.5, 'scope': 5}, '') |
| 33 | }, | 33 | }, |
| 34 | 'signature': { | 34 | 'signature': { |
| 35 | '公司公章': {'circle', }, | 35 | '公司公章': {'circle', }, | ... | ... |
| ... | @@ -8,6 +8,11 @@ class Retriever: | ... | @@ -8,6 +8,11 @@ class Retriever: |
| 8 | self.signature_have_not_str = '无' | 8 | self.signature_have_not_str = '无' |
| 9 | self.target_fields = target_fields | 9 | self.target_fields = target_fields |
| 10 | self.key_text_set = self.get_key_text_set(target_fields) | 10 | self.key_text_set = self.get_key_text_set(target_fields) |
| 11 | self.replace_map = { | ||
| 12 | 'int': { | ||
| 13 | '(': '0' | ||
| 14 | } | ||
| 15 | } | ||
| 11 | 16 | ||
| 12 | def get_key_text_set(self, target_fields): | 17 | def get_key_text_set(self, target_fields): |
| 13 | # 关键词集合 | 18 | # 关键词集合 |
| ... | @@ -24,60 +29,80 @@ class Retriever: | ... | @@ -24,60 +29,80 @@ class Retriever: |
| 24 | return coordinates_list[0] | 29 | return coordinates_list[0] |
| 25 | 30 | ||
| 26 | @staticmethod | 31 | @staticmethod |
| 27 | def key_right(coordinates_list, key_coordinates, top_padding, bottom_padding): | 32 | def key_right(coordinates_list, key_coordinates, top_padding, bottom_padding, scope): |
| 28 | # 关键词查找方向:右侧 | 33 | # 关键词查找方向:右侧 |
| 29 | if len(coordinates_list) == 1: | 34 | if len(coordinates_list) == 1: |
| 30 | return coordinates_list[0] | 35 | return coordinates_list[0] |
| 31 | height = key_coordinates[-1] - key_coordinates[1] | 36 | height = key_coordinates[-1] - key_coordinates[1] |
| 32 | y_min = key_coordinates[1] - (top_padding * height) | 37 | y_min = key_coordinates[1] - (top_padding * height) |
| 33 | y_max = key_coordinates[-1] + (bottom_padding * height) | 38 | y_max = key_coordinates[-1] + (bottom_padding * height) |
| 34 | x = key_coordinates[2] | ||
| 35 | 39 | ||
| 36 | x_min = None | 40 | width = key_coordinates[2] - key_coordinates[0] |
| 41 | x_min = key_coordinates[2] | ||
| 42 | x_max = key_coordinates[2] + (width * scope) | ||
| 43 | |||
| 44 | x_min_find = None | ||
| 37 | key_coordinates = None | 45 | key_coordinates = None |
| 38 | for x0, y0, x1, y1 in coordinates_list: | 46 | for x0, y0, x1, y1 in coordinates_list: |
| 39 | if y0 > y_min and y1 < y_max and x0 > x: | 47 | cent_x = x0 + ((x1 - x0) / 2) |
| 40 | if x_min is None or x0 < x_min: | 48 | cent_y = y0 + ((y1 - y0) / 2) |
| 41 | x_min = x0 | 49 | if x_min < cent_x < x_max and y_min < cent_y < y_max: |
| 50 | if x_min_find is None or x0 < x_min_find: | ||
| 51 | x_min_find = x0 | ||
| 42 | key_coordinates = (x0, y0, x1, y1) | 52 | key_coordinates = (x0, y0, x1, y1) |
| 43 | return key_coordinates | 53 | return key_coordinates |
| 44 | 54 | ||
| 45 | @staticmethod | 55 | def value_right(self, go_res, key_coordinates, top_padding, bottom_padding, scope, value_type=None): |
| 46 | def value_right(go_res, key_coordinates, top_padding, bottom_padding): | ||
| 47 | # 字段值查找方向:右侧 | 56 | # 字段值查找方向:右侧 |
| 48 | height = key_coordinates[-1] - key_coordinates[1] | 57 | height = key_coordinates[-1] - key_coordinates[1] |
| 49 | y_min = key_coordinates[1] - (top_padding * height) | 58 | y_min = key_coordinates[1] - (top_padding * height) |
| 50 | y_max = key_coordinates[-1] + (bottom_padding * height) | 59 | y_max = key_coordinates[-1] + (bottom_padding * height) |
| 51 | x = key_coordinates[2] | ||
| 52 | 60 | ||
| 53 | x_min = None | 61 | width = key_coordinates[2] - key_coordinates[0] |
| 62 | x_min = key_coordinates[2] | ||
| 63 | x_max = key_coordinates[2] + (width * scope) | ||
| 64 | |||
| 65 | x_min_find = None | ||
| 54 | value = None | 66 | value = None |
| 55 | coordinates = None | 67 | coordinates = None |
| 56 | for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): | 68 | for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): |
| 57 | if y0 > y_min and y1 < y_max and x0 > x: | 69 | cent_x = x0 + ((x1 - x0) / 2) |
| 58 | if x_min is None or x0 < x_min: | 70 | cent_y = y0 + ((y1 - y0) / 2) |
| 59 | x_min = x0 | 71 | if x_min < cent_x < x_max and y_min < cent_y < y_max: |
| 60 | value = text | 72 | if x_min_find is None or x0 < x_min_find: |
| 61 | coordinates = (x0, y0, x1, y1) | 73 | if len(text.strip()) > 0: |
| 74 | x_min_find = x0 | ||
| 75 | value = text | ||
| 76 | coordinates = (x0, y0, x1, y1) | ||
| 77 | |||
| 78 | if isinstance(value_type, str) and value_type in self.replace_map and isinstance(value, str): | ||
| 79 | new_value = value.translate(str.maketrans(self.replace_map.get(value_type, {}))) | ||
| 80 | return new_value, coordinates | ||
| 62 | return value, coordinates | 81 | return value, coordinates |
| 63 | 82 | ||
| 64 | @staticmethod | 83 | @staticmethod |
| 65 | def value_under(go_res, key_coordinates, left_padding, right_padding): | 84 | def value_under(go_res, key_coordinates, left_padding, right_padding, scope, value_type=None): |
| 66 | # 字段值查找方向:下方 | 85 | # 字段值查找方向:下方 |
| 67 | width = key_coordinates[2] - key_coordinates[0] | 86 | width = key_coordinates[2] - key_coordinates[0] |
| 68 | x_min = key_coordinates[0] - (width * left_padding) | 87 | x_min = key_coordinates[0] - (width * left_padding) |
| 69 | x_max = key_coordinates[2] + (width * right_padding) | 88 | x_max = key_coordinates[2] + (width * right_padding) |
| 70 | y = key_coordinates[-1] | ||
| 71 | 89 | ||
| 72 | y_min = None | 90 | height = key_coordinates[-1] - key_coordinates[1] |
| 91 | y_min = key_coordinates[-1] | ||
| 92 | y_max = key_coordinates[-1] + (height * scope) | ||
| 93 | |||
| 94 | y_min_find = None | ||
| 73 | value = None | 95 | value = None |
| 74 | coordinates = None | 96 | coordinates = None |
| 75 | for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): | 97 | for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): |
| 76 | if x0 > x_min and x1 < x_max and y0 > y: | 98 | cent_x = x0 + ((x1 - x0)/2) |
| 77 | if y_min is None or y0 < y_min: | 99 | cent_y = y0 + ((y1 - y0)/2) |
| 78 | y_min = y0 | 100 | if x_min < cent_x < x_max and y_min < cent_y < y_max: |
| 79 | value = text | 101 | if y_min_find is None or y0 < y_min_find: |
| 80 | coordinates = (x0, y0, x1, y1) | 102 | if len(text.strip()) > 0: |
| 103 | y_min_find = y0 | ||
| 104 | value = text | ||
| 105 | coordinates = (x0, y0, x1, y1) | ||
| 81 | return value, coordinates | 106 | return value, coordinates |
| 82 | 107 | ||
| 83 | @staticmethod | 108 | @staticmethod | ... | ... |
-
Please register or sign in to post a comment