fix auth from
Showing 3 changed files with 57 additions and 32 deletions
... | @@ -18,7 +18,7 @@ print(res) # {'label': '营业执照', 'confidence': 0.988462} | ... | @@ -18,7 +18,7 @@ print(res) # {'label': '营业执照', 'confidence': 0.988462} |
18 | from authorization_from import retriever_individuals, retriever_companies | 18 | from authorization_from import retriever_individuals, retriever_companies |
19 | 19 | ||
20 | # 个人授权书 | 20 | # 个人授权书 |
21 | res = retriever_companies.get_target_fields(go_res, signature_res) | 21 | res = retriever_individuals.get_target_fields(go_res, signature_res) |
22 | print(res) | 22 | print(res) |
23 | # {'words_result': | 23 | # {'words_result': |
24 | # {'姓名': {'words': 'xx', 'score': 1, 'location': {'left': 105, 'top': 277, 'width': 60, 'height': 28}}, | 24 | # {'姓名': {'words': 'xx', 'score': 1, 'location': {'left': 105, 'top': 277, 'width': 60, 'height': 28}}, |
... | @@ -27,7 +27,7 @@ print(res) | ... | @@ -27,7 +27,7 @@ print(res) |
27 | # } | 27 | # } |
28 | 28 | ||
29 | # 公司授权书 | 29 | # 公司授权书 |
30 | # res = retriever_individuals.get_target_fields(go_res, signature_res) | 30 | # res = retriever_companies.get_target_fields(go_res, signature_res) |
31 | # print(res) | 31 | # print(res) |
32 | 32 | ||
33 | # {'words_result': { | 33 | # {'words_result': { | ... | ... |
... | @@ -4,8 +4,8 @@ TARGET_FIELD_INDIVIDUALS = { | ... | @@ -4,8 +4,8 @@ TARGET_FIELD_INDIVIDUALS = { |
4 | '个人身份证件号码': [('个人身份证件号码', 'top1', {})], | 4 | '个人身份证件号码': [('个人身份证件号码', 'top1', {})], |
5 | }, | 5 | }, |
6 | 'value': { | 6 | 'value': { |
7 | '姓名': ('under', {'left_padding': 1, 'right_padding': 1}, ''), | 7 | '姓名': ('under', {'left_padding': 1, 'right_padding': 1, 'scope': 2}, ''), |
8 | '个人身份证件号码': ('under', {'left_padding': 0.5, 'right_padding': 0.5}, '') | 8 | '个人身份证件号码': ('under', {'left_padding': 0.5, 'right_padding': 0.5, 'scope': 2}, '') |
9 | }, | 9 | }, |
10 | 'signature': { | 10 | 'signature': { |
11 | '签字': {'signature', } | 11 | '签字': {'signature', } |
... | @@ -19,17 +19,17 @@ TARGET_FIELD_COMPANIES = { | ... | @@ -19,17 +19,17 @@ TARGET_FIELD_COMPANIES = { |
19 | ], | 19 | ], |
20 | '经销商代码-宝马中国': [ | 20 | '经销商代码-宝马中国': [ |
21 | ('经销商代码', 'top1', {}), | 21 | ('经销商代码', 'top1', {}), |
22 | ('宝马中国', 'right', {'top_padding': 1.5, 'bottom_padding': 0}) | 22 | ('宝马中国', 'right', {'top_padding': 1.5, 'bottom_padding': 0, 'scope': 2}) |
23 | ], | 23 | ], |
24 | '管理人员姓名-总经理': [ | 24 | '管理人员姓名-总经理': [ |
25 | ('管理人员姓名', 'top1', {}), | 25 | ('管理人员姓名', 'top1', {}), |
26 | ('总经理', 'right', {'top_padding': 1, 'bottom_padding': 0}) | 26 | ('总经理', 'right', {'top_padding': 1, 'bottom_padding': 0, 'scope': 2}) |
27 | ], | 27 | ], |
28 | }, | 28 | }, |
29 | 'value': { | 29 | 'value': { |
30 | '经销商名称': ('right', {'top_padding': 1, 'bottom_padding': 1}, ''), | 30 | '经销商名称': ('right', {'top_padding': 1, 'bottom_padding': 1, 'scope': 4}, ''), |
31 | '经销商代码-宝马中国': ('right', {'top_padding': 0.5, 'bottom_padding': 0.5}, ''), | 31 | '经销商代码-宝马中国': ('right', {'top_padding': 0.5, 'bottom_padding': 0.5, 'scope': 3, 'value_type': 'int'}, ''), |
32 | '管理人员姓名-总经理': ('right', {'top_padding': 0.5, 'bottom_padding': 0.5}, '') | 32 | '管理人员姓名-总经理': ('right', {'top_padding': 0.5, 'bottom_padding': 0.5, 'scope': 5}, '') |
33 | }, | 33 | }, |
34 | 'signature': { | 34 | 'signature': { |
35 | '公司公章': {'circle', }, | 35 | '公司公章': {'circle', }, | ... | ... |
... | @@ -8,6 +8,11 @@ class Retriever: | ... | @@ -8,6 +8,11 @@ class Retriever: |
8 | self.signature_have_not_str = '无' | 8 | self.signature_have_not_str = '无' |
9 | self.target_fields = target_fields | 9 | self.target_fields = target_fields |
10 | self.key_text_set = self.get_key_text_set(target_fields) | 10 | self.key_text_set = self.get_key_text_set(target_fields) |
11 | self.replace_map = { | ||
12 | 'int': { | ||
13 | '(': '0' | ||
14 | } | ||
15 | } | ||
11 | 16 | ||
12 | def get_key_text_set(self, target_fields): | 17 | def get_key_text_set(self, target_fields): |
13 | # 关键词集合 | 18 | # 关键词集合 |
... | @@ -24,60 +29,80 @@ class Retriever: | ... | @@ -24,60 +29,80 @@ class Retriever: |
24 | return coordinates_list[0] | 29 | return coordinates_list[0] |
25 | 30 | ||
26 | @staticmethod | 31 | @staticmethod |
27 | def key_right(coordinates_list, key_coordinates, top_padding, bottom_padding): | 32 | def key_right(coordinates_list, key_coordinates, top_padding, bottom_padding, scope): |
28 | # 关键词查找方向:右侧 | 33 | # 关键词查找方向:右侧 |
29 | if len(coordinates_list) == 1: | 34 | if len(coordinates_list) == 1: |
30 | return coordinates_list[0] | 35 | return coordinates_list[0] |
31 | height = key_coordinates[-1] - key_coordinates[1] | 36 | height = key_coordinates[-1] - key_coordinates[1] |
32 | y_min = key_coordinates[1] - (top_padding * height) | 37 | y_min = key_coordinates[1] - (top_padding * height) |
33 | y_max = key_coordinates[-1] + (bottom_padding * height) | 38 | y_max = key_coordinates[-1] + (bottom_padding * height) |
34 | x = key_coordinates[2] | ||
35 | 39 | ||
36 | x_min = None | 40 | width = key_coordinates[2] - key_coordinates[0] |
41 | x_min = key_coordinates[2] | ||
42 | x_max = key_coordinates[2] + (width * scope) | ||
43 | |||
44 | x_min_find = None | ||
37 | key_coordinates = None | 45 | key_coordinates = None |
38 | for x0, y0, x1, y1 in coordinates_list: | 46 | for x0, y0, x1, y1 in coordinates_list: |
39 | if y0 > y_min and y1 < y_max and x0 > x: | 47 | cent_x = x0 + ((x1 - x0) / 2) |
40 | if x_min is None or x0 < x_min: | 48 | cent_y = y0 + ((y1 - y0) / 2) |
41 | x_min = x0 | 49 | if x_min < cent_x < x_max and y_min < cent_y < y_max: |
50 | if x_min_find is None or x0 < x_min_find: | ||
51 | x_min_find = x0 | ||
42 | key_coordinates = (x0, y0, x1, y1) | 52 | key_coordinates = (x0, y0, x1, y1) |
43 | return key_coordinates | 53 | return key_coordinates |
44 | 54 | ||
45 | @staticmethod | 55 | def value_right(self, go_res, key_coordinates, top_padding, bottom_padding, scope, value_type=None): |
46 | def value_right(go_res, key_coordinates, top_padding, bottom_padding): | ||
47 | # 字段值查找方向:右侧 | 56 | # 字段值查找方向:右侧 |
48 | height = key_coordinates[-1] - key_coordinates[1] | 57 | height = key_coordinates[-1] - key_coordinates[1] |
49 | y_min = key_coordinates[1] - (top_padding * height) | 58 | y_min = key_coordinates[1] - (top_padding * height) |
50 | y_max = key_coordinates[-1] + (bottom_padding * height) | 59 | y_max = key_coordinates[-1] + (bottom_padding * height) |
51 | x = key_coordinates[2] | ||
52 | 60 | ||
53 | x_min = None | 61 | width = key_coordinates[2] - key_coordinates[0] |
62 | x_min = key_coordinates[2] | ||
63 | x_max = key_coordinates[2] + (width * scope) | ||
64 | |||
65 | x_min_find = None | ||
54 | value = None | 66 | value = None |
55 | coordinates = None | 67 | coordinates = None |
56 | for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): | 68 | for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): |
57 | if y0 > y_min and y1 < y_max and x0 > x: | 69 | cent_x = x0 + ((x1 - x0) / 2) |
58 | if x_min is None or x0 < x_min: | 70 | cent_y = y0 + ((y1 - y0) / 2) |
59 | x_min = x0 | 71 | if x_min < cent_x < x_max and y_min < cent_y < y_max: |
60 | value = text | 72 | if x_min_find is None or x0 < x_min_find: |
61 | coordinates = (x0, y0, x1, y1) | 73 | if len(text.strip()) > 0: |
74 | x_min_find = x0 | ||
75 | value = text | ||
76 | coordinates = (x0, y0, x1, y1) | ||
77 | |||
78 | if isinstance(value_type, str) and value_type in self.replace_map and isinstance(value, str): | ||
79 | new_value = value.translate(str.maketrans(self.replace_map.get(value_type, {}))) | ||
80 | return new_value, coordinates | ||
62 | return value, coordinates | 81 | return value, coordinates |
63 | 82 | ||
64 | @staticmethod | 83 | @staticmethod |
65 | def value_under(go_res, key_coordinates, left_padding, right_padding): | 84 | def value_under(go_res, key_coordinates, left_padding, right_padding, scope, value_type=None): |
66 | # 字段值查找方向:下方 | 85 | # 字段值查找方向:下方 |
67 | width = key_coordinates[2] - key_coordinates[0] | 86 | width = key_coordinates[2] - key_coordinates[0] |
68 | x_min = key_coordinates[0] - (width * left_padding) | 87 | x_min = key_coordinates[0] - (width * left_padding) |
69 | x_max = key_coordinates[2] + (width * right_padding) | 88 | x_max = key_coordinates[2] + (width * right_padding) |
70 | y = key_coordinates[-1] | ||
71 | 89 | ||
72 | y_min = None | 90 | height = key_coordinates[-1] - key_coordinates[1] |
91 | y_min = key_coordinates[-1] | ||
92 | y_max = key_coordinates[-1] + (height * scope) | ||
93 | |||
94 | y_min_find = None | ||
73 | value = None | 95 | value = None |
74 | coordinates = None | 96 | coordinates = None |
75 | for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): | 97 | for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): |
76 | if x0 > x_min and x1 < x_max and y0 > y: | 98 | cent_x = x0 + ((x1 - x0)/2) |
77 | if y_min is None or y0 < y_min: | 99 | cent_y = y0 + ((y1 - y0)/2) |
78 | y_min = y0 | 100 | if x_min < cent_x < x_max and y_min < cent_y < y_max: |
79 | value = text | 101 | if y_min_find is None or y0 < y_min_find: |
80 | coordinates = (x0, y0, x1, y1) | 102 | if len(text.strip()) > 0: |
103 | y_min_find = y0 | ||
104 | value = text | ||
105 | coordinates = (x0, y0, x1, y1) | ||
81 | return value, coordinates | 106 | return value, coordinates |
82 | 107 | ||
83 | @staticmethod | 108 | @staticmethod | ... | ... |
-
Please register or sign in to post a comment