update auth_form
Showing 2 changed files with 37 additions and 21 deletions
1 | TARGET_FIELD_INDIVIDUALS = { | 1 | TARGET_FIELD_INDIVIDUALS = { |
2 | 'keys': { | 2 | 'keys': { |
3 | '姓名': [('姓名', 'top1', {})], | 3 | '姓名': [('姓名', r'^.?姓名.?$', 'top1', {})], |
4 | '个人身份证件号码': [('个人身份证件号码', 'top1', {})], | 4 | '个人身份证件号码': [('个人身份证件号码', r'^.?个人身份证件号码.?$', 'top1', {})], |
5 | '经销商名称': [('经销商名称', r'^.?经销商名称.*$', 'top1', {})], | ||
5 | }, | 6 | }, |
6 | 'value': { | 7 | 'value': { |
7 | '姓名': ('under', {'left_padding': 1, 'right_padding': 1, 'scope': 2}, ''), | 8 | '姓名': ('under', {'left_padding': 1, 'right_padding': 1, 'scope': 2}, ''), |
8 | '个人身份证件号码': ('under', {'left_padding': 0.5, 'right_padding': 0.5, 'scope': 2}, '') | 9 | '个人身份证件号码': ('under', {'left_padding': 0.5, 'right_padding': 0.5, 'scope': 2}, ''), |
10 | '经销商名称': ('under', {'left_padding': 0.5, 'right_padding': 0.5, 'scope': 2}, '') | ||
9 | }, | 11 | }, |
10 | 'signature': { | 12 | 'signature': { |
11 | '签字': {'signature', } | 13 | '签字': {'signature', } |
... | @@ -15,15 +17,15 @@ TARGET_FIELD_INDIVIDUALS = { | ... | @@ -15,15 +17,15 @@ TARGET_FIELD_INDIVIDUALS = { |
15 | TARGET_FIELD_COMPANIES = { | 17 | TARGET_FIELD_COMPANIES = { |
16 | 'keys': { | 18 | 'keys': { |
17 | '经销商名称': [ | 19 | '经销商名称': [ |
18 | ('经销商名称', 'top1', {}) | 20 | ('经销商名称', r'^.?经销商名称.?$', 'top1', {}) |
19 | ], | 21 | ], |
20 | '经销商代码-宝马中国': [ | 22 | '经销商代码-宝马中国': [ |
21 | ('经销商代码', 'top1', {}), | 23 | ('经销商代码', r'^.?经销商代码.?$', 'top1', {}), |
22 | ('宝马中国', 'right', {'top_padding': 1.5, 'bottom_padding': 0, 'scope': 2}) | 24 | ('宝马中国', r'^.?宝马中国.?$', 'right', {'top_padding': 1.5, 'bottom_padding': 0, 'scope': 2}) |
23 | ], | 25 | ], |
24 | '管理人员姓名-总经理': [ | 26 | '管理人员姓名-总经理': [ |
25 | ('管理人员姓名', 'top1', {}), | 27 | ('管理人员姓名', r'^.?管理人员姓名.?$', 'top1', {}), |
26 | ('总经理', 'right', {'top_padding': 1, 'bottom_padding': 0, 'scope': 2}) | 28 | ('总经理', r'^.?总经理.?$', 'right', {'top_padding': 1, 'bottom_padding': 0, 'scope': 2}) |
27 | ], | 29 | ], |
28 | }, | 30 | }, |
29 | 'value': { | 31 | 'value': { |
... | @@ -35,4 +37,10 @@ TARGET_FIELD_COMPANIES = { | ... | @@ -35,4 +37,10 @@ TARGET_FIELD_COMPANIES = { |
35 | '公司公章': {'circle', }, | 37 | '公司公章': {'circle', }, |
36 | '法定代表人签章': {'signature', 'rectangle'} | 38 | '法定代表人签章': {'signature', 'rectangle'} |
37 | } | 39 | } |
38 | } | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
40 | } | ||
41 | |||
42 | todo_list = [ | ||
43 | '8点,旋转', | ||
44 | 'key的文本多选择done', | ||
45 | '新字段done' | ||
46 | ] | ... | ... |
1 | import re | ||
2 | |||
3 | |||
1 | class Retriever: | 4 | class Retriever: |
2 | 5 | ||
3 | def __init__(self, target_fields): | 6 | def __init__(self, target_fields): |
... | @@ -7,20 +10,20 @@ class Retriever: | ... | @@ -7,20 +10,20 @@ class Retriever: |
7 | self.signature_have_str = '有' | 10 | self.signature_have_str = '有' |
8 | self.signature_have_not_str = '无' | 11 | self.signature_have_not_str = '无' |
9 | self.target_fields = target_fields | 12 | self.target_fields = target_fields |
10 | self.key_text_set = self.get_key_text_set(target_fields) | 13 | # self.key_text_set = self.get_key_text_set(target_fields) |
11 | self.replace_map = { | 14 | self.replace_map = { |
12 | 'int': { | 15 | 'int': { |
13 | '(': '0' | 16 | '(': '0' |
14 | } | 17 | } |
15 | } | 18 | } |
16 | 19 | ||
17 | def get_key_text_set(self, target_fields): | 20 | # def get_key_text_set(self, target_fields): |
18 | # 关键词集合 | 21 | # # 关键词集合 |
19 | key_text_set = set() | 22 | # key_text_set = set() |
20 | for key_text_list in target_fields[self.keys_str].values(): | 23 | # for key_text_list in target_fields[self.keys_str].values(): |
21 | for key_text, _, _ in key_text_list: | 24 | # for key_text, key_re, _, _ in key_text_list: |
22 | key_text_set.add(key_text) | 25 | # key_text_set.add(key_text) |
23 | return key_text_set | 26 | # return key_text_set |
24 | 27 | ||
25 | @staticmethod | 28 | @staticmethod |
26 | def key_top1(coordinates_list, key_coordinates): | 29 | def key_top1(coordinates_list, key_coordinates): |
... | @@ -132,15 +135,20 @@ class Retriever: | ... | @@ -132,15 +135,20 @@ class Retriever: |
132 | def get_target_fields(self, go_res, signature_res_list): | 135 | def get_target_fields(self, go_res, signature_res_list): |
133 | # 搜索关键词 | 136 | # 搜索关键词 |
134 | key_text_info = dict() | 137 | key_text_info = dict() |
135 | for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): | 138 | for key_text_list in self.target_fields[self.keys_str].values(): |
136 | if text in self.key_text_set: | 139 | for key_text, key_re, _, _ in key_text_list: |
137 | key_text_info.setdefault(text, list()).append((x0, y0, x1, y1)) | 140 | for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): |
141 | if re.match(key_re, text): | ||
142 | key_text_info.setdefault(key_text, list()).append((x0, y0, x1, y1)) | ||
143 | |||
144 | # if text in self.key_text_set: | ||
145 | # key_text_info.setdefault(text, list()).append((x0, y0, x1, y1)) | ||
138 | 146 | ||
139 | # 搜索关键词 | 147 | # 搜索关键词 |
140 | key_coordinates_info = dict() | 148 | key_coordinates_info = dict() |
141 | for field, key_text_list in self.target_fields[self.keys_str].items(): | 149 | for field, key_text_list in self.target_fields[self.keys_str].items(): |
142 | pre_key_coordinates = None | 150 | pre_key_coordinates = None |
143 | for key_text, direction, kwargs in key_text_list: | 151 | for key_text, _, direction, kwargs in key_text_list: |
144 | if key_text not in key_text_info: | 152 | if key_text not in key_text_info: |
145 | break | 153 | break |
146 | key_coordinates = getattr(self, 'key_{0}'.format(direction))( | 154 | key_coordinates = getattr(self, 'key_{0}'.format(direction))( | ... | ... |
-
Please register or sign in to post a comment