update auth_form
Showing 2 changed files with 37 additions and 21 deletions
| 1 | TARGET_FIELD_INDIVIDUALS = { | 1 | TARGET_FIELD_INDIVIDUALS = { |
| 2 | 'keys': { | 2 | 'keys': { |
| 3 | '姓名': [('姓名', 'top1', {})], | 3 | '姓名': [('姓名', r'^.?姓名.?$', 'top1', {})], |
| 4 | '个人身份证件号码': [('个人身份证件号码', 'top1', {})], | 4 | '个人身份证件号码': [('个人身份证件号码', r'^.?个人身份证件号码.?$', 'top1', {})], |
| 5 | '经销商名称': [('经销商名称', r'^.?经销商名称.*$', 'top1', {})], | ||
| 5 | }, | 6 | }, |
| 6 | 'value': { | 7 | 'value': { |
| 7 | '姓名': ('under', {'left_padding': 1, 'right_padding': 1, 'scope': 2}, ''), | 8 | '姓名': ('under', {'left_padding': 1, 'right_padding': 1, 'scope': 2}, ''), |
| 8 | '个人身份证件号码': ('under', {'left_padding': 0.5, 'right_padding': 0.5, 'scope': 2}, '') | 9 | '个人身份证件号码': ('under', {'left_padding': 0.5, 'right_padding': 0.5, 'scope': 2}, ''), |
| 10 | '经销商名称': ('under', {'left_padding': 0.5, 'right_padding': 0.5, 'scope': 2}, '') | ||
| 9 | }, | 11 | }, |
| 10 | 'signature': { | 12 | 'signature': { |
| 11 | '签字': {'signature', } | 13 | '签字': {'signature', } |
| ... | @@ -15,15 +17,15 @@ TARGET_FIELD_INDIVIDUALS = { | ... | @@ -15,15 +17,15 @@ TARGET_FIELD_INDIVIDUALS = { |
| 15 | TARGET_FIELD_COMPANIES = { | 17 | TARGET_FIELD_COMPANIES = { |
| 16 | 'keys': { | 18 | 'keys': { |
| 17 | '经销商名称': [ | 19 | '经销商名称': [ |
| 18 | ('经销商名称', 'top1', {}) | 20 | ('经销商名称', r'^.?经销商名称.?$', 'top1', {}) |
| 19 | ], | 21 | ], |
| 20 | '经销商代码-宝马中国': [ | 22 | '经销商代码-宝马中国': [ |
| 21 | ('经销商代码', 'top1', {}), | 23 | ('经销商代码', r'^.?经销商代码.?$', 'top1', {}), |
| 22 | ('宝马中国', 'right', {'top_padding': 1.5, 'bottom_padding': 0, 'scope': 2}) | 24 | ('宝马中国', r'^.?宝马中国.?$', 'right', {'top_padding': 1.5, 'bottom_padding': 0, 'scope': 2}) |
| 23 | ], | 25 | ], |
| 24 | '管理人员姓名-总经理': [ | 26 | '管理人员姓名-总经理': [ |
| 25 | ('管理人员姓名', 'top1', {}), | 27 | ('管理人员姓名', r'^.?管理人员姓名.?$', 'top1', {}), |
| 26 | ('总经理', 'right', {'top_padding': 1, 'bottom_padding': 0, 'scope': 2}) | 28 | ('总经理', r'^.?总经理.?$', 'right', {'top_padding': 1, 'bottom_padding': 0, 'scope': 2}) |
| 27 | ], | 29 | ], |
| 28 | }, | 30 | }, |
| 29 | 'value': { | 31 | 'value': { |
| ... | @@ -35,4 +37,10 @@ TARGET_FIELD_COMPANIES = { | ... | @@ -35,4 +37,10 @@ TARGET_FIELD_COMPANIES = { |
| 35 | '公司公章': {'circle', }, | 37 | '公司公章': {'circle', }, |
| 36 | '法定代表人签章': {'signature', 'rectangle'} | 38 | '法定代表人签章': {'signature', 'rectangle'} |
| 37 | } | 39 | } |
| 38 | } | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file |
| 40 | } | ||
| 41 | |||
| 42 | todo_list = [ | ||
| 43 | '8点,旋转', | ||
| 44 | 'key的文本多选择done', | ||
| 45 | '新字段done' | ||
| 46 | ] | ... | ... |
| 1 | import re | ||
| 2 | |||
| 3 | |||
| 1 | class Retriever: | 4 | class Retriever: |
| 2 | 5 | ||
| 3 | def __init__(self, target_fields): | 6 | def __init__(self, target_fields): |
| ... | @@ -7,20 +10,20 @@ class Retriever: | ... | @@ -7,20 +10,20 @@ class Retriever: |
| 7 | self.signature_have_str = '有' | 10 | self.signature_have_str = '有' |
| 8 | self.signature_have_not_str = '无' | 11 | self.signature_have_not_str = '无' |
| 9 | self.target_fields = target_fields | 12 | self.target_fields = target_fields |
| 10 | self.key_text_set = self.get_key_text_set(target_fields) | 13 | # self.key_text_set = self.get_key_text_set(target_fields) |
| 11 | self.replace_map = { | 14 | self.replace_map = { |
| 12 | 'int': { | 15 | 'int': { |
| 13 | '(': '0' | 16 | '(': '0' |
| 14 | } | 17 | } |
| 15 | } | 18 | } |
| 16 | 19 | ||
| 17 | def get_key_text_set(self, target_fields): | 20 | # def get_key_text_set(self, target_fields): |
| 18 | # 关键词集合 | 21 | # # 关键词集合 |
| 19 | key_text_set = set() | 22 | # key_text_set = set() |
| 20 | for key_text_list in target_fields[self.keys_str].values(): | 23 | # for key_text_list in target_fields[self.keys_str].values(): |
| 21 | for key_text, _, _ in key_text_list: | 24 | # for key_text, key_re, _, _ in key_text_list: |
| 22 | key_text_set.add(key_text) | 25 | # key_text_set.add(key_text) |
| 23 | return key_text_set | 26 | # return key_text_set |
| 24 | 27 | ||
| 25 | @staticmethod | 28 | @staticmethod |
| 26 | def key_top1(coordinates_list, key_coordinates): | 29 | def key_top1(coordinates_list, key_coordinates): |
| ... | @@ -132,15 +135,20 @@ class Retriever: | ... | @@ -132,15 +135,20 @@ class Retriever: |
| 132 | def get_target_fields(self, go_res, signature_res_list): | 135 | def get_target_fields(self, go_res, signature_res_list): |
| 133 | # 搜索关键词 | 136 | # 搜索关键词 |
| 134 | key_text_info = dict() | 137 | key_text_info = dict() |
| 135 | for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): | 138 | for key_text_list in self.target_fields[self.keys_str].values(): |
| 136 | if text in self.key_text_set: | 139 | for key_text, key_re, _, _ in key_text_list: |
| 137 | key_text_info.setdefault(text, list()).append((x0, y0, x1, y1)) | 140 | for (x0, y0, _, _, x1, y1, _, _), text in go_res.values(): |
| 141 | if re.match(key_re, text): | ||
| 142 | key_text_info.setdefault(key_text, list()).append((x0, y0, x1, y1)) | ||
| 143 | |||
| 144 | # if text in self.key_text_set: | ||
| 145 | # key_text_info.setdefault(text, list()).append((x0, y0, x1, y1)) | ||
| 138 | 146 | ||
| 139 | # 搜索关键词 | 147 | # 搜索关键词 |
| 140 | key_coordinates_info = dict() | 148 | key_coordinates_info = dict() |
| 141 | for field, key_text_list in self.target_fields[self.keys_str].items(): | 149 | for field, key_text_list in self.target_fields[self.keys_str].items(): |
| 142 | pre_key_coordinates = None | 150 | pre_key_coordinates = None |
| 143 | for key_text, direction, kwargs in key_text_list: | 151 | for key_text, _, direction, kwargs in key_text_list: |
| 144 | if key_text not in key_text_info: | 152 | if key_text not in key_text_info: |
| 145 | break | 153 | break |
| 146 | key_coordinates = getattr(self, 'key_{0}'.format(direction))( | 154 | key_coordinates = getattr(self, 'key_{0}'.format(direction))( | ... | ... |
-
Please register or sign in to post a comment