8b0cf91f by 周伟奇

update auth_form

1 parent d29ec34f
TARGET_FIELD_INDIVIDUALS = {
'keys': {
'姓名': [('姓名', 'top1', {})],
'个人身份证件号码': [('个人身份证件号码', 'top1', {})],
'姓名': [('姓名', r'^.?姓名.?$', 'top1', {})],
'个人身份证件号码': [('个人身份证件号码', r'^.?个人身份证件号码.?$', 'top1', {})],
'经销商名称': [('经销商名称', r'^.?经销商名称.*$', 'top1', {})],
},
'value': {
'姓名': ('under', {'left_padding': 1, 'right_padding': 1, 'scope': 2}, ''),
'个人身份证件号码': ('under', {'left_padding': 0.5, 'right_padding': 0.5, 'scope': 2}, '')
'个人身份证件号码': ('under', {'left_padding': 0.5, 'right_padding': 0.5, 'scope': 2}, ''),
'经销商名称': ('under', {'left_padding': 0.5, 'right_padding': 0.5, 'scope': 2}, '')
},
'signature': {
'签字': {'signature', }
......@@ -15,15 +17,15 @@ TARGET_FIELD_INDIVIDUALS = {
TARGET_FIELD_COMPANIES = {
'keys': {
'经销商名称': [
('经销商名称', 'top1', {})
('经销商名称', r'^.?经销商名称.?$', 'top1', {})
],
'经销商代码-宝马中国': [
('经销商代码', 'top1', {}),
('宝马中国', 'right', {'top_padding': 1.5, 'bottom_padding': 0, 'scope': 2})
('经销商代码', r'^.?经销商代码.?$', 'top1', {}),
('宝马中国', r'^.?宝马中国.?$', 'right', {'top_padding': 1.5, 'bottom_padding': 0, 'scope': 2})
],
'管理人员姓名-总经理': [
('管理人员姓名', 'top1', {}),
('总经理', 'right', {'top_padding': 1, 'bottom_padding': 0, 'scope': 2})
('管理人员姓名', r'^.?管理人员姓名.?$', 'top1', {}),
('总经理', r'^.?总经理.?$', 'right', {'top_padding': 1, 'bottom_padding': 0, 'scope': 2})
],
},
'value': {
......@@ -35,4 +37,10 @@ TARGET_FIELD_COMPANIES = {
'公司公章': {'circle', },
'法定代表人签章': {'signature', 'rectangle'}
}
}
\ No newline at end of file
}
# Informal developer notes kept as plain strings; entries ending in "done"
# appear to mark finished items. No reader of this list is visible in this
# diff — presumably it is documentation only (confirm before removing).
todo_list = ['8点,旋转', 'key的文本多选择done', '新字段done']
......
import re
class Retriever:
def __init__(self, target_fields):
......@@ -7,20 +10,20 @@ class Retriever:
self.signature_have_str = '有'
self.signature_have_not_str = '无'
self.target_fields = target_fields
self.key_text_set = self.get_key_text_set(target_fields)
# self.key_text_set = self.get_key_text_set(target_fields)
self.replace_map = {
'int': {
'(': '0'
}
}
# Gather the first element (the key text) of every entry listed under
# target_fields[self.keys_str] into a single set and return it.
# NOTE(review): the inner loop unpacks three-element entries, while the
# regex-bearing key entries shown elsewhere in this diff have four elements;
# confirm which entry shape is passed before re-enabling the call in __init__.
def get_key_text_set(self, target_fields):
# Set of key texts
key_text_set = set()
for key_text_list in target_fields[self.keys_str].values():
for key_text, _, _ in key_text_list:
key_text_set.add(key_text)
return key_text_set
# def get_key_text_set(self, target_fields):
# # 关键词集合
# key_text_set = set()
# for key_text_list in target_fields[self.keys_str].values():
# for key_text, key_re, _, _ in key_text_list:
# key_text_set.add(key_text)
# return key_text_set
@staticmethod
def key_top1(coordinates_list, key_coordinates):
......@@ -132,15 +135,20 @@ class Retriever:
def get_target_fields(self, go_res, signature_res_list):
# 搜索关键词
key_text_info = dict()
for (x0, y0, _, _, x1, y1, _, _), text in go_res.values():
if text in self.key_text_set:
key_text_info.setdefault(text, list()).append((x0, y0, x1, y1))
for key_text_list in self.target_fields[self.keys_str].values():
for key_text, key_re, _, _ in key_text_list:
for (x0, y0, _, _, x1, y1, _, _), text in go_res.values():
if re.match(key_re, text):
key_text_info.setdefault(key_text, list()).append((x0, y0, x1, y1))
# if text in self.key_text_set:
# key_text_info.setdefault(text, list()).append((x0, y0, x1, y1))
# 搜索关键词
key_coordinates_info = dict()
for field, key_text_list in self.target_fields[self.keys_str].items():
pre_key_coordinates = None
for key_text, direction, kwargs in key_text_list:
for key_text, _, direction, kwargs in key_text_list:
if key_text not in key_text_info:
break
key_coordinates = getattr(self, 'key_{0}'.format(direction))(
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!