3d620b3b by 周伟奇

add slice

1 parent 84d79e06
......@@ -250,6 +250,7 @@ class Command(BaseCommand, LoggerMixin):
page_num_only = page_num
rebuild_page_info = []
text_key = 'words'
position_key = 'position'
for key, value in contract_dict.get('page_info', {}).items():
if value is None:
rebuild_page_info.append((key, ))
......@@ -279,11 +280,17 @@ class Command(BaseCommand, LoggerMixin):
contract_result.setdefault(classify, dict()).setdefault(page_num_only, []).append(rebuild_page_info)
page_compare_dict = {}
page_compare_dict = {
consts.IMG_PATH_KEY: img_path,
consts.ALL_POSITION_KEY: {},
}
for key, value in contract_dict.get('page_info', {}).items():
if not isinstance(value, dict):
continue
elif text_key in value:
position_list = value.get(position_key, [])
page_compare_dict[consts.ALL_POSITION_KEY][key] = position_list if isinstance(position_list, list) else []
if value[text_key] is None:
page_compare_dict[key] = ''
elif isinstance(value[text_key], str):
......@@ -292,16 +299,47 @@ class Command(BaseCommand, LoggerMixin):
page_compare_dict[key] = value[text_key]
else:
page_compare_dict[key] = {}
page_compare_dict[consts.ALL_POSITION_KEY][key] = {}
for sub_key, sub_value in value.items():
position_list = sub_value.get(position_key, [])
page_compare_dict[consts.ALL_POSITION_KEY][key][sub_key] = position_list if isinstance(
position_list, list) else []
if sub_value[text_key] is None:
page_compare_dict[key][sub_key] = ''
elif isinstance(sub_value[text_key], str):
page_compare_dict[key][sub_key] = sub_value[text_key]
page_compare_dict[consts.IMG_PATH_KEY] = img_path
contract_result_compare.setdefault(classify, dict())[consts.ASP_KEY] = contract_dict.get(consts.ASP_KEY, False)
# "position" = [xmin, ymin, xmax, ymax]
contract_result_compare.setdefault(classify, dict())[page_num_only] = page_compare_dict
@staticmethod
def rebuild_position(src_position):
    """Convert an edge-based box into a left/top/width/height box.

    ``src_position`` is read through ``.get`` for the keys ``'left'``,
    ``'top'``, ``'right'`` and ``'bottom'``; any missing key counts as 0.
    Example input: ``{'left': 470, 'top': 671, 'right': 542, 'bottom': 694}``.

    The result keeps ``left`` and ``top`` unchanged and derives
    ``width = right - left`` and ``height = bottom - top``, so the example
    above yields ``{'left': 470, 'top': 671, 'width': 72, 'height': 23}``.

    Best-effort: if anything goes wrong while reading or subtracting the
    values (for example ``src_position`` has no ``.get`` or a value is not
    numeric), an all-zero box is returned instead of raising.
    """
    try:
        x0, y0 = src_position.get('left', 0), src_position.get('top', 0)
        x1, y1 = src_position.get('right', 0), src_position.get('bottom', 0)
        rebuilt = dict(left=x0, top=y0, width=x1 - x0, height=y1 - y0)
    except Exception:
        # Deliberate catch-all: callers always receive a usable box.
        rebuilt = dict(left=0, top=0, width=0, height=0)
    return rebuilt
def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, do_dda,
dda_id_bc_mapping):
# 类别:'0'身份证, '1'居住证
......@@ -329,6 +367,7 @@ class Command(BaseCommand, LoggerMixin):
# 保单
if classify == consts.INSURANCE_CLASSIFY:
product_result = ['', '', '']
product_result_position = [dict(), dict(), dict()]
min_char_count_1 = 1000
min_char_count_2 = 1000
for product in license_data.get('result', {}).get('productList', []):
......@@ -338,10 +377,16 @@ class Command(BaseCommand, LoggerMixin):
min_char_count_1 = len(name)
product_result[0] = product.get('coverage', {}).get('words', '')
product_result[2] = product.get('deductible_franchise', {}).get('words', '')
product_result_position[0] = self.rebuild_position(product.get('coverage', {}).get(
'position', {}))
product_result_position[2] = self.rebuild_position(product.get('deductible_franchise', {}).get(
'position', {}))
elif name.find('第三者责任') != -1:
if len(name) < min_char_count_2:
min_char_count_2 = len(name)
product_result[1] = product.get('coverage', {}).get('words', '')
product_result_position[1] = self.rebuild_position(product.get('coverage', {}).get(
'position', {}))
special_str = license_data.get('result', {}).get('1stBeneficiary', {}).get('words', '')
special = '无'
......@@ -362,11 +407,29 @@ class Command(BaseCommand, LoggerMixin):
consts.IMG_PATH_KEY: img_path,
consts.SECTION_IMG_PATH_KEY: section_img_path,
}
# 'position': {'left': 470, 'top': 671, 'right': 542, 'bottom': 694}
# position_dict = {
# '被保险人姓名': {consts.FIELD_POSITION_KEY: {}}
# }
# insurance_ocr_result[consts.ALL_POSITION_KEY] = position_dict
position_dict = {
'被保险人姓名': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
'insured', {}).get('name', {}).get('position', {}))},
'被保险人证件号码': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
'insured', {}).get('certiCode', {}).get('position', {}))},
'车架号': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
'vehicle', {}).get('VIN', {}).get('position', {}))},
'机动车损失保险金额': {consts.FIELD_POSITION_KEY: product_result_position[0]},
'机动车第三者责任保险金额': {consts.FIELD_POSITION_KEY: product_result_position[1]},
'机动车损失保险绝对免赔率/绝对免赔额': {consts.FIELD_POSITION_KEY: product_result_position[2]},
'保险费合计': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
'premiumSum', {}).get('position', {}))},
'保险起始日期': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
'startDate', {}).get('position', {}))},
'保险截止日期': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
'endDate', {}).get('position', {}))},
'保单章': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
'seal', {}).get('position', {}))},
'特别约定第一受益人': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
'1stBeneficiary', {}).get('position', {}))},
}
insurance_ocr_result[consts.ALL_POSITION_KEY] = position_dict
license_summary.setdefault(classify, []).append(insurance_ocr_result)
# DDA
elif classify == consts.DDA_CLASSIFY:
......@@ -873,11 +936,24 @@ class Command(BaseCommand, LoggerMixin):
res[key] = page_info_dict.get(str(pno), {}).get(key1, '')
res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get(
consts.IMG_PATH_KEY, '')
res.setdefault(consts.ALL_POSITION_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get(
consts.ALL_POSITION_KEY, {}).get(key1, [])
else:
res[key] = page_info_dict.get(str(pno), {}).get(key1, {}).get(key2, '')
res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get(
consts.IMG_PATH_KEY, '')
res.setdefault(consts.ALL_POSITION_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get(
consts.ALL_POSITION_KEY, {}).get(key1, {}).get(key2, [])
# res = {
# 'key': 'list or str',
# 'uniq_img_path_key': {
# 'key': 'str',
# },
# 'uniq_all_position_key': {
# 'key': 'list'
# }
# }
license_summary[classify] = [res]
else:
res = {}
......
......@@ -2405,11 +2405,13 @@ def se_contract_compare(license_en, ocr_res_dict, strip_list, is_gsyh):
result_field_list = []
field_img_path_dict = dict()
ocr_res = dict()
if ocr_res_str is not None:
ocr_res_list = json.loads(ocr_res_str)
ocr_res = ocr_res_list.pop()
for name, value in strip_list:
# 购置税校验
if name == consts.SE_AFC_CON_FIELD[21]:
if len(value) == 3:
reason = []
......@@ -2471,6 +2473,29 @@ def se_contract_compare(license_en, ocr_res_dict, strip_list, is_gsyh):
result_field_list.append((name, value, consts.RESULT_N, empty_str, empty_str, ErrorType.NF.value,
'{0}未找到'.format(license_en)))
if ocr_res_str is not None:
img_map = {}
for name, _, result, _, img_path, _, _ in result_field_list:
if result == consts.RESULT_N:
img_map.setdefault(img_path, []).append(name)
for path, field_list in img_map.items():
if os.path.exists(path):
pre, suf = os.path.splitext(path)
last_img = cv2.imread(path)
for field_idx, field in enumerate(field_list):
try:
save_path = '{0}_{1}{2}'.format(pre, str(field_idx), suf)
section_position_list = ocr_res.get(consts.ALL_POSITION_KEY, {}).get(field, [])
if isinstance(section_position_list, list) and len(section_position_list) == 4:
field_img = last_img[section_position_list[1]: section_position_list[3],
section_position_list[0]: section_position_list[2], :]
cv2.imwrite(save_path, field_img)
field_img_path_dict[field] = save_path
else:
field_img_path_dict[field] = path
except Exception as e:
field_img_path_dict[field] = path
return result_field_list, field_img_path_dict
......
......@@ -788,6 +788,7 @@ class Finder:
items = []
start = False
page = None
greater_equal_v35 = False
for pno in self.pdf_info:
condition = False
for block in self.pdf_info[f'{pno}']['blocks']:
......@@ -796,6 +797,8 @@ class Finder:
for line in block['lines']:
for span in line['spans']:
bbox, text = span['bbox'], span['text']
if text == '租赁利率':
greater_equal_v35 = True
if '总计' in text:
start = True
if '注:出租人向承租人购买租赁车辆的对价' in text:
......@@ -804,9 +807,14 @@ class Finder:
if start == True:
items.append(text)
lines = [['项目', '购买价格', '实际融资金额']]
for i in range(len(items) // 3):
line = [items[2 + i * 3 + 0], items[2 + i * 3 + 1], items[2 + i * 3 + 2]]
lines.append(line)
if greater_equal_v35:
for i in range(len(items) // 4):
line = [items[2 + i * 4 + 0], items[2 + i * 4 + 1], items[2 + i * 4 + 2]]
lines.append(line)
else:
for i in range(len(items) // 3):
line = [items[2 + i * 3 + 0], items[2 + i * 3 + 1], items[2 + i * 3 + 2]]
lines.append(line)
if len(items) > 0:
lines.append([items[0], '', items[1]])
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!