3b1e6657 by 周伟奇

fix folder

1 parent 6e9f7b32
......@@ -61,72 +61,114 @@ class Command(BaseCommand, LoggerMixin):
def signal_handler(self, sig, frame):
self.switch = False # 停止处理文件
def license1_process(self, ocr_data, license_summary, classify, img_path):
def license1_process(self, ocr_data, license_summary, classify):
# 类别:'0'身份证, '1'居住证
license_data = ocr_data.get('data', [])
license_data = ocr_data.get('data')
if not license_data:
return
if classify == consts.MVC_CLASSIFY: # 车辆登记证 3/4页结果整合
for mvc_dict in license_data:
try:
mvc_page = mvc_dict.pop('page')
except Exception as e:
pass
else:
if isinstance(license_data, dict):
license_data.pop('base64_img', '')
# 保单
if classify == consts.INSURANCE_CLASSIFY:
product_result = ['', '', '']
for product in license_data.get('result', {}).get('productList', []):
name = product.get('name', {}).get('words', '')
if name.find('机动车损失') != -1:
product_result[0] = product.get('coverage', {}).get('words', '')
product_result[2] = product.get('deductible_franchise', {}).get('words', '')
elif name.find('第三者责任') != -1:
product_result[1] = product.get('coverage', {}).get('words', '')
special_str = license_data.get('result', {}).get('1stBeneficiary', {}).get('words', '')
special = '无'
if special_str.find('宝马') != -1 or special_str.find('先锋国际融资租赁有限公司') != -1:
special = '有'
insurance_ocr_result = {
'被保险人姓名': license_data.get('result', {}).get('insured', {}).get('name', {}).get('words', ''),
'被保险人证件号码': license_data.get('result', {}).get('insured', {}).get('certiCode', {}).get('words', ''),
'车架号': license_data.get('result', {}).get('vehicle', {}).get('VIN', {}).get('words', ''),
'机动车损失保险金额': product_result[0],
'机动车第三者责任保险金额': product_result[1],
'机动车损失保险绝对免赔率/绝对免赔额': product_result[2],
'保险费合计': license_data.get('result', {}).get('premiumSum', {}).get('words', ''),
'保险起始日期': license_data.get('result', {}).get('startDate', {}).get('words', ''),
'保险截止日期': license_data.get('result', {}).get('endDate', {}).get('words', ''),
'保单章': license_data.get('result', {}).get('seal', {}).get('words', ''),
'特别约定第一受益人': special,
}
license_summary.setdefault(classify, []).append(insurance_ocr_result)
# DDA
elif classify == consts.DDA_CLASSIFY:
pro = ocr_data.get('confidence', 0)
if pro < consts.DDA_PRO_MIN:
return
dda_ocr_result = {}
for key, value in license_data.get('result', {}).items():
dda_ocr_result[key] = value.get('words', '')
dda_ocr_result[consts.DDA_PRO] = pro
license_summary.setdefault(classify, []).append(dda_ocr_result)
# 抵押登记豁免函
elif classify == consts.HMH_CLASSIFY:
hmh_ocr_result = {}
for key, value in license_data.get('words_result', {}).items():
hmh_ocr_result[key] = value.get('words', '')
license_summary.setdefault(classify, []).append(hmh_ocr_result)
# 二手车交易凭证
elif classify == consts.JYPZ_CLASSIFY:
jypz_ocr_result = {}
for key, value in license_data.get('result', {}).items():
jypz_ocr_result[key] = value.get('words', '')
license_summary.setdefault(classify, []).append(jypz_ocr_result)
# 车辆登记证 3/4页结果整合
elif classify == consts.MVC_CLASSIFY:
rebuild_data_dict = {}
mvc_page = license_data.pop('page', 'VehicleRCI')
mvc_res = license_data.pop('results', {})
if mvc_page == 'VehicleRegArea':
mvc_res = mvc_dict.pop('results', {})
mvc_dict['机动车登记证书编号'] = mvc_res.get('register_no', {}).get('words', '')
for register_info in mvc_res.get('register_info', []):
for detail_dict in register_info.get('details', {}).values():
mvc_dict.setdefault(detail_dict.get('chinese_key', '未知'), []).append(
rebuild_data_dict['机动车登记证书编号'] = mvc_res.get('机动车登记证书编号', {}).get('words', '')
for register_info in mvc_res.get('登记信息', []):
register_info.pop('register_type', None)
register_info.pop('register_type_name', None)
for cn_key, detail_dict in register_info.items():
rebuild_data_dict.setdefault(cn_key, []).append(
detail_dict.get('words', ''))
del mvc_res
if classify == consts.IC_CLASSIFY:
for id_card_dict in license_data:
try:
base64_img = id_card_dict.pop('base64_img')
except Exception as e:
continue
else:
card_type = -1
json_data_4 = {
'mode': 1,
'user_info': {
'image_content': base64_img,
},
'options': {
'distinguish_type': 1,
'auto_rotate': True,
},
}
for times in range(consts.RETRY_TIMES):
try:
start_time = time.time()
ocr_4_response = requests.post(self.ocr_url_4, json=json_data_4)
if ocr_4_response.status_code != 200:
raise OCR4Exception('ocr_4 status code: {0}'.format(ocr_4_response.status_code))
except Exception as e:
self.folder_log.warn(
'{0} [ocr_4 failed] [times={1}] [img_path={2}] [error={3}]'.format(
self.log_base, times, img_path, traceback.format_exc()))
else:
ocr_4_res = ocr_4_response.json()
end_time = time.time()
speed_time = int(end_time - start_time)
if ocr_4_res.get('code') == 0 and ocr_4_res.get('result', {}).get('rtn') == 0:
card_type = ocr_4_res.get('result', {}).get(
'idcard_distinguish_result', {}).get('result', -1)
for cn_key, detail_dict in mvc_res.items():
rebuild_data_dict[cn_key] = detail_dict.get('words', '')
del mvc_res
license_summary.setdefault(classify, []).append(rebuild_data_dict)
self.folder_log.info(
'{0} [ocr_4 success] [img_path={1}] [speed_time={2}]'.format(
self.log_base, img_path, speed_time))
break
# 身份证真伪
elif classify == consts.IC_CLASSIFY:
id_card_dict = {}
card_type = license_data.get('type', '')
is_ic = card_type.startswith('身份证')
is_info_side = card_type.endswith('信息面')
id_card_dict['类别'] = '0' if is_ic else '1'
if is_ic:
field_map = consts.IC_MAP_0 if is_info_side else consts.IC_MAP_1
else:
self.folder_log.warn(
'{0} [ocr_4 failed] [img_path={1}]'.format(self.log_base, img_path))
field_map = consts.RP_MAP_0 if is_info_side else consts.RP_MAP_1
for write_field, search_field in field_map:
id_card_dict[write_field] = license_data.get('words_result', {}).get(search_field, {}).get('words', '')
if not is_info_side:
start_time = license_data.get('words_result', {}).get('签发日期', {}).get('words', '')
end_time = license_data.get('words_result', {}).get('失效日期', {}).get('words', '')
id_card_dict['有效期限'] = '{0}-{1}'.format(start_time, end_time)
id_card_dict[consts.IC_TURE_OR_FALSE] = consts.IC_RES_MAPPING.get(card_type)
id_card_dict[consts.IC_TURE_OR_FALSE] = consts.IC_RES_MAPPING.get(1)
license_summary.setdefault(classify, []).append(id_card_dict)
# 购车发票 & 二手车发票
elif classify == consts.MVI_CLASSIFY or classify == consts.UCI_CLASSIFY:
rebuild_data_dict = {}
mvi_res = license_data.pop('result', {})
for en_key, detail_dict in mvi_res.items():
rebuild_data_dict[detail_dict.get('chinese_key', '')] = detail_dict.get('words', '')
rebuild_data_dict['新旧版式'] = license_data.get('layout', '')
license_summary.setdefault(classify, []).append(rebuild_data_dict)
# 其他
else:
license_summary.setdefault(classify, []).extend(license_data)
@staticmethod
......@@ -166,7 +208,7 @@ class Command(BaseCommand, LoggerMixin):
if isinstance(data_list, list):
for ocr_data in data_list:
# part_idx = part_idx + 1
self.license1_process(ocr_data, license_summary, classify, img_path)
self.license1_process(ocr_data, license_summary, classify)
wb = BSWorkbook(set(), set(), set(), set(), set())
wb.simple_license_rebuild(license_summary, consts.DOC_SCHEME_LIST[0])
......
......@@ -48,6 +48,7 @@ class Finder:
"其他约定与条件英文": "",
"其他约定与条件中文": "",
}
def get_line(self, ocr_results, key_string):
# 根据指定关键词, 找出与关键词同处一行的字符
top, bottom = -1, -1
......@@ -69,6 +70,7 @@ class Finder:
line_text = sorted(line_text, key=lambda x: x[0][0], reverse=False)
lines = ''.join([i[1] for i in line_text])
return lines
def page_predict(self, ocr_results, page_template):
classes = []
for pno in ocr_results:
......@@ -82,6 +84,7 @@ class Finder:
classes.append([pno, score])
pred = sorted(classes, key=lambda x: x[1], reverse=True)[0]
return pred
def get_top_key(self, ocr_results, key_string): # 加入过滤词典
"""找到与 key_string 最匹配的字段的 key
"""
......@@ -90,6 +93,7 @@ class Finder:
ratio_list = [[fuzz.ratio(key_string, ocr_results[key][1]), key] for key in ocr_results]
top_key = sorted(ratio_list, key=lambda x: x[0])[-1]
return top_key
def get_top_iou(self, ocr_results, poly):
"""求最大IoU
"""
......@@ -108,6 +112,7 @@ class Finder:
return -1, -1
top_iou = sorted(iou_list, key=lambda x: x[0])[-1]
return top_iou
def get_key_value(self, ocr_results, key_string):
"""根据 key 查找 value
"""
......@@ -139,6 +144,7 @@ class Finder:
else:
value = words
return value
def get_contract_No(self):
"""提取左上角的合同编号字段
"""
......@@ -153,6 +159,7 @@ class Finder:
# TODO!!!
contract_No_list.append(contract_No)
return contract_No_list
def get_info_in_page_3(self):
"""提取第三页上的经销商名称,和经销商统一社会信用代码或公司注册号
"""
......@@ -178,6 +185,7 @@ class Finder:
words = self.get_key_value(self.ocr_results[pno], '统一社会信用代码或公司注册号')
dealer_No = words.replace('O', '0')
return dealer_name, dealer_No
def get_info_in_page_38(self):
"""提取第38页上的经销商名称
"""
......@@ -195,6 +203,7 @@ class Finder:
words = re.sub(r'[(())盖章《]', "", words)
dealer_name = words
return dealer_name
def get_guarantor(self):
"""提取第10页上保证人段落,所见即所得
"""
......@@ -210,6 +219,7 @@ class Finder:
words = words.replace('【', '[').replace('】', ']').replace(',', ',').replace('(', '(').replace(')', ')')
guarantor = words
return guarantor
def get_info_in_page_39(self):
"""提取综合授信合同上的一些字段
"""
......@@ -291,6 +301,7 @@ class Finder:
deposit_chn = f'{words}%'
return amount_eng, amount_chn, term_start_eng, term_end_eng, \
term_start_chn, term_end_chn, deposit_eng, deposit_chn
def get_other_arrangements_and_conditions(self):
"""获取其它约定与条件文本段落
"""
......@@ -311,6 +322,7 @@ class Finder:
words = searchObj.group(1)
other_arrangements_and_conditions_chn = words
return other_arrangements_and_conditions_eng, other_arrangements_and_conditions_chn
def get_info(self):
# 按照文档页码返回一个合同编号列表,依次表示每一页上识别到的合同编号
contract_No_list = self.get_contract_No()
......@@ -337,6 +349,7 @@ class Finder:
self.init_result["其他约定与条件中文"] = words_chn
return self.init_result
class TIFFHandler:
def __init__(self, path, img_save_path):
......@@ -568,6 +581,7 @@ class Command(BaseCommand, LoggerMixin):
if len(true_file_set) == 0 and len(os_error_filename_set) > 0:
true_file_set.add(os_error_filename_set.pop())
for name in true_file_set:
time.sleep(10) # 防止文件较大时,读取到不完整文件
path = os.path.join(input_dir, name)
try:
......
......@@ -989,7 +989,7 @@ def get_se_cms_compare_info_auto(last_obj, application_entity):
vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[0], hmh_name))
vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[1], hmh_id))
vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[2], first_submission_date))
vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[3], consts.SE_STAMP_VALUE))
# vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[3], consts.SE_STAMP_VALUE))
vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[4], consts.SE_FPL_VALUE))
bhsj = float(amount) / 1.13
vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[5], consts.SPLIT_STR.join([
......@@ -1464,7 +1464,7 @@ def get_se_cms_compare_info(last_obj, application_entity, detect_list):
vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[0], hmh_name))
vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[1], hmh_id))
vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[2], first_submission_date))
vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[3], consts.SE_STAMP_VALUE))
# vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[3], consts.SE_STAMP_VALUE))
vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[4], consts.SE_FPL_VALUE))
bhsj = float(amount) / 1.13
vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[5], consts.SPLIT_STR.join([
......@@ -1499,7 +1499,7 @@ def get_se_cms_compare_info(last_obj, application_entity, detect_list):
gb34_field_input.append((consts.SE_GB_USED_FIELD[1], main_num))
gb34_field_input.append((consts.SE_GB_USED_FIELD[2], first_submission_date))
vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[2], first_submission_date))
vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[3], consts.SE_STAMP_VALUE))
# vehicle_field_input.append((consts.SE_NEW_ADD_FIELD[3], consts.SE_STAMP_VALUE))
jypz_field_input.append(('vinNo', vin_no))
jypz_field_input.append(('vehicleTransactionAmount', amount))
jypz_field_input.append((consts.SE_GB_USED_FIELD[-1], first_submission_date))
......@@ -2683,9 +2683,9 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh):
for i in cn_reason_list:
if i in tmp_set:
continue
elif i in consts.BS_REASON:
tmp_set.add(i)
bs_cn_reason_list.append(i)
# elif i in consts.BS_REASON:
# tmp_set.add(i)
# bs_cn_reason_list.append(i)
else:
tmp_set.add(i)
last_cn_reason_list.append(i)
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!