3586d37a by 周伟奇

add ltgt

1 parent 634fd497
......@@ -832,6 +832,16 @@ MVC_SE_FIELD_ORDER_3_4 = (
('解除抵押日期', '解除抵押日期'),
)
# LTGT field orders for the motor vehicle registration certificate (MVC).
# Each entry is a pair of field-name strings. The `_1_2` / `_3_4` suffixes
# presumably refer to certificate pages -- TODO confirm.
MVC_SE_FIELD_ORDER_1_2_LTGT = (
    ('1.机动车所有人/身份证名称/号码', '机动车所有人/身份证明名称/号码'),
    ('编号', '机动车登记证书编号'),
)

MVC_SE_FIELD_ORDER_3_4_LTGT = (
    ('身份证名称/号码', '身份证明名称/号码'),
    ('机动车登记证书编号', '机动车登记证书编号'),
    ('抵押登记日期', '抵押登记日期'),
)

# Motor vehicle sales unified invoice
MVI_CN_NAME = '机动车销售统一发票'
MVI_CLASSIFY = 29
......@@ -1421,6 +1431,23 @@ SE_DDA_FIELD = ['applicationId(1)', 'applicationId(2)', 'bankName', 'companyName
ASP_KEY = 'is_asp'

# LTGT contract field map.
# Output field name -> (page number for non-ASP contracts,
#                       page number for ASP contracts,
#                       first-level key in the page info,
#                       second-level key, or None when the first-level
#                       value is used directly)
AFC_CON_MAP_LTGT = {
    '合同编号': (1, 1, '合同编号', None),
    '借款人姓名': (2, 2, '借款人及抵押人', 'name'),
    '共借人姓名': (2, 2, '共同借款人及共同抵押人', 'name'),
    '保证人姓名1': (2, 2, '保证人1', 'name'),
    '保证人姓名2': (2, 2, '保证人2', 'name'),
}

# Column order for the LTGT contract sheet; each entry is a pair of
# field-name strings, here identical on both sides.
AFC_CON_FIELD_ORDER_LTGT = (
    ('合同编号', '合同编号'),
    ('借款人姓名', '借款人姓名'),
    ('共借人姓名', '共借人姓名'),
    ('保证人姓名1', '保证人姓名1'),
    ('保证人姓名2', '保证人姓名2'),
)
SE_AFC_CON_MAP = {
'合同编号-每页': (None, None, '合同编号', None),
'所购车辆价格-小写-重要条款': (1, 1, '所购车辆价格', None),
......
......@@ -58,10 +58,11 @@ class Command(BaseCommand, LoggerMixin):
130: '民事调解书'
}
self.sheet_content = {
128: ('执行裁定书', ('承办法院', '案号/标号', '被执行人', '债权金额', '诉讼时间')),
129: ('民事判决书', ('承办法院', '案号/标号', '被告', '判决结果: 贷款本金', '判决结果: 罚息', '判决结果: 律师费', '判决结果: 案件受理费', '诉讼时间')),
130: ('民事调解书', ('承办法院', '案号/标号', '被告', '协议内容: 支付金额', '协议内容: 案件受理费', '诉讼时间')),
128: ['执行裁定书', ['承办法院', '案号/标号', '被执行人', '债权金额', '诉讼时间']],
129: ['民事判决书', ['承办法院', '案号/标号', '被告', '判决结果: 贷款本金', '判决结果: 罚息', '判决结果: 律师费', '判决结果: 案件受理费', '诉讼时间']],
130: ['民事调解书', ['承办法院', '案号/标号', '被告', '协议内容: 支付金额', '协议内容: 案件受理费', '诉讼时间']],
}
self.FILE_KEY = 'file'
self.DATE_KEY = 'date'
self.CLASSIFY_KEY = 'classify'
self.RESULT_KEY = 'result'
......@@ -84,6 +85,14 @@ class Command(BaseCommand, LoggerMixin):
consts.IC_CLASSIFY: (consts.IC_CN_NAME, '有效期限', consts.IC_FIELD_ORDER_3, consts.IC_FIELD_ORDER_2),
consts.MVC_CLASSIFY: (consts.MVC_CN_NAME, '机动车登记证书编号', consts.MVC_SE_FIELD_ORDER_3_4, consts.MVC_SE_FIELD_ORDER_1_2),
}
self.field_map_2 = {
# sheet_name, key_field, side_field_order, src_field_order
consts.CONTRACT_CLASSIFY: (consts.CONTRACT_CN_NAME, None, None, consts.AFC_CON_FIELD_ORDER_LTGT),
consts.VAT_CLASSIFY: (consts.VAT_CN_NAME, None, None, consts.VATS_FIELD_ORDER),
consts.IC_CLASSIFY: (consts.IC_CN_NAME, '有效期限', consts.IC_FIELD_ORDER_3, consts.IC_FIELD_ORDER_2),
consts.MVC_CLASSIFY: (
consts.MVC_CN_NAME, '机动车登记证书编号', consts.MVC_SE_FIELD_ORDER_3_4_LTGT, consts.MVC_SE_FIELD_ORDER_1_2_LTGT),
}
# ocr相关
self.ocr_url = conf.OCR_URL_FOLDER
self.ocr_url_2 = conf.OCR2_URL_FOLDER
......@@ -95,7 +104,7 @@ class Command(BaseCommand, LoggerMixin):
def signal_handler(self, sig, frame):
    """Clear ``self.switch`` when invoked.

    ``sig`` and ``frame`` are accepted but not used by the body.
    """
    self.switch = False  # stop processing files
def contract_process(self, ocr_data, contract_result, classify):
def contract_process(self, ocr_data, contract_result, classify, rebuild_contract_result):
contract_dict = ocr_data.get('data')
if not contract_dict or contract_dict.get('page_num') is None or contract_dict.get('page_info') is None:
return
......@@ -135,6 +144,26 @@ class Command(BaseCommand, LoggerMixin):
contract_result.setdefault(classify, dict()).setdefault(page_num_only, []).append(rebuild_page_info)
page_compare_dict = {}
for key, value in contract_dict.get('page_info', {}).items():
if not isinstance(value, dict):
continue
elif text_key in value:
if value[text_key] is None:
page_compare_dict[key] = ''
elif isinstance(value[text_key], str):
page_compare_dict[key] = value[text_key]
elif isinstance(value[text_key], list):
page_compare_dict[key] = value[text_key]
else:
page_compare_dict[key] = {}
for sub_key, sub_value in value.items():
if sub_value[text_key] is None:
page_compare_dict[key][sub_key] = ''
elif isinstance(sub_value[text_key], str):
page_compare_dict[key][sub_key] = sub_value[text_key]
rebuild_contract_result.setdefault(classify, dict())[page_num_only] = page_compare_dict
def license1_process(self, ocr_data, all_res, classify):
# 类别:'0'身份证, '1'居住证
license_data = ocr_data.get('data')
......@@ -253,7 +282,31 @@ class Command(BaseCommand, LoggerMixin):
seperate_path_map[c] = os.path.join(seperate_dir, new_name)
return img_save_path, excel_path, pdf_save_path, seperate_path_map
def res_process(self, all_res, excel_path, classify, contract_result):
@staticmethod
def all_res_add_contract(all_res, rebuild_contract_result):
    """Flatten rebuilt contract pages into ``all_res``.

    For every classify in ``rebuild_contract_result`` a single result dict
    is built from ``consts.AFC_CON_MAP_LTGT`` and stored as
    ``all_res[classify] = [res]`` (replacing any existing entry).

    Args:
        all_res: dict mutated in place; classify -> list of result dicts.
        rebuild_contract_result: classify -> {page number: page info dict}.
            Pages are looked up with ``str(page number)``.
            NOTE(review): confirm the producer stores page numbers as str.

    Each map entry is ``(pno_not_asp, pno_asp, key1, key2)``. When ``key2``
    is None the value under ``key1`` is copied as is; otherwise the value
    under ``key1`` is expected to be a dict and ``key2`` is read from it.
    A first-level value that is not a dict (e.g. an empty string) yields
    ``''`` instead of raising ``AttributeError``.
    """
    # Only the non-ASP page numbers are used here.
    is_asp = False
    for classify, page_info_dict in rebuild_contract_result.items():
        res = {}
        for key, (pno_not_asp, pno_asp, key1, key2) in consts.AFC_CON_MAP_LTGT.items():
            pno = pno_asp if is_asp else pno_not_asp
            if pno is None:
                if isinstance(pno_asp, int):
                    continue
                # No fixed page: collect the field from every page in range.
                end_idx = 9 if is_asp else 8
                res.setdefault(key, []).extend(
                    page_info_dict.get(str(i), {}).get(key1, '')
                    for i in range(1, end_idx)
                )
                continue

            page = page_info_dict.get(str(pno), {})
            if key2 is None:
                res[key] = page.get(key1, '')
            else:
                nested = page.get(key1, {})
                # The first-level value may be a plain string or list rather
                # than a dict; treat that as "sub-field missing".
                res[key] = nested.get(key2, '') if isinstance(nested, dict) else ''
            res.setdefault(consts.IMG_PATH_KEY, {})[key] = page.get(consts.IMG_PATH_KEY, '')
        all_res[classify] = [res]
def res_process(self, all_res, excel_path, classify, contract_result, rebuild_contract_result):
try:
wb = BSWorkbook(set(), set(), set(), set(), set())
for c, res_list in all_res.items():
......@@ -274,6 +327,8 @@ class Command(BaseCommand, LoggerMixin):
wb.contract_rebuild(contract_result)
wb.remove_base_sheet()
wb.save(excel_path)
self.all_res_add_contract(all_res, rebuild_contract_result)
except Exception as e:
self.folder_log.error('{0} [wb build error] [path={1}] [error={2}]'.format(
self.log_base, excel_path, traceback.format_exc()))
......@@ -284,7 +339,7 @@ class Command(BaseCommand, LoggerMixin):
sep = os.path.sep + (os.path.altsep or '')
return os.path.basename(path.rstrip(sep))
def ocr_process(self, img_path, classify, all_res, seperate_path_map, contract_result):
def ocr_process(self, img_path, classify, all_res, seperate_path_map, contract_result, rebuild_contract_result):
if os.path.exists(img_path):
# TODO 图片验证
with open(img_path, 'rb') as f:
......@@ -332,7 +387,7 @@ class Command(BaseCommand, LoggerMixin):
elif new_classify in consts.LICENSE_CLASSIFY_SET_2:
self.license2_process(ocr_data, all_res, new_classify, img_path)
elif new_classify in consts.CONTRACT_SET:
self.contract_process(ocr_data, contract_result, new_classify)
self.contract_process(ocr_data, contract_result, new_classify, rebuild_contract_result)
break
else:
self.folder_log.warn('{0} [ocr failed] [img_path={1}]'.format(self.log_base, img_path))
......@@ -395,15 +450,15 @@ class Command(BaseCommand, LoggerMixin):
def images_process(self, img_path_list, classify, excel_path, seperate_path_map):
    """Run OCR over each image and build the result workbook.

    Args:
        img_path_list: iterable of image paths, processed in order.
        classify: classify value forwarded to ``ocr_process`` / ``res_process``.
        excel_path: workbook path forwarded to ``res_process``.
        seperate_path_map: forwarded unchanged to ``ocr_process``.

    Returns:
        The ``all_res`` dict that ``ocr_process`` and ``res_process`` were
        given (they may fill it in place).
    """
    all_res = {}
    contract_result = {}
    rebuild_contract_result = {}
    for img_path in img_path_list:
        # Each image is processed exactly once, using the signature that
        # includes rebuild_contract_result.
        self.ocr_process(img_path, classify, all_res, seperate_path_map,
                         contract_result, rebuild_contract_result)
    # res_process is called unconditionally, even when all_res is empty.
    self.res_process(all_res, excel_path, classify, contract_result, rebuild_contract_result)
    return all_res
def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir_map):
if os.path.exists(path):
rebuild_res = None
img_save_path, excel_path, pdf_save_path, seperate_path_map = self.get_path(
name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir_map)
pdf_handler = PDFHandler(path, img_save_path)
......@@ -420,7 +475,9 @@ class Command(BaseCommand, LoggerMixin):
else:
ocr_result = afc_predict(pdf_handler.pdf_info)
contract_result = dict()
rebuild_contract_result = dict()
page_res = {}
all_res = dict()
for page_num, page_info in ocr_result.get('page_info', {}).items():
if isinstance(page_num, str) and page_num.startswith('page_'):
page_res[page_num] = {
......@@ -435,9 +492,10 @@ class Command(BaseCommand, LoggerMixin):
'classify': page_res[page_key].pop('classify', consts.OTHER_CLASSIFY),
'data': page_res[page_key]
}
self.contract_process(ocr_data, contract_result, classify)
self.res_process({}, excel_path, classify, contract_result)
self.contract_process(ocr_data, contract_result, classify, rebuild_contract_result)
self.res_process(all_res, excel_path, classify, contract_result, rebuild_contract_result)
shutil.move(path, pdf_save_path)
return all_res
else:
try:
self.folder_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path))
......@@ -465,7 +523,6 @@ class Command(BaseCommand, LoggerMixin):
def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir_map):
if os.path.exists(path):
rebuild_res = None
try:
img_save_path, excel_path, tiff_save_path, seperate_path_map = self.get_path(
name, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir_map)
......@@ -490,7 +547,6 @@ class Command(BaseCommand, LoggerMixin):
return rebuild_res
def img_process(self, name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir, seperate_dir_map):
rebuild_res = None
try:
img_save_path, excel_path, _, seperate_path_map = self.get_path(
name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir_map)
......@@ -539,19 +595,27 @@ class Command(BaseCommand, LoggerMixin):
try:
if result[self.CLASSIFY_KEY] in self.sheet_content:
sheet_name, head_fields = self.sheet_content[result[self.CLASSIFY_KEY]]
first_head_row = head_fields
else:
sheet_name, key_field, side_field_order, field_order = self.field_map_2[result[self.CLASSIFY_KEY]]
if key_field is not None and len(side_field_order) > len(field_order):
first_head_row = []
for a, _ in side_field_order:
first_head_row.append(a)
else:
sheet_name, key_field, side_field_order, field_order = self.field_map[result[self.CLASSIFY_KEY]]
first_head_row = []
for a, _ in field_order:
first_head_row.append(a)
if key_field is not None and key_field in result[self.RESULT_KEY]:
head_fields = []
for a, b in side_field_order:
if isinstance(b, str):
for a, _ in side_field_order:
head_fields.append(a)
else:
head_fields = []
for a, b in field_order:
if isinstance(b, str):
for a, _ in field_order:
head_fields.append(a)
row = []
row = [result[self.FILE_KEY]]
for field in head_fields:
ocr_str_or_list = result[self.RESULT_KEY].get(field, '')
if isinstance(ocr_str_or_list, list):
......@@ -563,7 +627,8 @@ class Command(BaseCommand, LoggerMixin):
ws = wb.get_sheet_by_name(sheet_name)
else:
ws = wb.create_sheet(sheet_name)
ws.append(head_fields)
first_head_row.insert(0, '文件名')
ws.append(first_head_row)
ws.append(row)
except Exception as e:
self.folder_log.info('{0} [daily wb failed] [result={1}] [error={2}]'.format(
......@@ -617,6 +682,7 @@ class Command(BaseCommand, LoggerMixin):
if len(true_file_set) == 0 and len(os_error_filename_set) > 0:
true_file_set.add(os_error_filename_set.pop())
for name in true_file_set:
time.sleep(5)
path = os.path.join(input_dir, name)
try:
......@@ -664,7 +730,8 @@ class Command(BaseCommand, LoggerMixin):
{
self.CLASSIFY_KEY: c,
self.RESULT_KEY: res,
self.DATE_KEY: date_str
self.DATE_KEY: date_str,
self.FILE_KEY: name,
}
)
......
......@@ -1067,8 +1067,8 @@ def get_se_cms_compare_info_auto(last_obj, application_entity):
# ('accountNo', account_no),
# ]
# bank_info[consts.DDA_EN] = dda_field_input
# if len(bank_info) > 0:
# compare_info['bankInfo'] = bank_info
if len(bank_info) > 0:
compare_info['bankInfo'] = bank_info
# 银行流水 --------------------------------------------------------------------
if cms_info.get('autoApprovedDetails', {}).get('aaType', '') in ['CAA1', 'CAA2']:
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!