31656b38 by 周伟奇

DDA part 1

1 parent 779dbb1a
......@@ -520,7 +520,7 @@ OTHER_TUPLE = (None, None, None, None, None, None, None, None, None, None, None,
# "35":"针式打印-部分格线-竖版-邮储银行",
# "36":"针式打印-部分格线-竖版-邮储银行-绿卡",
# "38":"普通打印-无格线-农业银行-整数-特殊",
# "50":"普通打印-无格线-农业银行-整数-特殊",
CLASSIFY_LIST = [
('其他', OTHER_TUPLE),
......@@ -563,6 +563,18 @@ CLASSIFY_LIST = [
('针式打印-部分格线-竖版-邮储银行', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
('针式打印-部分格线-竖版-邮储银行-绿卡', (2, None, 5, 6, None, 4, None, 7, None, None, None, None, None)),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('其他', OTHER_TUPLE),
('普通打印-无格线-农业银行-整数-特殊', (1, None, 3, 4, None, 2, None, 5, None, None, None, None, None)),
]
......@@ -608,6 +620,18 @@ CLASSIFY_HEADER_LIST = [
('序号', '交易日期', '交易渠道', '摘要', '交易金额', '账户余额', '对方账号/卡号/汇票号', '原子账号', '交易机构名称'),
('序号', '交易日期', '交易渠道', '摘要', '交易金额', '账户余额', '对方账号/卡号/汇票号', '原子账号', '交易机构名称'),
OTHER_TUPLE,
OTHER_TUPLE,
OTHER_TUPLE,
OTHER_TUPLE,
OTHER_TUPLE,
OTHER_TUPLE,
OTHER_TUPLE,
OTHER_TUPLE,
OTHER_TUPLE,
OTHER_TUPLE,
OTHER_TUPLE,
OTHER_TUPLE,
OTHER_TUPLE,
('交易日期', '摘要/附言', '交易金额', '账户余额', '对方账号和户名'),
]
......@@ -812,7 +836,7 @@ MVI_FIELD_ORDER = (('发票代码', '发票代码'),
('主管税务机关及代码', '主管税务机关及代码'),
('吨位', '吨位'),
('限乘人数', '限乘人数'),)
IC_PID = VAT_PID = VATS_PID = MVC_PID = MVI_PID = None
IC_PID = VAT_PID = VATS_PID = MVC_PID = MVI_PID = RP_PID = None
# 营业执照
BL_CN_NAME = '营业执照'
......@@ -916,6 +940,10 @@ BC_FIELD_ORDER = (('BankName', '发卡行名称'),
('CardType', '银行卡类型'),
('Name', '持卡人姓名'),)
# DDA
# Display name for the DDA document type.
DDA_CN_NAME = 'DDA'
# Classify code identifying DDA documents.
# NOTE(review): presumably this must match the code the classifier emits for DDA pages - confirm.
DDA_CLASSIFY = 38
SUCCESS_CODE_SET = {'0', 0}
FIELD_ORDER_MAP = {
......@@ -941,7 +969,7 @@ MODEL_FIELD_VAT = 'vat_count'
LICENSE_ORDER = ((MVI_CLASSIFY, (MVI_PID, MVI_CN_NAME, MVI_FIELD_ORDER, False, False, MODEL_FIELD_MVI)),
(IC_CLASSIFY, (IC_PID, IC_CN_NAME, None, True, False, MODEL_FIELD_IC)),
(RP_CLASSIFY, (None, RP_CN_NAME, None, True, False, MODEL_FIELD_RP)),
(RP_CLASSIFY, (RP_PID, RP_CN_NAME, None, True, False, MODEL_FIELD_RP)),
(BC_CLASSIFY, (BC_PID, BC_CN_NAME, BC_FIELD_ORDER, False, False, MODEL_FIELD_BC)),
(BL_CLASSIFY, (BL_PID, BL_CN_NAME, BL_FIELD_ORDER, False, False, MODEL_FIELD_BL)),
(UCI_CLASSIFY, (UCI_PID, UCI_CN_NAME, UCI_FIELD_ORDER, False, False, MODEL_FIELD_UCI)),
......@@ -960,12 +988,12 @@ FOLDER_LICENSE_ORDER = ((MVI_CLASSIFY, (MVI_PID, MVI_CN_NAME, MVI_FIELD_ORDER, F
LICENSE_CLASSIFY_MAPPING = dict(LICENSE_ORDER)
OTHER_CLASSIFY_SET = {OTHER_CLASSIFY}
LICENSE_CLASSIFY_SET_1 = {IC_CLASSIFY, VAT_CLASSIFY, MVC_CLASSIFY, MVI_CLASSIFY, UCI_CLASSIFY}
LICENSE_CLASSIFY_SET_1 = {IC_CLASSIFY, VAT_CLASSIFY, MVC_CLASSIFY, MVI_CLASSIFY, UCI_CLASSIFY, DDA_CLASSIFY}
LICENSE_CLASSIFY_SET_2 = {BL_CLASSIFY, EEP_CLASSIFY, DL_CLASSIFY, PP_CLASSIFY, BC_CLASSIFY}
NYYH_CLASSIFY = {17, 18}
NYZS_CLASSIFY = 18
SPECIAL_NYZS_CLASSIFY = 38
SPECIAL_NYZS_CLASSIFY = 50
MS_CLASSIFY = 21
MS_ERROR_COL = (5, 6)
WECHART_CLASSIFY = 12
......@@ -1163,5 +1191,24 @@ UC_ORDER = ('vinNo', 'manufactureDate', 'firstRegistrationDate')
CO_ORDER = ('customerType', 'customerChineseName', 'legalRepName', 'idNum', 'businessLicenseNo', 'taxRegistrationCode',
'incorporationDate', 'businessLicenseDueDate', 'capitalRegAmount')
# --------------- DDA image saving --------------------
# Category labels: DDA_FIELD for the DDA image itself, IC_FIELD / BC_FIELD for
# the two categories that DDA_MAPPING (below) pairs DDA fields with.
# NOTE(review): presumably 'ID' means ID card and 'BC' means bank card - confirm.
DDA_FIELD = 'DDA'
IC_FIELD = 'ID'
BC_FIELD = 'BC'
# Field names looked up in recognition results. The two IC_KEY_FIELD entries
# translate to "name" and "citizen ID number".
# NOTE(review): presumably BC_KEY_FIELD is the bank-card number key - confirm.
IC_KEY_FIELD = ('姓名', '公民身份号码')
BC_KEY_FIELD = 'CardNum'
# Field names of a DDA record: customer name/id, account name/id, and the
# path of the DDA image.
DDA_IC_NAME = 'customer_name'
DDA_IC_ID = 'customer_id'
DDA_BC_NAME = 'account_name'
DDA_BC_ID = 'account_id'
DDA_IMG_PATH = 'img_path'
# (DDA field name, category label) pairs. Both customer fields map to
# IC_FIELD and the account id maps to BC_FIELD; DDA_BC_NAME has no entry.
DDA_MAPPING = [
(DDA_IC_NAME, IC_FIELD),
(DDA_IC_ID, IC_FIELD),
(DDA_BC_ID, BC_FIELD),
]
......
......@@ -189,13 +189,23 @@ class Command(BaseCommand, LoggerMixin):
else:
res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))
def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path):
def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, is_hil, hil_id_bc_mapping):
# 类别:'0'身份证, '1'居住证
license_data = ocr_data.get('data', [])
license_data = ocr_data.get('data')
if not license_data:
res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))
return
res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))
if classify == consts.DDA_CLASSIFY: # DDA处理
dda_ocr_result = {
consts.DDA_IC_NAME: license_data.get('result', {}).get(consts.DDA_IC_NAME, {}).get('words', ''),
consts.DDA_IC_ID: license_data.get('result', {}).get(consts.DDA_IC_ID, {}).get('words', ''),
consts.DDA_BC_NAME: license_data.get('result', {}).get(consts.DDA_BC_NAME, {}).get('words', ''),
consts.DDA_BC_ID: license_data.get('result', {}).get(consts.DDA_BC_ID, {}).get('words', ''),
consts.DDA_IMG_PATH: img_path
}
license_summary.setdefault(classify, []).append(dda_ocr_result)
if classify == consts.MVC_CLASSIFY: # 车辆登记证 3/4页结果整合
for mvc_dict in license_data:
try:
......@@ -231,6 +241,8 @@ class Command(BaseCommand, LoggerMixin):
mvc_dict['解除抵押日期'].append(
register_info.get('details', {}).get('date', {}).get('words', ''))
del mvc_res
license_summary.setdefault(classify, []).extend(license_data)
if classify == consts.IC_CLASSIFY: # 身份证真伪
for id_card_dict in license_data:
try:
......@@ -277,9 +289,18 @@ class Command(BaseCommand, LoggerMixin):
'{0} [ocr_4 failed] [img_path={1}]'.format(self.log_base, img_path))
id_card_dict[consts.IC_TURE_OR_FALSE] = consts.IC_RES_MAPPING.get(card_type)
finally:
if is_hil:
for key in consts.IC_KEY_FIELD:
if not isinstance(id_card_dict.get(key), str):
break
value = id_card_dict[key].strip()
if len(value) > 0:
hil_id_bc_mapping.setdefault(consts.IC_FIELD, dict()).setdefault(
value, set()).add(img_path)
license_summary.setdefault(classify, []).extend(license_data)
def license2_process(self, ocr_res_2, license_summary, pid, classify, res_list, pno, ino, part_idx):
def license2_process(self, ocr_res_2, license_summary, pid, classify, res_list, pno, ino, part_idx, img_path, is_hil, hil_id_bc_mapping):
if ocr_res_2.get('ErrorCode') in consts.SUCCESS_CODE_SET:
res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))
if pid == consts.BC_PID:
......@@ -288,6 +309,11 @@ class Command(BaseCommand, LoggerMixin):
# for en_key, chn_key in consts.BC_FIELD:
# res_dict[chn_key] = ocr_res_2.get(en_key, '')
license_summary.setdefault(classify, []).append(ocr_res_2)
if is_hil and isinstance(ocr_res_2.get(consts.BC_KEY_FIELD), str):
value = ocr_res_2[consts.BC_KEY_FIELD].strip()
if len(value) > 0:
hil_id_bc_mapping.setdefault(consts.BC_FIELD, dict()).setdefault(
value, set()).add(img_path)
else:
# 营业执照等
for result_dict in ocr_res_2.get('ResultList', []):
......@@ -696,6 +722,8 @@ class Command(BaseCommand, LoggerMixin):
business_type, doc_id_str = task_str.split(consts.SPLIT_STR)
doc_id = int(doc_id_str)
doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc
is_hil = True if business_type == consts.HIL_PREFIX else False
hil_id_bc_mapping = dict()
doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, doc_id_str)
excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc_id_str))
......@@ -742,8 +770,8 @@ class Command(BaseCommand, LoggerMixin):
res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_OTHER))
continue
elif classify in consts.LICENSE_CLASSIFY_SET_1: # 证件1
self.license1_process(ocr_data, license_summary, classify, res_list,
pno, ino, part_idx, img_path)
self.license1_process(ocr_data, license_summary, classify, res_list, pno,
ino, part_idx, img_path, is_hil, hil_id_bc_mapping)
elif classify in consts.LICENSE_CLASSIFY_SET_2: # 证件2
pid, _, _, _, _, _ = consts.LICENSE_CLASSIFY_MAPPING.get(classify)
file_data = ocr_data.get('section_img')
......@@ -788,7 +816,9 @@ class Command(BaseCommand, LoggerMixin):
card_name_res.get('data', {}).get('is_exists_name') == 0:
name = '无'
ocr_2_res['Name'] = name
self.license2_process(ocr_2_res, license_summary, pid, classify, res_list, pno, ino, part_idx)
self.license2_process(ocr_2_res, license_summary, pid, classify,
res_list, pno, ino, part_idx, img_path,
is_hil, hil_id_bc_mapping)
break
else:
res_list.append((pno, ino, part_idx, consts.RES_FAILED_2))
......@@ -890,6 +920,29 @@ class Command(BaseCommand, LoggerMixin):
os.remove(excel_path)
finally:
# TODO 识别结果存一张表,方便跑报表
# DDA处理
if is_hil:
# 获取需要保存图片的集合
hil_dda_save_img_list = []
for dda_res_list in license_summary.get(consts.DDA_CLASSIFY, []):
for dda_idx, dda_res in enumerate(dda_res_list):
save_img_dict = {
consts.DDA_FIELD: dda_res.get(consts.DDA_IMG_PATH)
}
for dda_field, license_field in consts.DDA_MAPPING:
target_str = dda_res.get(dda_field, '')
save_img_dict.setdefault(license_field, set()).update(
hil_id_bc_mapping.get(license_field, dict()).get(target_str, set()))
hil_dda_save_img_list.append(save_img_dict)
self.online_log.info('{0} [DDA process] [DDA_info={1}]'.format(self.log_base, license_summary.get(consts.DDA_CLASSIFY, [])))
self.online_log.info('{0} [DDA process] [ic&bc_info={1}]'.format(self.log_base, hil_id_bc_mapping))
self.online_log.info('{0} [DDA process] [img_path={1}]'.format(self.log_base, hil_dda_save_img_list))
# 保存图片
# 数据库记录
# report记录
# CA比对
if doc.document_scheme == consts.DOC_SCHEME_LIST[0]:
try:
# 更新OCR累计识别结果表
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!