1cfd8e70 by 周伟奇

dda part 2

1 parent 86fd122c
......@@ -1204,6 +1204,7 @@ DDA_IC_ID = 'customer_id'
DDA_BC_NAME = 'account_name'
DDA_BC_ID = 'account_id'
DDA_IMG_PATH = 'img_path'
DDA_PRO = 'pro'
DDA_MAPPING = [
......
......@@ -23,7 +23,7 @@ from apps.doc.ocr.edms import EDMS, rh
from apps.doc.named_enum import KeywordsType
from apps.doc.exceptions import EDMSException, OCR1Exception, OCR2Exception, OCR4Exception
from apps.doc.ocr.wb import BSWorkbook
from apps.doc.models import DocStatus, HILDoc, AFCDoc, Keywords, HILOCRResult, AFCOCRResult
from apps.doc.models import DocStatus, HILDoc, AFCDoc, Keywords, HILOCRResult, AFCOCRResult, DDARecords, IDBCRecords
from celery_compare.tasks import compare
......@@ -43,6 +43,10 @@ class Command(BaseCommand, LoggerMixin):
self.img_queue_size = int(conf.IMG_QUEUE_SIZE)
# 数据目录
self.data_dir = conf.DATA_DIR
# DDA目录
self.dda_dir = os.path.join(self.data_dir, 'HIL', 'DDA')
self.dda_complete_dir = os.path.join(self.dda_dir, 'complete')
self.dda_wanting_dir = os.path.join(self.dda_dir, 'wanting')
# ocr相关
self.ocr_1_urls = conf.get_namespace('OCR_URL_1_')
self.ocr_url_2 = conf.OCR_URL_2
......@@ -189,7 +193,7 @@ class Command(BaseCommand, LoggerMixin):
else:
res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))
def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, is_hil, hil_id_bc_mapping):
def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, do_dda, dda_id_bc_mapping):
# 类别:'0'身份证, '1'居住证
license_data = ocr_data.get('data')
if not license_data:
......@@ -197,12 +201,14 @@ class Command(BaseCommand, LoggerMixin):
return
res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))
if classify == consts.DDA_CLASSIFY: # DDA处理
pro = ocr_data.get('confidence')
dda_ocr_result = {
consts.DDA_IC_NAME: license_data.get('result', {}).get(consts.DDA_IC_NAME, {}).get('words', ''),
consts.DDA_IC_ID: license_data.get('result', {}).get(consts.DDA_IC_ID, {}).get('words', ''),
consts.DDA_BC_NAME: license_data.get('result', {}).get(consts.DDA_BC_NAME, {}).get('words', ''),
consts.DDA_BC_ID: license_data.get('result', {}).get(consts.DDA_BC_ID, {}).get('words', ''),
consts.DDA_IMG_PATH: img_path
consts.DDA_IMG_PATH: img_path,
consts.DDA_PRO: pro
}
license_summary.setdefault(classify, []).append(dda_ocr_result)
......@@ -290,17 +296,14 @@ class Command(BaseCommand, LoggerMixin):
id_card_dict[consts.IC_TURE_OR_FALSE] = consts.IC_RES_MAPPING.get(card_type)
finally:
if is_hil:
for key in consts.IC_KEY_FIELD:
if not isinstance(id_card_dict.get(key), str):
break
value = id_card_dict[key].strip()
if len(value) > 0:
hil_id_bc_mapping.setdefault(consts.IC_FIELD, dict()).setdefault(
value, set()).add(img_path)
# DDA bookkeeping: remember every ID card whose name and number were both
# recognised as strings, so it can later be matched against the DDA form.
if do_dda and isinstance(id_card_dict.get(consts.IC_KEY_FIELD[0]), str) and isinstance(id_card_dict.get(consts.IC_KEY_FIELD[1]), str):
    ic_name = id_card_dict[consts.IC_KEY_FIELD[0]].strip()
    ic_id = id_card_dict[consts.IC_KEY_FIELD[1]].strip()
    # Both values are strings at this point, so test them for emptiness.
    # The previous `ic_id > 0` compared a str with an int, which raises
    # TypeError on Python 3 whenever ic_name is non-empty.
    if ic_name and ic_id:
        dda_id_bc_mapping.setdefault(consts.IC_FIELD, []).append((ic_name, ic_id, img_path))
license_summary.setdefault(classify, []).extend(license_data)
def license2_process(self, ocr_res_2, license_summary, pid, classify, res_list, pno, ino, part_idx, img_path, is_hil, hil_id_bc_mapping):
def license2_process(self, ocr_res_2, license_summary, pid, classify, res_list, pno, ino, part_idx, img_path, do_dda, dda_id_bc_mapping):
if ocr_res_2.get('ErrorCode') in consts.SUCCESS_CODE_SET:
res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))
if pid == consts.BC_PID:
......@@ -309,11 +312,10 @@ class Command(BaseCommand, LoggerMixin):
# for en_key, chn_key in consts.BC_FIELD:
# res_dict[chn_key] = ocr_res_2.get(en_key, '')
license_summary.setdefault(classify, []).append(ocr_res_2)
if is_hil and isinstance(ocr_res_2.get(consts.BC_KEY_FIELD), str):
value = ocr_res_2[consts.BC_KEY_FIELD].strip()
if len(value) > 0:
hil_id_bc_mapping.setdefault(consts.BC_FIELD, dict()).setdefault(
value, set()).add(img_path)
if do_dda and isinstance(ocr_res_2.get(consts.BC_KEY_FIELD), str):
bc_no = ocr_res_2[consts.BC_KEY_FIELD].strip()
if len(bc_no) > 0:
dda_id_bc_mapping.setdefault(consts.BC_FIELD, []).append((bc_no, img_path))
else:
# 营业执照等
for result_dict in ocr_res_2.get('ResultList', []):
......@@ -723,13 +725,14 @@ class Command(BaseCommand, LoggerMixin):
doc_id = int(doc_id_str)
doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc
is_hil = True if business_type == consts.HIL_PREFIX else False
hil_id_bc_mapping = dict()
dda_id_bc_mapping = dict()
doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, doc_id_str)
excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc_id_str))
try:
doc = doc_class.objects.filter(id=doc_id).first()
do_dda = is_hil and doc.document_scheme == consts.DOC_SCHEME_LIST[1]
except Exception as e:
self.online_log.error('{0} [process error (db filter)] [task={1}] [error={2}]'.format(
self.log_base, task_str, traceback.format_exc()))
......@@ -771,7 +774,7 @@ class Command(BaseCommand, LoggerMixin):
continue
elif classify in consts.LICENSE_CLASSIFY_SET_1: # 证件1
self.license1_process(ocr_data, license_summary, classify, res_list, pno,
ino, part_idx, img_path, is_hil, hil_id_bc_mapping)
ino, part_idx, img_path, do_dda, dda_id_bc_mapping)
elif classify in consts.LICENSE_CLASSIFY_SET_2: # 证件2
pid, _, _, _, _, _ = consts.LICENSE_CLASSIFY_MAPPING.get(classify)
file_data = ocr_data.get('section_img')
......@@ -818,7 +821,7 @@ class Command(BaseCommand, LoggerMixin):
ocr_2_res['Name'] = name
self.license2_process(ocr_2_res, license_summary, pid, classify,
res_list, pno, ino, part_idx, img_path,
is_hil, hil_id_bc_mapping)
do_dda, dda_id_bc_mapping)
break
else:
res_list.append((pno, ino, part_idx, consts.RES_FAILED_2))
......@@ -921,31 +924,6 @@ class Command(BaseCommand, LoggerMixin):
finally:
# TODO 识别结果存一张表,方便跑报表
# DDA处理
if is_hil:
try:
# 获取需要保存图片的集合
hil_dda_save_img_list = []
dda_res_list = license_summary.get(consts.DDA_CLASSIFY, [])
self.online_log.info('{0} [DDA process] [DDA_info={1}]'.format(self.log_base, dda_res_list))
for dda_idx, dda_res in enumerate(dda_res_list):
save_img_dict = {
consts.DDA_FIELD: dda_res.get(consts.DDA_IMG_PATH)
}
for dda_field, license_field in consts.DDA_MAPPING:
target_str = dda_res.get(dda_field, '')
save_img_dict.setdefault(license_field, set()).update(
hil_id_bc_mapping.get(license_field, dict()).get(target_str, set()))
hil_dda_save_img_list.append(save_img_dict)
self.online_log.info('{0} [DDA process] [ic&bc_info={1}]'.format(self.log_base, hil_id_bc_mapping))
self.online_log.info('{0} [DDA process] [img_path={1}]'.format(self.log_base, hil_dda_save_img_list))
except Exception as e:
pass
# 保存图片
# 数据库记录
# report记录
# CA比对
if doc.document_scheme == consts.DOC_SCHEME_LIST[0]:
try:
......@@ -995,6 +973,208 @@ class Command(BaseCommand, LoggerMixin):
else:
self.online_log.info('{0} [comparison info send success] [task={1}] '
'[res_id={2}]'.format(self.log_base, task_str, res_obj.id))
# DDA处理
if do_dda:
# 入库
try:
dda_record = DDARecords.objects.filter(
application_id=doc.application_id).first()
if dda_record is None:
dda_record = DDARecords(application_id=doc.application_id)
except Exception as e:
self.online_log.error('{0} [process error (dda db get)] [task={1}] '
'[error={2}]'.format(self.log_base, task_str,
traceback.format_exc()))
else:
try:
if not dda_record.all_found:
found_time = timezone.now()
move_img_path_dict = dict()
ic_res_list = dda_id_bc_mapping.get(consts.IC_FIELD, [])
bc_res_list = dda_id_bc_mapping.get(consts.BC_FIELD, [])
self.online_log.info('{0} [dda process] [task={1}] [ic={2}] '
'[bc={3}]'.format(self.log_base, task_str, ic_res_list,
bc_res_list))
if not dda_record.is_dda_found:
try:
# DDA过滤,获取有效DDA
best_dda_res = None
dda_res_list = license_summary.get(consts.DDA_CLASSIFY, [])
if len(dda_res_list) > 0:
dda_res_list.sort(key=lambda x: x.get(consts.DDA_PRO, 0),
reverse=True)
tmp_best_dda_res = dda_res_list[0]
if tmp_best_dda_res.get(consts.DDA_PRO, 0) >= 0.6:
best_dda_res = tmp_best_dda_res
self.online_log.info(
'{0} [dda process] [task={1}] [dda={2}]'.format(
self.log_base, task_str, dda_res_list))
except Exception as e:
best_dda_res = None
dda_record.is_dda_found = False if best_dda_res is None else True
if dda_record.is_dda_found:
dda_path = best_dda_res.get(consts.DDA_IMG_PATH, '')
customer_name = best_dda_res.get(consts.DDA_IC_NAME, '')
customer_id = best_dda_res.get(consts.DDA_IC_ID, '')
account_id = best_dda_res.get(consts.DDA_BC_ID, '')
dda_record.dda_path = dda_path
dda_record.dda_found_time = found_time
dda_record.customer_name = customer_name
dda_record.customer_id = customer_id
dda_record.account_id = account_id
# move
move_img_path_dict.setdefault(consts.DDA_FIELD, set()).add(dda_path)
if dda_record.is_dda_found:
try:
if not dda_record.is_id_found:
for ic_name, ic_id, ic_img_path in ic_res_list:
if ic_id == dda_record.customer_id \
or ic_name == dda_record.customer_name:
dda_record.is_id_found = True
dda_record.id_path = ic_img_path
dda_record.id_found_time = found_time
move_img_path_dict.setdefault(consts.IC_FIELD, set()).add(
ic_img_path)
break
else:
id_record = IDBCRecords.objects.filter(
application_id=doc.application_id,
target_id=dda_record.customer_id,
is_id=True).first()
if id_record is None:
id_record = IDBCRecords.objects.filter(
application_id=doc.application_id,
target_name=dda_record.customer_name,
is_id=True).first()
if id_record is not None:
dda_record.is_id_found = True
dda_record.id_path = id_record.file_path
dda_record.id_found_time = id_record.create_time
move_img_path_dict.setdefault(consts.IC_FIELD, set()).add(
id_record.file_path)
except Exception as e:
self.online_log.error(
'{0} [process error (dda id process)] [task={1}] '
'[error={2}]'.format(self.log_base, task_str,
traceback.format_exc()))
try:
if not dda_record.is_bc_found:
for bc_no, bc_img_path in bc_res_list:
if bc_no == dda_record.account_id:
dda_record.is_bc_found = True
dda_record.bc_path = bc_img_path
dda_record.bc_found_time = found_time
move_img_path_dict.setdefault(consts.BC_FIELD, set()).add(
bc_img_path)
break
else:
bc_record = IDBCRecords.objects.filter(
application_id=doc.application_id,
target_id=dda_record.account_id,
is_id=False).first()
if bc_record is not None:
dda_record.is_bc_found = True
dda_record.bc_path = bc_record.file_path
dda_record.bc_found_time = bc_record.create_time
move_img_path_dict.setdefault(consts.BC_FIELD, set()).add(
bc_record.file_path)
except Exception as e:
self.online_log.error(
'{0} [process error (dda bc process)] [task={1}] '
'[error={2}]'.format(self.log_base, task_str,
traceback.format_exc()))
if dda_record.is_dda_found and dda_record.is_id_found and dda_record.is_bc_found:
dda_record.all_found = True
dda_record.save()
# 图片移动
try:
if len(move_img_path_dict) > 0:
self.online_log.info(
'{0} [dda process] [task={1}] [move_img_path={2}]'.format(
self.log_base, task_str, move_img_path_dict))
wanting_dir = os.path.join(self.dda_wanting_dir, doc.application_id)
wanting_dir_exists = os.path.isdir(wanting_dir)
if dda_record.all_found:
target_dir = os.path.join(self.dda_complete_dir, doc.application_id)
if wanting_dir_exists:
shutil.move(wanting_dir, target_dir)
else:
os.makedirs(target_dir, exist_ok=True)
else:
target_dir = wanting_dir
if not wanting_dir_exists:
os.makedirs(target_dir, exist_ok=True)
for prefix, path_set in move_img_path_dict.items():
for idx, path in enumerate(path_set):
if os.path.isfile(path):
file_name = '{0}_{1}{2}'.format(
prefix, idx, os.path.splitext(path)[-1])
target_path = os.path.join(target_dir, file_name)
shutil.copyfile(path, target_path)
else:
self.online_log.warn(
'{0} [dda process] [img path empty] [task={1}] '
'[path={2}]'.format(self.log_base, task_str, path))
except Exception as e:
self.online_log.error(
'{0} [process error (dda img move)] [task={1}] '
'[error={2}]'.format(self.log_base, task_str,
traceback.format_exc()))
# id & bc 入库
try:
if not dda_record.is_dda_found:
ic_set = set()
bc_set = set()
for ic_name, ic_id, ic_img_path in ic_res_list:
query_str = '{0}{1}'.format(ic_name, ic_id)
if query_str in ic_set:
continue
ic_set.add(query_str)
IDBCRecords.objects.create(
application_id=doc.application_id,
target_name=ic_name,
target_id=ic_id,
is_id=True,
file_path=ic_img_path
)
for bc_no, bc_img_path in bc_res_list:
if bc_no in bc_set:
continue
bc_set.add(bc_no)
IDBCRecords.objects.create(
application_id=doc.application_id,
target_id=bc_no,
is_id=False,
file_path=bc_img_path
)
except Exception as e:
self.online_log.error(
'{0} [process error (dda id&bc db save)] [task={1}] '
'[error={2}]'.format(self.log_base, task_str,
traceback.format_exc()))
# TODO report
except Exception as e:
self.online_log.error('{0} [process error (dda process)] [task={1}] '
'[error={2}]'.format(self.log_base, task_str,
traceback.format_exc()))
finally:
try:
img_save_path = os.path.join(doc_data_path, 'img')
......
......@@ -267,3 +267,52 @@ class HILOCRResult(models.Model):
managed = False
db_table = 'hil_ocr_result'
# DDA ID & BC ----> HIL SE 专有
class DDARecords(models.Model):
    """Progress record for locating a DDA form and its matching ID card / bank card.

    Rows are looked up by ``application_id``. The three ``is_*_found`` flags
    record which of the documents have been located so far, and ``all_found``
    marks the record as complete. The ``*_path`` and ``*_found_time`` columns
    hold the image path and the time associated with each located document.
    """

    # Primary key.
    id = models.AutoField(
        verbose_name="id",
        primary_key=True,
    )
    # Application identifier (indexed).
    application_id = models.CharField(
        verbose_name="申请id",
        max_length=64,
    )

    # Which documents have been found.
    is_dda_found = models.BooleanField(verbose_name="DDA是否找到", default=False)
    is_id_found = models.BooleanField(verbose_name="身份证是否找到", default=False)
    is_bc_found = models.BooleanField(verbose_name="银行卡是否找到", default=False)
    all_found = models.BooleanField(verbose_name="是否全找到", default=False)

    # Image paths of the DDA form, ID card and bank card.
    dda_path = models.CharField(verbose_name="DDA图片路径", max_length=1024, null=True)
    id_path = models.CharField(verbose_name="身份证图片路径", max_length=1024, null=True)
    bc_path = models.CharField(verbose_name="银行卡图片路径", max_length=1024, null=True)

    # Values read from the DDA form: ID-card name, ID-card number, bank-card number.
    customer_name = models.CharField(verbose_name="DDA身份证姓名", max_length=1024, null=True)
    customer_id = models.CharField(verbose_name="DDA身份证号码", max_length=1024, null=True)
    account_id = models.CharField(verbose_name="DDA银行卡号", max_length=1024, null=True)

    # Time recorded for each document.
    dda_found_time = models.DateTimeField(verbose_name='DDA时间', null=True)
    id_found_time = models.DateTimeField(verbose_name='身份证时间', null=True)
    bc_found_time = models.DateTimeField(verbose_name='银行卡时间', null=True)

    # Row timestamps (both indexed).
    update_time = models.DateTimeField(verbose_name='修改时间', auto_now=True)
    create_time = models.DateTimeField(verbose_name='创建时间', auto_now_add=True)

    class Meta:
        # The table is not created or migrated by Django (managed = False).
        db_table = 'dda_records'
        managed = False
class IDBCRecords(models.Model):
    """Recognised ID-card or bank-card image stored for an application.

    ``is_id`` distinguishes the two kinds of row: ID-card rows carry a name in
    ``target_name`` and the card number in ``target_id``; bank-card rows carry
    the card number in ``target_id``.
    """

    # Primary key.
    id = models.AutoField(
        verbose_name="id",
        primary_key=True,
    )
    # Application identifier (indexed).
    application_id = models.CharField(
        verbose_name="申请id",
        max_length=64,
    )
    # Name on the ID card (composite index with application_id).
    target_name = models.CharField(
        verbose_name="DDA身份证姓名",
        max_length=1024,
        null=True,
    )
    # ID-card number or bank-card number (composite index with application_id).
    target_id = models.CharField(
        verbose_name="DDA身份证号码or银行卡号",
        max_length=1024,
    )
    # True for an ID-card row, False for a bank-card row.
    is_id = models.BooleanField(
        verbose_name="身份证or银行卡",
        default=True,
    )
    # Path of the source image.
    file_path = models.CharField(
        verbose_name="图片路径",
        max_length=1024,
    )
    # Row creation time.
    create_time = models.DateTimeField(
        verbose_name='创建时间',
        auto_now_add=True,
    )

    class Meta:
        # The table is not created or migrated by Django (managed = False).
        db_table = 'idbc_records'
        managed = False
......
import pyodbc
# One-off schema script: creates the `dda_records` and `idbc_records` tables
# and their indexes. The whole batch is sent in a single execute() call on an
# autocommit connection.
hil_sql = """
create table dda_records
(
id bigint identity
primary key,
application_id nvarchar(64) not null,
is_dda_found bit default 0 not null,
is_id_found bit default 0 not null,
is_bc_found bit default 0 not null,
all_found bit default 0 not null,
dda_path nvarchar(1024),
id_path nvarchar(1024),
bc_path nvarchar(1024),
customer_name nvarchar(1024),
customer_id nvarchar(1024),
account_id nvarchar(1024),
dda_found_time datetime,
id_found_time datetime,
bc_found_time datetime,
update_time datetime not null,
create_time datetime not null,
);
create index dda_records_application_id_index
on dda_records (application_id);
create index dda_records_update_time_index
on dda_records (update_time);
create index dda_records_create_time_index
on dda_records (create_time);
create table idbc_records
(
id bigint identity
primary key,
application_id nvarchar(64) not null,
target_name nvarchar(1024),
target_id nvarchar(1024) not null,
is_id bit default 1 not null,
file_path nvarchar(1024) not null,
create_time datetime not null,
);
create index idbc_records_application_id_index
on idbc_records (application_id);
create index idbc_records_application_id_target_name_index
on idbc_records (application_id, target_name);
create index idbc_records_application_id_target_id_index
on idbc_records (application_id, target_id);
"""

# NOTE(review): the connection string names only the ODBC driver; the server,
# database and credentials are presumably supplied before running - confirm.
hil_cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};', autocommit=True)
try:
    hil_cursor = hil_cnxn.cursor()
    try:
        hil_cursor.execute(hil_sql)
    finally:
        # Release the cursor even when the DDL batch fails.
        hil_cursor.close()
finally:
    # Always release the connection; previously a failing execute() leaked it.
    hil_cnxn.close()
# afc_cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};', autocommit=True)
# afc_cursor = afc_cnxn.cursor()
# afc_cursor.execute(afc_sql)
# afc_cursor.close()
# afc_cnxn.close()
\ No newline at end of file
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!