dda part 2

周伟奇
Showing 4 changed files with 342 additions and 45 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/models.py
src/common/tools/mssql_script4.py
--- a/src/apps/doc/consts.py
View file @1cfd8e7
+++ b/src/apps/doc/consts.py
View file @1cfd8e7
@@ -1204,6 +1204,7 @@ DDA_IC_ID = 'customer_id'
 DDA_BC_NAME = 'account_name'
 DDA_BC_ID = 'account_id'
 DDA_IMG_PATH = 'img_path'
+DDA_PRO = 'pro'


 DDA_MAPPING = [
--- a/src/apps/doc/management/commands/ocr_process.py
View file @1cfd8e7
+++ b/src/apps/doc/management/commands/ocr_process.py
View file @1cfd8e7
@@ -23,7 +23,7 @@ from apps.doc.ocr.edms import EDMS, rh
 from apps.doc.named_enum import KeywordsType
 from apps.doc.exceptions import EDMSException, OCR1Exception, OCR2Exception, OCR4Exception
 from apps.doc.ocr.wb import BSWorkbook
-from apps.doc.models import DocStatus, HILDoc, AFCDoc, Keywords, HILOCRResult, AFCOCRResult
+from apps.doc.models import DocStatus, HILDoc, AFCDoc, Keywords, HILOCRResult, AFCOCRResult, DDARecords, IDBCRecords
 from celery_compare.tasks import compare


@@ -43,6 +43,10 @@ class Command(BaseCommand, LoggerMixin):
        self.img_queue_size = int(conf.IMG_QUEUE_SIZE)
        # 数据目录
        self.data_dir = conf.DATA_DIR
+        # DDA目录
+        self.dda_dir = os.path.join(self.data_dir, 'HIL', 'DDA')
+        self.dda_complete_dir = os.path.join(self.dda_dir, 'complete')
+        self.dda_wanting_dir = os.path.join(self.dda_dir, 'wanting')
        # ocr相关
        self.ocr_1_urls = conf.get_namespace('OCR_URL_1_')
        self.ocr_url_2 = conf.OCR_URL_2
@@ -189,7 +193,7 @@ class Command(BaseCommand, LoggerMixin):
        else:
            res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))

-    def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, is_hil, hil_id_bc_mapping):
+    def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, do_dda, dda_id_bc_mapping):
        # 类别：'0'身份证， '1'居住证
        license_data = ocr_data.get('data')
        if not license_data:
@@ -197,12 +201,14 @@ class Command(BaseCommand, LoggerMixin):
            return
        res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))
        if classify == consts.DDA_CLASSIFY:  # DDA处理
+            pro = ocr_data.get('confidence')
            dda_ocr_result = {
                consts.DDA_IC_NAME: license_data.get('result', {}).get(consts.DDA_IC_NAME, {}).get('words', ''),
                consts.DDA_IC_ID: license_data.get('result', {}).get(consts.DDA_IC_ID, {}).get('words', ''),
                consts.DDA_BC_NAME: license_data.get('result', {}).get(consts.DDA_BC_NAME, {}).get('words', ''),
                consts.DDA_BC_ID: license_data.get('result', {}).get(consts.DDA_BC_ID, {}).get('words', ''),
-                consts.DDA_IMG_PATH: img_path
+                consts.DDA_IMG_PATH: img_path,
+                consts.DDA_PRO: pro
            }
            license_summary.setdefault(classify, []).append(dda_ocr_result)

@@ -290,17 +296,14 @@ class Command(BaseCommand, LoggerMixin):

                    id_card_dict[consts.IC_TURE_OR_FALSE] = consts.IC_RES_MAPPING.get(card_type)
                finally:
-                    if is_hil:
-                        for key in consts.IC_KEY_FIELD:
-                            if not isinstance(id_card_dict.get(key), str):
-                                break
-                            value = id_card_dict[key].strip()
-                            if len(value) > 0:
-                                hil_id_bc_mapping.setdefault(consts.IC_FIELD, dict()).setdefault(
-                                    value, set()).add(img_path)
+                    if do_dda and isinstance(id_card_dict.get(consts.IC_KEY_FIELD[0]), str) and isinstance(id_card_dict.get(consts.IC_KEY_FIELD[1]), str):
+                        ic_name = id_card_dict[consts.IC_KEY_FIELD[0]].strip()
+                        ic_id = id_card_dict[consts.IC_KEY_FIELD[1]].strip()
+                        if len(ic_name) > 0 and len(ic_id) > 0:  # ic_id is a str: `ic_id > 0` would raise TypeError on Python 3
+                            dda_id_bc_mapping.setdefault(consts.IC_FIELD, []).append((ic_name, ic_id, img_path))
            license_summary.setdefault(classify, []).extend(license_data)

-    def license2_process(self, ocr_res_2, license_summary, pid, classify, res_list, pno, ino, part_idx, img_path, is_hil, hil_id_bc_mapping):
+    def license2_process(self, ocr_res_2, license_summary, pid, classify, res_list, pno, ino, part_idx, img_path, do_dda, dda_id_bc_mapping):
        if ocr_res_2.get('ErrorCode') in consts.SUCCESS_CODE_SET:
            res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))
            if pid == consts.BC_PID:
@@ -309,11 +312,10 @@ class Command(BaseCommand, LoggerMixin):
                # for en_key, chn_key in consts.BC_FIELD:
                #     res_dict[chn_key] = ocr_res_2.get(en_key, '')
                license_summary.setdefault(classify, []).append(ocr_res_2)
-                if is_hil and isinstance(ocr_res_2.get(consts.BC_KEY_FIELD), str):
-                    value = ocr_res_2[consts.BC_KEY_FIELD].strip()
-                    if len(value) > 0:
-                        hil_id_bc_mapping.setdefault(consts.BC_FIELD, dict()).setdefault(
-                            value, set()).add(img_path)
+                if do_dda and isinstance(ocr_res_2.get(consts.BC_KEY_FIELD), str):
+                    bc_no = ocr_res_2[consts.BC_KEY_FIELD].strip()
+                    if len(bc_no) > 0:
+                        dda_id_bc_mapping.setdefault(consts.BC_FIELD, []).append((bc_no, img_path))
            else:
                # 营业执照等
                for result_dict in ocr_res_2.get('ResultList', []):
@@ -723,13 +725,14 @@ class Command(BaseCommand, LoggerMixin):
                doc_id = int(doc_id_str)
                doc_class = HILDoc if business_type == consts.HIL_PREFIX else AFCDoc
                is_hil = True if business_type == consts.HIL_PREFIX else False
-                hil_id_bc_mapping = dict()
+                dda_id_bc_mapping = dict()

                doc_data_path = os.path.join(self.data_dir, business_type, consts.TMP_DIR_NAME, doc_id_str)
                excel_path = os.path.join(doc_data_path, '{0}.xlsx'.format(doc_id_str))

                try:
                    doc = doc_class.objects.filter(id=doc_id).first()
+                    do_dda = is_hil and doc.document_scheme == consts.DOC_SCHEME_LIST[1]
                except Exception as e:
                    self.online_log.error('{0} [process error (db filter)] [task={1}] [error={2}]'.format(
                        self.log_base, task_str, traceback.format_exc()))
@@ -771,7 +774,7 @@ class Command(BaseCommand, LoggerMixin):
                                            continue
                                        elif classify in consts.LICENSE_CLASSIFY_SET_1:  # 证件1
                                            self.license1_process(ocr_data, license_summary, classify, res_list, pno,
-                                                                  ino, part_idx, img_path, is_hil, hil_id_bc_mapping)
+                                                                  ino, part_idx, img_path, do_dda, dda_id_bc_mapping)
                                        elif classify in consts.LICENSE_CLASSIFY_SET_2:  # 证件2
                                            pid, _, _, _, _, _ = consts.LICENSE_CLASSIFY_MAPPING.get(classify)
                                            file_data = ocr_data.get('section_img')
@@ -818,7 +821,7 @@ class Command(BaseCommand, LoggerMixin):
                                                        ocr_2_res['Name'] = name
                                                    self.license2_process(ocr_2_res, license_summary, pid, classify,
                                                                          res_list, pno, ino, part_idx, img_path,
-                                                                          is_hil, hil_id_bc_mapping)
+                                                                          do_dda, dda_id_bc_mapping)
                                                    break
                                            else:
                                                res_list.append((pno, ino, part_idx, consts.RES_FAILED_2))
@@ -921,31 +924,6 @@ class Command(BaseCommand, LoggerMixin):
                        finally:
                            # TODO 识别结果存一张表，方便跑报表

-                            # DDA处理
-                            if is_hil:
-                                try:
-                                    # 获取需要保存图片的集合
-                                    hil_dda_save_img_list = []
-                                    dda_res_list = license_summary.get(consts.DDA_CLASSIFY, [])
-                                    self.online_log.info('{0} [DDA process] [DDA_info={1}]'.format(self.log_base, dda_res_list))
-
-                                    for dda_idx, dda_res in enumerate(dda_res_list):
-                                        save_img_dict = {
-                                            consts.DDA_FIELD: dda_res.get(consts.DDA_IMG_PATH)
-                                        }
-                                        for dda_field, license_field in consts.DDA_MAPPING:
-                                            target_str = dda_res.get(dda_field, '')
-                                            save_img_dict.setdefault(license_field, set()).update(
-                                                hil_id_bc_mapping.get(license_field, dict()).get(target_str, set()))
-                                        hil_dda_save_img_list.append(save_img_dict)
-                                    self.online_log.info('{0} [DDA process] [ic&bc_info={1}]'.format(self.log_base, hil_id_bc_mapping))
-                                    self.online_log.info('{0} [DDA process] [img_path={1}]'.format(self.log_base, hil_dda_save_img_list))
-                                except Exception as e:
-                                    pass
-                                # 保存图片
-                                # 数据库记录
-                                # report记录
-
                            # CA比对
                            if doc.document_scheme == consts.DOC_SCHEME_LIST[0]:
                                try:
@@ -995,6 +973,208 @@ class Command(BaseCommand, LoggerMixin):
                                    else:
                                        self.online_log.info('{0} [comparison info send success] [task={1}] '
                                                             '[res_id={2}]'.format(self.log_base, task_str, res_obj.id))
+
+                            # DDA处理
+                            if do_dda:
+                                # 入库
+                                try:
+                                    dda_record = DDARecords.objects.filter(
+                                        application_id=doc.application_id).first()
+                                    if dda_record is None:
+                                        dda_record = DDARecords(application_id=doc.application_id)
+                                except Exception as e:
+                                    self.online_log.error('{0} [process error (dda db get)] [task={1}] '
+                                                          '[error={2}]'.format(self.log_base, task_str,
+                                                                               traceback.format_exc()))
+                                else:
+                                    try:
+                                        if not dda_record.all_found:
+                                            found_time = timezone.now()
+                                            move_img_path_dict = dict()
+                                            ic_res_list = dda_id_bc_mapping.get(consts.IC_FIELD, [])
+                                            bc_res_list = dda_id_bc_mapping.get(consts.BC_FIELD, [])
+                                            self.online_log.info('{0} [dda process] [task={1}] [ic={2}] '
+                                                                 '[bc={3}]'.format(self.log_base, task_str, ic_res_list,
+                                                                                   bc_res_list))
+
+                                            if not dda_record.is_dda_found:
+                                                try:
+                                                    # DDA过滤，获取有效DDA
+                                                    best_dda_res = None
+                                                    dda_res_list = license_summary.get(consts.DDA_CLASSIFY, [])
+                                                    if len(dda_res_list) > 0:
+                                                        dda_res_list.sort(key=lambda x: x.get(consts.DDA_PRO, 0),
+                                                                          reverse=True)
+                                                        tmp_best_dda_res = dda_res_list[0]
+                                                        if tmp_best_dda_res.get(consts.DDA_PRO, 0) >= 0.6:
+                                                            best_dda_res = tmp_best_dda_res
+                                                    self.online_log.info(
+                                                        '{0} [dda process] [task={1}] [dda={2}]'.format(
+                                                            self.log_base, task_str, dda_res_list))
+                                                except Exception as e:
+                                                    best_dda_res = None
+
+                                                dda_record.is_dda_found = False if best_dda_res is None else True
+
+                                                if dda_record.is_dda_found:
+                                                    dda_path = best_dda_res.get(consts.DDA_IMG_PATH, '')
+                                                    customer_name = best_dda_res.get(consts.DDA_IC_NAME, '')
+                                                    customer_id = best_dda_res.get(consts.DDA_IC_ID, '')
+                                                    account_id = best_dda_res.get(consts.DDA_BC_ID, '')
+                                                    dda_record.dda_path = dda_path
+                                                    dda_record.dda_found_time = found_time
+                                                    dda_record.customer_name = customer_name
+                                                    dda_record.customer_id = customer_id
+                                                    dda_record.account_id = account_id
+                                                    # move
+                                                    move_img_path_dict.setdefault(consts.DDA_FIELD, set()).add(dda_path)
+
+                                            if dda_record.is_dda_found:
+
+                                                try:
+                                                    if not dda_record.is_id_found:
+                                                        for ic_name, ic_id, ic_img_path in ic_res_list:
+                                                            if ic_id == dda_record.customer_id \
+                                                                    or ic_name == dda_record.customer_name:
+                                                                dda_record.is_id_found = True
+                                                                dda_record.id_path = ic_img_path
+                                                                dda_record.id_found_time = found_time
+                                                                move_img_path_dict.setdefault(consts.IC_FIELD, set()).add(
+                                                                    ic_img_path)
+                                                                break
+                                                        else:
+                                                            id_record = IDBCRecords.objects.filter(
+                                                                application_id=doc.application_id,
+                                                                target_id=dda_record.customer_id,
+                                                                is_id=True).first()
+
+                                                            if id_record is None:
+                                                                id_record = IDBCRecords.objects.filter(
+                                                                    application_id=doc.application_id,
+                                                                    target_name=dda_record.customer_name,
+                                                                    is_id=True).first()
+
+                                                            if id_record is not None:
+                                                                dda_record.is_id_found = True
+                                                                dda_record.id_path = id_record.file_path
+                                                                dda_record.id_found_time = id_record.create_time
+                                                                move_img_path_dict.setdefault(consts.IC_FIELD, set()).add(
+                                                                    id_record.file_path)
+                                                except Exception as e:
+                                                    self.online_log.error(
+                                                        '{0} [process error (dda id process)] [task={1}] '
+                                                        '[error={2}]'.format(self.log_base, task_str,
+                                                                             traceback.format_exc()))
+
+                                                try:
+                                                    if not dda_record.is_bc_found:
+                                                        for bc_no, bc_img_path in bc_res_list:
+                                                            if bc_no == dda_record.account_id:
+                                                                dda_record.is_bc_found = True
+                                                                dda_record.bc_path = bc_img_path
+                                                                dda_record.bc_found_time = found_time
+                                                                move_img_path_dict.setdefault(consts.BC_FIELD, set()).add(
+                                                                    bc_img_path)
+                                                                break
+                                                        else:
+                                                            bc_record = IDBCRecords.objects.filter(
+                                                                application_id=doc.application_id,
+                                                                target_id=dda_record.account_id,
+                                                                is_id=False).first()
+
+                                                            if bc_record is not None:
+                                                                dda_record.is_bc_found = True
+                                                                dda_record.bc_path = bc_record.file_path
+                                                                dda_record.bc_found_time = bc_record.create_time
+                                                                move_img_path_dict.setdefault(consts.BC_FIELD, set()).add(
+                                                                    bc_record.file_path)
+                                                except Exception as e:
+                                                    self.online_log.error(
+                                                        '{0} [process error (dda bc process)] [task={1}] '
+                                                        '[error={2}]'.format(self.log_base, task_str,
+                                                                             traceback.format_exc()))
+
+                                            if dda_record.is_dda_found and dda_record.is_id_found and dda_record.is_bc_found:
+                                                dda_record.all_found = True
+                                            dda_record.save()
+
+                                            # 图片移动
+                                            try:
+                                                if len(move_img_path_dict) > 0:
+                                                    self.online_log.info(
+                                                        '{0} [dda process] [task={1}] [move_img_path={2}]'.format(
+                                                            self.log_base, task_str, move_img_path_dict))
+
+                                                    wanting_dir = os.path.join(self.dda_wanting_dir, doc.application_id)
+                                                    wanting_dir_exists = os.path.isdir(wanting_dir)
+                                                    if dda_record.all_found:
+                                                        target_dir = os.path.join(self.dda_complete_dir, doc.application_id)
+                                                        if wanting_dir_exists:
+                                                            shutil.move(wanting_dir, target_dir)
+                                                        else:
+                                                            os.makedirs(target_dir, exist_ok=True)
+                                                    else:
+                                                        target_dir = wanting_dir
+                                                        if not wanting_dir_exists:
+                                                            os.makedirs(target_dir, exist_ok=True)
+
+                                                    for prefix, path_set in move_img_path_dict.items():
+                                                        for idx, path in enumerate(path_set):
+                                                            if os.path.isfile(path):
+                                                                file_name = '{0}_{1}{2}'.format(
+                                                                    prefix, idx, os.path.splitext(path)[-1])
+                                                                target_path = os.path.join(target_dir, file_name)
+                                                                shutil.copyfile(path, target_path)
+                                                            else:
+                                                                self.online_log.warn(
+                                                                    '{0} [dda process] [img path empty] [task={1}] '
+                                                                    '[path={2}]'.format(self.log_base, task_str, path))
+                                            except Exception as e:
+                                                self.online_log.error(
+                                                    '{0} [process error (dda img move)] [task={1}] '
+                                                    '[error={2}]'.format(self.log_base, task_str,
+                                                                         traceback.format_exc()))
+
+                                            # id & bc 入库
+                                            try:
+                                                if not dda_record.is_dda_found:
+                                                    ic_set = set()
+                                                    bc_set = set()
+                                                    for ic_name, ic_id, ic_img_path in ic_res_list:
+                                                        query_str = '{0}{1}'.format(ic_name, ic_id)
+                                                        if query_str in ic_set:
+                                                            continue
+                                                        ic_set.add(query_str)
+                                                        IDBCRecords.objects.create(
+                                                            application_id=doc.application_id,
+                                                            target_name=ic_name,
+                                                            target_id=ic_id,
+                                                            is_id=True,
+                                                            file_path=ic_img_path
+                                                        )
+                                                    for bc_no, bc_img_path in bc_res_list:
+                                                        if bc_no in bc_set:
+                                                            continue
+                                                        bc_set.add(bc_no)
+                                                        IDBCRecords.objects.create(
+                                                            application_id=doc.application_id,
+                                                            target_id=bc_no,
+                                                            is_id=False,
+                                                            file_path=bc_img_path
+                                                        )
+                                            except Exception as e:
+                                                self.online_log.error(
+                                                    '{0} [process error (dda id&bc db save)] [task={1}] '
+                                                    '[error={2}]'.format(self.log_base, task_str,
+                                                                         traceback.format_exc()))
+
+                                            # TODO report
+
+                                    except Exception as e:
+                                        self.online_log.error('{0} [process error (dda process)] [task={1}] '
+                                                              '[error={2}]'.format(self.log_base, task_str,
+                                                                                   traceback.format_exc()))
+
                finally:
                    try:
                        img_save_path = os.path.join(doc_data_path, 'img')
--- a/src/apps/doc/models.py
View file @1cfd8e7
+++ b/src/apps/doc/models.py
View file @1cfd8e7
@@ -267,3 +267,52 @@ class HILOCRResult(models.Model):
        managed = False
        db_table = 'hil_ocr_result'

+
+# DDA, ID card & bank card records     ----> used by HIL SE only
+class DDARecords(models.Model):  # per-application status of the DDA form and its matching ID card / bank card images
+    id = models.AutoField(primary_key=True, verbose_name="id")  # primary key
+    application_id = models.CharField(max_length=64, verbose_name="申请id")  # application id; indexed
+    # Found flags; ocr_process sets all_found once the DDA, ID card and bank card flags are all True.
+    is_dda_found = models.BooleanField(default=False, verbose_name="DDA是否找到")
+    is_id_found = models.BooleanField(default=False, verbose_name="身份证是否找到")
+    is_bc_found = models.BooleanField(default=False, verbose_name="银行卡是否找到")
+    all_found = models.BooleanField(default=False, verbose_name="是否全找到")
+    # Image paths of the DDA form, the ID card and the bank card.
+    dda_path = models.CharField(null=True, max_length=1024, verbose_name="DDA图片路径")
+    id_path = models.CharField(null=True, max_length=1024, verbose_name="身份证图片路径")
+    bc_path = models.CharField(null=True, max_length=1024, verbose_name="银行卡图片路径")
+    # Values read from the DDA form by OCR: ID-card holder name, ID-card number, bank card number.
+    customer_name = models.CharField(null=True, max_length=1024, verbose_name="DDA身份证姓名")
+    customer_id = models.CharField(null=True, max_length=1024, verbose_name="DDA身份证号码")
+    account_id = models.CharField(null=True, max_length=1024, verbose_name="DDA银行卡号")
+    # Timestamps recorded when each of the three items was found.
+    dda_found_time = models.DateTimeField(null=True, verbose_name='DDA时间')
+    id_found_time = models.DateTimeField(null=True, verbose_name='身份证时间')
+    bc_found_time = models.DateTimeField(null=True, verbose_name='银行卡时间')
+
+    update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间')  # last-modified time; indexed
+    create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间')  # creation time; indexed
+
+    class Meta:
+        managed = False  # table is not managed by Django migrations; its DDL lives in mssql_script4.py
+        db_table = 'dda_records'
+
+
+class IDBCRecords(models.Model):  # ID-card / bank-card OCR hits kept per application so a later DDA can be matched against them
+    id = models.AutoField(primary_key=True, verbose_name="id")  # primary key
+    application_id = models.CharField(max_length=64, verbose_name="申请id")  # application id; indexed
+
+    target_name = models.CharField(null=True, max_length=1024, verbose_name="DDA身份证姓名")  # ID-card holder name; composite index with application_id
+    target_id = models.CharField(max_length=1024, verbose_name="DDA身份证号码or银行卡号")  # ID-card number or bank card number; composite index with application_id
+
+    is_id = models.BooleanField(default=True, verbose_name="身份证or银行卡")  # True = ID card row, False = bank card row
+
+    file_path = models.CharField(max_length=1024, verbose_name="图片路径")  # path of the source image
+
+    create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间')  # creation time
+
+    class Meta:
+        managed = False  # table is not managed by Django migrations; its DDL lives in mssql_script4.py
+        db_table = 'idbc_records'
+
+
--- a/src/common/tools/mssql_script4.py 0 → 100644
View file @1cfd8e7
+++ b/src/common/tools/mssql_script4.py 0 → 100644
View file @1cfd8e7
+import pyodbc
+
+hil_sql = """
+    create table dda_records
+    (
+        id bigint identity
+            primary key,
+        application_id nvarchar(64) not null,
+        is_dda_found bit default 0 not null,
+        is_id_found bit default 0 not null,
+        is_bc_found bit default 0 not null,
+        all_found bit default 0 not null,
+        dda_path nvarchar(1024),
+        id_path nvarchar(1024),
+        bc_path nvarchar(1024),
+        customer_name nvarchar(1024),
+        customer_id nvarchar(1024),
+        account_id nvarchar(1024),
+        dda_found_time datetime,
+        id_found_time datetime,
+        bc_found_time datetime,
+        update_time datetime not null,
+        create_time datetime not null,
+    );
+
+    create index dda_records_application_id_index
+        on dda_records (application_id);
+    
+    create index dda_records_update_time_index
+        on dda_records (update_time);
+    
+    create index dda_records_create_time_index
+        on dda_records (create_time);
+
+    create table idbc_records
+    (
+        id bigint identity
+            primary key,
+        application_id nvarchar(64) not null,
+        target_name nvarchar(1024),
+        target_id nvarchar(1024) not null,
+        is_id bit default 1 not null,
+        file_path nvarchar(1024) not null,
+        create_time datetime not null,
+    );
+
+    create index idbc_records_application_id_index
+        on idbc_records (application_id);
+    
+    create index idbc_records_application_id_target_name_index
+        on idbc_records (application_id, target_name);
+    
+    create index idbc_records_application_id_target_id_index
+        on idbc_records (application_id, target_id);
+"""
+
+# Run the DDL above against the HIL database. autocommit=True, so no explicit commit is issued.
+hil_cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};', autocommit=True)
+try:
+    hil_cursor = hil_cnxn.cursor()
+    try:
+        hil_cursor.execute(hil_sql)
+    finally:
+        # Close the cursor even when the DDL fails (e.g. the tables already exist).
+        hil_cursor.close()
+finally:
+    # Always release the connection, including when cursor() or execute() raised.
+    hil_cnxn.close()
+
+# NOTE: afc_sql is not defined in this script; define it before enabling the AFC run below.
+# afc_cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};', autocommit=True)
+# afc_cursor = afc_cnxn.cursor()
+# afc_cursor.execute(afc_sql)
+# afc_cursor.close()
+# afc_cnxn.close()
\ No newline at end of file