From fc3603674fa6969167218c6132d10d8b15aa3ef0 Mon Sep 17 00:00:00 2001 From: 周伟奇 <zhouweiqi@situdata.com> Date: Fri, 7 May 2021 17:54:07 +0800 Subject: [PATCH] report part 1 --- src/apps/doc/management/commands/ocr_process.py | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- src/apps/doc/models.py | 44 +++++++++++++++++++++++++++++++++++++++++++- src/apps/doc/named_enum.py | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 186 insertions(+), 3 deletions(-) diff --git a/src/apps/doc/management/commands/ocr_process.py b/src/apps/doc/management/commands/ocr_process.py index cdeb505..c1bc3dd 100644 --- a/src/apps/doc/management/commands/ocr_process.py +++ b/src/apps/doc/management/commands/ocr_process.py @@ -20,10 +20,10 @@ from common.tools.file_tools import write_zip_file from common.tools.pdf_to_img import PDFHandler from apps.doc import consts from apps.doc.ocr.edms import EDMS, rh -from apps.doc.named_enum import KeywordsType +from apps.doc.named_enum import KeywordsType, FailureReason, WorkflowName, ProcessName, RequestTeam, RequestTrigger from apps.doc.exceptions import EDMSException, OCR1Exception, OCR2Exception, OCR4Exception from apps.doc.ocr.wb import BSWorkbook -from apps.doc.models import DocStatus, HILDoc, AFCDoc, Keywords, HILOCRResult, AFCOCRResult +from apps.doc.models import DocStatus, HILDoc, AFCDoc, Keywords, HILOCRResult, AFCOCRResult, HILOCRReport, AFCOCRReport from celery_compare.tasks import compare @@ -604,6 +604,24 @@ class Command(BaseCommand, LoggerMixin): # return except Exception as e: try: + end_time = timezone.now() + report_table = HILOCRReport if business_type == consts.HIL_PREFIX else AFCOCRReport + report_table.objects.create( + case_number=doc.application_id, + request_team=RequestTeam.get_value(doc.document_scheme, 0), + request_trigger=RequestTrigger.get_value(doc.data_source, 0), + input_file=doc.document_name, + transaction_start=doc.start_time, + 
transaction_end=end_time, + successful_at_this_level=False, + failure_reason=FailureReason.PDF.value, + process_name=ProcessName.ALL.value, + ) + except Exception as e: + self.online_log.error('{0} [process error (report db save)] [error={1}]'.format( + self.log_base, traceback.format_exc())) + + try: doc.status = DocStatus.PROCESS_FAILED.value doc.save() self.online_log.warn('{0} [process failed (pdf_2_img_2_queue)] [task={1}] ' @@ -702,12 +720,19 @@ class Command(BaseCommand, LoggerMixin): try: doc = doc_class.objects.filter(id=doc_id).first() + # report_dict = { + # 'process': None or pdf or excel or edms + # 'idcard': True or False, + # 'bs': None or normal or mobile, + # } + report_list = [None, False, None] except Exception as e: self.online_log.error('{0} [process error (db filter)] [task={1}] [error={2}]'.format( self.log_base, task_str, traceback.format_exc())) else: try: # 4.OCR结果并且构建excel文件 + bs_classify_set = set() bs_summary = {} unknown_summary = {} license_summary = {} @@ -795,6 +820,7 @@ class Command(BaseCommand, LoggerMixin): self.online_log.warn( '{0} [ocr_2 failed] [img_path={1}]'.format(self.log_base, img_path)) else: # 流水处理 + bs_classify_set.add(classify) self.bs_process(wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx) else: res_list.append((pno, ino, part_idx, consts.RES_FAILED_1)) @@ -806,8 +832,15 @@ class Command(BaseCommand, LoggerMixin): # self.license_log.info('[task={0}] [license_summary={1}]'.format(task_str, license_summary)) idcard_list = license_summary.get(consts.IC_CLASSIFY) if idcard_list: + report_list[1] = True self.idcard_log.info('[task={0}] [idcard={1}]'.format(task_str, idcard_list)) + if len(bs_classify_set) > 0: + if consts.ALI_WECHART_CLASSIFY & bs_classify_set: + report_list[2] = WorkflowName.MOBILE.value + else: + report_list[2] = WorkflowName.NORMAL.value + merged_bs_summary = self.rebuild_bs_summary(bs_summary, unknown_summary) del unknown_summary @@ -821,6 +854,7 @@ class 
Command(BaseCommand, LoggerMixin): except Exception as e: + report_list[0] = FailureReason.EXCEL.value self.online_log.warn('{0} [process failed (res conformity)] [task={1}] [error={2}]'.format( self.log_base, task_str, traceback.format_exc())) @@ -842,6 +876,7 @@ class Command(BaseCommand, LoggerMixin): except Exception as e: + report_list[0] = FailureReason.EXCEL.value self.online_log.warn('{0} [process failed (wb rebuild)] [task={1}] [error={2}]'.format( self.log_base, task_str, traceback.format_exc())) @@ -868,12 +903,17 @@ class Command(BaseCommand, LoggerMixin): else: raise EDMSException(edms_exc) except Exception as e: + + report_list[0] = FailureReason.EDMS.value doc.status = DocStatus.UPLOAD_FAILED.value self.online_log.warn('{0} [process failed (edms upload)] [task={1}] [error={2}]'.format( self.log_base, task_str, traceback.format_exc())) + else: + doc.status = DocStatus.COMPLETE.value self.online_log.info('{0} [edms upload success] [task={1}]'.format(self.log_base, task_str)) + finally: try: doc.end_time = timezone.now() @@ -938,6 +978,74 @@ class Command(BaseCommand, LoggerMixin): else: self.online_log.info('{0} [comparison info send success] [task={1}] ' '[res_id={2}]'.format(self.log_base, task_str, res_obj.id)) + finally: + # report_dict = { + # 'process': None or pdf or excel or edms + # 'idcard': True or False, + # 'bs': None or normal or mobile, + # } + + end_time = timezone.now() + report_table = HILOCRReport if business_type == consts.HIL_PREFIX else AFCOCRReport + + try: + if report_list[0] is None: + report_table.objects.create( + case_number=doc.application_id, + request_team=RequestTeam.get_value(doc.document_scheme, 0), + request_trigger=RequestTrigger.get_value(doc.data_source, 0), + input_file=doc.document_name, + transaction_start=doc.start_time, + transaction_end=end_time, + process_name=ProcessName.ALL.value, + ) + else: + report_table.objects.create( + case_number=doc.application_id, + 
request_team=RequestTeam.get_value(doc.document_scheme, 0), + request_trigger=RequestTrigger.get_value(doc.data_source, 0), + input_file=doc.document_name, + transaction_start=doc.start_time, + transaction_end=end_time, + successful_at_this_level=False, + failure_reason=report_list[0], + process_name=ProcessName.ALL.value, + ) + except Exception as e: + self.online_log.error('{0} [process error (report db save)] [error={1}]'.format( + self.log_base, traceback.format_exc())) + + try: + if report_list[1]: + report_table.objects.create( + case_number=doc.application_id, + request_team=RequestTeam.CONTROLLING.value, + request_trigger=RequestTrigger.DOCUPLOAD.value, + input_file=doc.document_name, + transaction_start=doc.start_time, + transaction_end=end_time, + process_name=ProcessName.IDCARD.value, + ) + except Exception as e: + self.online_log.error('{0} [process error (report db save)] [error={1}]'.format( + self.log_base, traceback.format_exc())) + + try: + if report_list[2] is not None: + report_table.objects.create( + case_number=doc.application_id, + request_team=RequestTeam.get_value(doc.document_scheme, 0), + request_trigger=RequestTrigger.DOCUPLOAD.value, + input_file=doc.document_name, + transaction_start=doc.start_time, + transaction_end=end_time, + process_name=ProcessName.BS.value, + workflow_name=report_list[2], + ) + except Exception as e: + self.online_log.error('{0} [process error (report db save)] [error={1}]'.format( + self.log_base, traceback.format_exc())) + finally: try: img_save_path = os.path.join(doc_data_path, 'img') diff --git a/src/apps/doc/models.py b/src/apps/doc/models.py index 94260bd..fdc9a04 100644 --- a/src/apps/doc/models.py +++ b/src/apps/doc/models.py @@ -1,5 +1,5 @@ from django.db import models -from .named_enum import DocStatus, KeywordsType, RetryStep +from .named_enum import DocStatus, KeywordsType, RequestTeam, RequestTrigger, FailureReason, ProcessName, WorkflowName # Create your models here. 
@@ -267,3 +267,45 @@ class HILOCRResult(models.Model):
         managed = False
         db_table = 'hil_ocr_result'
 
+
+# OCR Report
+class HILOCRReport(models.Model):
+    id = models.AutoField(primary_key=True, verbose_name="id")  # primary key
+    case_number = models.CharField(max_length=64, verbose_name="申请id")
+    request_team = models.SmallIntegerField(default=RequestTeam.ACCEPTANCE.value, verbose_name="来源")
+    request_trigger = models.SmallIntegerField(default=RequestTrigger.POS.value, verbose_name="触发者")
+    input_file = models.CharField(max_length=255, verbose_name="文件名")
+    transaction_start = models.DateTimeField(null=True, verbose_name='开始时间')  # index
+    transaction_end = models.DateTimeField(null=True, verbose_name='结束时间')
+    successful_at_this_level = models.BooleanField(default=True, verbose_name="是否成功")
+    failure_reason = models.SmallIntegerField(null=True, verbose_name="失败原因")
+    process_name = models.SmallIntegerField(default=ProcessName.ALL.value, verbose_name="流程名称")
+    total_fields = models.IntegerField(null=True, verbose_name='比对字段数目')
+    workflow_name = models.SmallIntegerField(null=True, verbose_name="工作流程")
+
+    class Meta:
+        managed = False
+        db_table = 'hil_ocr_report'
+
+
+class AFCOCRReport(models.Model):
+    id = models.AutoField(primary_key=True, verbose_name="id")  # primary key
+    case_number = models.CharField(max_length=64, verbose_name="申请id")
+    request_team = models.SmallIntegerField(default=RequestTeam.ACCEPTANCE.value, verbose_name="来源")
+    request_trigger = models.SmallIntegerField(default=RequestTrigger.POS.value, verbose_name="触发者")
+    input_file = models.CharField(max_length=255, verbose_name="文件名")
+    transaction_start = models.DateTimeField(null=True, verbose_name='开始时间')  # index
+    transaction_end = models.DateTimeField(null=True, verbose_name='结束时间')
+    successful_at_this_level = models.BooleanField(default=True, verbose_name="是否成功")
+    failure_reason = models.SmallIntegerField(null=True, verbose_name="失败原因")
+    process_name = models.SmallIntegerField(default=ProcessName.ALL.value, verbose_name="流程名称")
+    total_fields = models.IntegerField(null=True, verbose_name='比对字段数目')
+    workflow_name = models.SmallIntegerField(null=True, verbose_name="工作流程")
+
+    class Meta:
+        managed = False
+        db_table = 'afc_ocr_report'
+        situ_db_label = 'afc'
+
+
+
diff --git a/src/apps/doc/named_enum.py b/src/apps/doc/named_enum.py
index 13a8971..c9205b8 100644
--- a/src/apps/doc/named_enum.py
+++ b/src/apps/doc/named_enum.py
@@ -19,3 +19,36 @@ class KeywordsType(NamedEnum):
     SALARY = (1, '薪资')
     LOAN = (2, '贷款')
     ALI_WECHART = (3, '微信/支付宝')
+
+
+class RequestTeam(NamedEnum):
+    ACCEPTANCE = (0, 'ACCEPTANCE')
+    SETTLEMENT = (1, 'SETTLEMENT')
+    CONTRACTMANAGEMENT = (2, 'CONTRACTMANAGEMENT')
+    CONTROLLING = (3, 'CONTROLLING')
+
+
+class RequestTrigger(NamedEnum):
+    POS = (0, 'POS')
+    EAPP = (1, 'EAPP')
+    ECONTRACT = (2, 'ECONTRACT')
+    DOCUPLOAD = (3, 'Document Upload')
+
+
+class FailureReason(NamedEnum):
+    PDF = (0, 'PDF处理失败')
+    EXCEL = (1, '构建excel失败')
+    EDMS = (2, 'EDMS上传失败')
+
+
+class ProcessName(NamedEnum):
+    ALL = (0, 'S1_All_DocumentUpload')
+    BS = (1, 'S1_CA_BankStatementCalculation')
+    IDCARD = (2, 'F2_IDReport')
+    DDA = (3, 'CL_S1_DDAConsolidation')
+
+
+class WorkflowName(NamedEnum):
+    NORMAL = (0, 'Normal BS')
+    MOBILE = (1, 'Mobile BS')  # distinct value so stored reports can tell MOBILE from NORMAL
+
-- 
libgit2 0.24.0