Merge branch 'feature/CHINARPA-4659' into feature/uat-tmp

冯轩
Showing 3 changed files with 127 additions and 37 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/ocr/wb.py
--- a/src/apps/doc/consts.py
View file @7f9efd4
+++ b/src/apps/doc/consts.py
View file @7f9efd4
@@ -98,6 +98,7 @@ RES_SHEET_HEADER = ('页码', '图片序号', '检测图片序号', '结果')
 RES_SUCCESS = '识别成功'
 RES_SUCCESS_OTHER = '识别成功（其他类）'
 RES_SUCCESS_EMPTY = '识别成功（空数据）'
+RES_SUCCESS_FINANCIAL_STATEMENT = '识别成功（财务报表类）'
 RES_FAILED = '识别失败'
 RES_FAILED_1 = '识别失败(阶段1)'
 RES_FAILED_2 = '识别失败(阶段2)'
@@ -2562,4 +2563,9 @@ FSM_ACTIVITED_STATUS = {
 # 财务报表分类标签
 FINANCIAL_STATEMENT_CLASSIFY_LIST = [97, 98, 99]
 # 财务报表sheet名称
-FINANCIAL_SHEET_NAME = "财务报表"
\ No newline at end of file
+FINANCIAL_SHEET_NAME = "财务报表"
+
+# 财报情况说明分类标签
+FINANCIAL_EXPLANATION_CLASSIFY_LIST = [100]
+# 财报情况说明sheet名称
+FINANCIAL_EXPLANATION_SHEET_NAME = "财报情况说明"
\ No newline at end of file
--- a/src/apps/doc/management/commands/ocr_process.py
View file @7f9efd4
+++ b/src/apps/doc/management/commands/ocr_process.py
View file @7f9efd4
@@ -1725,7 +1725,9 @@ class Command(BaseCommand, LoggerMixin):
                        contract_result = {}
                        contract_result_compare = {}
                        # 添加财报三个报表的处理
-                        financial_statement_dict = {"code": {}, "stamp": {}}
+                        financial_statement_dict = {}
+                        # 添加财报情况说明的处理
+                        financial_explanation_dict = {}
                        res_list = []
                        interest_keyword = Keywords.objects.filter(
                            type=KeywordsType.INTEREST.value, on_off=True).values_list('keyword', flat=True)
@@ -1756,6 +1758,14 @@ class Command(BaseCommand, LoggerMixin):
                                                self.log_base, img_path))
                                            continue
                                        elif classify in consts.FINANCIAL_STATEMENT_CLASSIFY_LIST:
+                                            # 添加到 res_list 中
+                                            res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_FINANCIAL_STATEMENT))
+                                            # 只要分类为财报三个报表的，就在 financial_statement_dict 中添加对应的 code 和 stamp 两个dict
+                                            if "code" not in financial_statement_dict:
+                                                financial_statement_dict["code"] = {}
+                                            if "stamp" not in financial_statement_dict:
+                                                financial_statement_dict["stamp"] = {}
+                                            
                                            financial_statement_table_name = None
                                            if classify == 97:
                                                financial_statement_table_name = "balance_sheet"
@@ -1770,6 +1780,21 @@ class Command(BaseCommand, LoggerMixin):
                                                if "stamp" in ocr_data:
                                                    stamp = ocr_data.get("stamp", "")
                                                    financial_statement_dict["stamp"][financial_statement_table_name] = stamp
+                                        elif classify in consts.FINANCIAL_EXPLANATION_CLASSIFY_LIST:
+                                            # 添加到 res_list 中
+                                            res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_FINANCIAL_STATEMENT))
+                                            # 只要分类为财报情况说明的，就在 financial_explanation_dict 中添加对应的 title 和 stamp 两个dict
+                                            if "title" not in financial_explanation_dict:
+                                                financial_explanation_dict["title"] = {}
+                                            if "stamp" not in financial_explanation_dict:
+                                                financial_explanation_dict["stamp"] = {}
+                                            
+                                            if "title" in ocr_data:
+                                                title = ocr_data.get("title", "")
+                                                financial_explanation_dict["title"] = title
+                                            if "stamp" in ocr_data:
+                                                stamp = ocr_data.get("stamp", "")
+                                                financial_explanation_dict["stamp"] = stamp
                                        elif classify in consts.OTHER_CLASSIFY_SET:  # 其他类
                                            res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_OTHER))
                                            continue
@@ -1956,7 +1981,7 @@ class Command(BaseCommand, LoggerMixin):
                            # src_excel_path = os.path.join(doc_data_path, 'src.xlsx')
                            # wb.save(src_excel_path)
                            #need_follow表示在上传edms时文件名是否要添加"关注"两字
-                            count_list, need_follow = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme, contract_result, doc.metadata, financial_statement_dict)
+                            count_list, need_follow = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme, contract_result, doc.metadata, financial_statement_dict, financial_explanation_dict)
                            wb.save(excel_path)

                        except Exception as e:
@@ -2057,46 +2082,64 @@ class Command(BaseCommand, LoggerMixin):
                                license_summary[consts.BS_CLASSIFY] = bs_rebuild

                            # 比对
-                            if len(license_summary) > 0 and doc.document_scheme != consts.DOC_SCHEME_LIST[2]:
+                            if len(license_summary) > 0:
+                                if doc.document_scheme != consts.DOC_SCHEME_LIST[2]:
+                                # if len(license_summary) > 0 and doc.document_scheme != consts.DOC_SCHEME_LIST[2]:
+                                    try:
+                                        is_ca = True if doc.document_scheme == consts.DOC_SCHEME_LIST[0] else False
+                                        # 更新OCR累计识别结果表
+                                        if business_type == consts.HIL_PREFIX:
+                                            result_class = HILOCRResult if is_ca else HILSEOCRResult
+                                            res_obj = atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict)
+                                        else:
+                                            result_class = AFCOCRResult if is_ca else AFCSEOCRResult
+                                            res_obj = atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict)
+                                        
+                                    except Exception as e:
+                                        self.online_log.error(
+                                            '{0} [process error (ocr result save)] [task={1}] [error={2}]'.format(
+                                                self.log_base, task_str, traceback.format_exc()))
+                                    else:
+                                        self.online_log.info('{0} [ocr result save success] [task={1}] [res_id={2}]'.format(
+                                            self.log_base, task_str, res_obj.id))
+                                        # 触发比对
+                                        try:
+                                            # 是否fsm
+                                            cms_status_class = HILCmsStatusInfo if business_type in consts.HIL_SET else AFCCmsStatusInfo
+                                            cms_status_info = cms_status_class.objects.filter(application_id=doc.application_id).first()
+                                            is_fsm = cms_status_info is not None and cms_status_info.is_fsm == 1
+                                            self.online_log.info('{0} [isfsm] [task={1}] [true or false={2}]'.format(
+                                            self.log_base, task_str, is_fsm))
+                                            if is_fsm:
+                                                fsm_compare.apply_async((doc.application_id, business_type, None, res_obj.id, is_ca, True),
+                                                    queue='queue_compare')
+                                            else:
+                                                # pass
+                                                compare.apply_async((doc.application_id, business_type, None, res_obj.id,
+                                                                is_ca, True), queue='queue_compare')
+                                        except Exception as e:
+                                            self.online_log.error(
+                                                '{0} [process error (comparison info send)] [task={1}] [error={2}]'.format(
+                                                    self.log_base, task_str, traceback.format_exc()))
+                                        else:
+                                            self.online_log.info('{0} [comparison info send success] [task={1}] '
+                                                                '[res_id={2}]'.format(self.log_base, task_str, res_obj.id))
+                            else:
+                                # license_summary 为空
+                                self.online_log.info('{0} [task={1}] [no license_summary]'.format(self.log_base, task_str))
                                try:
                                    is_ca = True if doc.document_scheme == consts.DOC_SCHEME_LIST[0] else False
                                    # 更新OCR累计识别结果表
                                    if business_type == consts.HIL_PREFIX:
                                        result_class = HILOCRResult if is_ca else HILSEOCRResult
-                                        res_obj = atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict)
+                                        res_obj = atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict)
                                    else:
                                        result_class = AFCOCRResult if is_ca else AFCSEOCRResult
-                                        res_obj = atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict)
-                                    
+                                        res_obj = atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict)
                                except Exception as e:
                                    self.online_log.error(
                                        '{0} [process error (ocr result save)] [task={1}] [error={2}]'.format(
                                            self.log_base, task_str, traceback.format_exc()))
-                                else:
-                                    self.online_log.info('{0} [ocr result save success] [task={1}] [res_id={2}]'.format(
-                                        self.log_base, task_str, res_obj.id))
-                                    # 触发比对
-                                    try:
-                                        # 是否fsm
-                                        cms_status_class = HILCmsStatusInfo if business_type in consts.HIL_SET else AFCCmsStatusInfo
-                                        cms_status_info = cms_status_class.objects.filter(application_id=doc.application_id).first()
-                                        is_fsm = cms_status_info is not None and cms_status_info.is_fsm == 1
-                                        self.online_log.info('{0} [isfsm] [task={1}] [true or false={2}]'.format(
-                                        self.log_base, task_str, is_fsm))
-                                        if is_fsm:
-                                            fsm_compare.apply_async((doc.application_id, business_type, None, res_obj.id, is_ca, True),
-                                                queue='queue_compare')
-                                        else:
-                                            # pass
-                                            compare.apply_async((doc.application_id, business_type, None, res_obj.id,
-                                                             is_ca, True), queue='queue_compare')
-                                    except Exception as e:
-                                        self.online_log.error(
-                                            '{0} [process error (comparison info send)] [task={1}] [error={2}]'.format(
-                                                self.log_base, task_str, traceback.format_exc()))
-                                    else:
-                                        self.online_log.info('{0} [comparison info send success] [task={1}] '
-                                                             '[res_id={2}]'.format(self.log_base, task_str, res_obj.id))

                            # DDA处理
                            if do_dda:
@@ -2443,7 +2486,7 @@ class Command(BaseCommand, LoggerMixin):
            self.online_log.info('{0} [stop safely]'.format(self.log_base))

 @transaction.atomic
-def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task_str, financial_statement_dict):
+def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task_str, financial_statement_dict, financial_explanation_dict):
    with transaction.atomic('afc'):
        res_obj = result_class.objects.using('afc').select_for_update().filter(application_id=doc.application_id).first()
        self.online_log.info('{0} [sql lock AFC application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id))
@@ -2451,7 +2494,14 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task
            res_obj = result_class()
            res_obj.application_id = doc.application_id
            self.online_log.info('{0} [res_obj is None application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id))
-            res_obj.fs_ocr = json.dumps([financial_statement_dict])
+        # 财务报表存入数据库
+        if res_obj is not None:
+            if financial_statement_dict:
+                res_obj.fs_ocr = json.dumps([financial_statement_dict])
+        # 财报情况说明存入数据库
+        if res_obj is not None:
+            if financial_explanation_dict:
+                res_obj.fss_ocr = json.dumps([financial_explanation_dict])
        for classify, field in consts.RESULT_MAPPING.items():
            if not hasattr(res_obj, field):
                continue
@@ -2477,7 +2527,7 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task
    return res_obj

 @transaction.atomic
-def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict):
+def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict):
    with transaction.atomic('default'):
        res_obj = result_class.objects.using('default').select_for_update().filter(application_id=doc.application_id).first()
        self.online_log.info('{0} [sql lock HIL application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id))
@@ -2485,7 +2535,14 @@ def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, tas
            res_obj = result_class()
            res_obj.application_id = doc.application_id
            self.online_log.info('{0} [res_obj is None application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id))
-            res_obj.fs_ocr = json.dumps([financial_statement_dict])
+        # 财务报表三个表存入数据库
+        if res_obj is not None:
+            if financial_statement_dict:
+                res_obj.fs_ocr = json.dumps([financial_statement_dict])
+        # 财报情况说明存入数据库
+        if res_obj is not None:
+            if financial_explanation_dict:
+                res_obj.fss_ocr = json.dumps([financial_explanation_dict])
        for classify, field in consts.RESULT_MAPPING.items():
            if not hasattr(res_obj, field):
                continue
--- a/src/apps/doc/ocr/wb.py
View file @7f9efd4
+++ b/src/apps/doc/ocr/wb.py
View file @7f9efd4
@@ -829,6 +829,10 @@ class BSWorkbook(Workbook):


    def financial_rebuild(self, financial_statement_dict):
+        # 如果 financial_statement_dict 为空，则不创建表
+        if not financial_statement_dict:
+            return
+        # 如果 financial_statement_dict 不为空，则创建表
        ws = self.create_sheet(consts.FINANCIAL_SHEET_NAME)
        for fin_key, fin_value in financial_statement_dict.items():
            table_str = "识别码"
@@ -849,6 +853,27 @@ class BSWorkbook(Workbook):
                    ws.append(row)


+    def financial_explanation_rebuild(self, financial_explanation_dict):
+        """
+        Desc:
+            Rebuild the financial-statement explanation sheet, appending one labelled row per dict entry.
+        """
+        # An empty/falsy financial_explanation_dict means there is nothing to write: skip creating the sheet
+        if not financial_explanation_dict:
+            return
+        # financial_explanation_dict has content, so create the sheet
+        ws = self.create_sheet(consts.FINANCIAL_EXPLANATION_SHEET_NAME)
+        for fin_key, fin_value in financial_explanation_dict.items():
+            table_str = "公司名称"
+            if fin_key == "title":
+                table_str = "公司名称"
+            elif fin_key == "stamp":
+                table_str = "印章"
+            
+            row = ["财报情况说明" + table_str, str(fin_value)]
+            ws.append(row)
+
+
    @staticmethod
    def remove_yuan(amount_key_set, key, src_str):
        if key in amount_key_set and isinstance(src_str, str):
@@ -948,7 +973,7 @@ class BSWorkbook(Workbook):
        if len(self.sheetnames) > 1:
            self.remove(self.get_sheet_by_name('Sheet'))

-    def rebuild(self, bs_summary, license_summary, res_list, document_scheme, contract_result, metadata, financial_statement_dict):
+    def rebuild(self, bs_summary, license_summary, res_list, document_scheme, contract_result, metadata, financial_statement_dict, financial_explanation_dict):
        res_count_tuple = self.res_sheet(res_list)

        count_list = [(consts.MODEL_FIELD_BS, len(bs_summary))]
@@ -957,11 +982,13 @@ class BSWorkbook(Workbook):
            self.contract_rebuild(contract_result)
            self.bs_rebuild(bs_summary, res_count_tuple, metadata)
            self.financial_rebuild(financial_statement_dict)
+            self.financial_explanation_rebuild(financial_explanation_dict)
        else:
            self.bs_rebuild(bs_summary, res_count_tuple, metadata)
            self.license_rebuild(license_summary, document_scheme, count_list)
            self.contract_rebuild(contract_result, True)
            self.financial_rebuild(financial_statement_dict)
+            self.financial_explanation_rebuild(financial_explanation_dict)
        self.move_res_sheet()
        self.remove_base_sheet()
        return count_list, self.need_follow