7f9efd41 by 冯轩

Merge branch 'feature/CHINARPA-4659' into feature/uat-tmp

2 parents 040c5381 d2a1f3f2
...@@ -98,6 +98,7 @@ RES_SHEET_HEADER = ('页码', '图片序号', '检测图片序号', '结果') ...@@ -98,6 +98,7 @@ RES_SHEET_HEADER = ('页码', '图片序号', '检测图片序号', '结果')
98 RES_SUCCESS = '识别成功' 98 RES_SUCCESS = '识别成功'
99 RES_SUCCESS_OTHER = '识别成功(其他类)' 99 RES_SUCCESS_OTHER = '识别成功(其他类)'
100 RES_SUCCESS_EMPTY = '识别成功(空数据)' 100 RES_SUCCESS_EMPTY = '识别成功(空数据)'
101 RES_SUCCESS_FINANCIAL_STATEMENT = '识别成功(财务报表类)'
101 RES_FAILED = '识别失败' 102 RES_FAILED = '识别失败'
102 RES_FAILED_1 = '识别失败(阶段1)' 103 RES_FAILED_1 = '识别失败(阶段1)'
103 RES_FAILED_2 = '识别失败(阶段2)' 104 RES_FAILED_2 = '识别失败(阶段2)'
...@@ -2562,4 +2563,9 @@ FSM_ACTIVITED_STATUS = { ...@@ -2562,4 +2563,9 @@ FSM_ACTIVITED_STATUS = {
2562 # 财务报表分类标签 2563 # 财务报表分类标签
2563 FINANCIAL_STATEMENT_CLASSIFY_LIST = [97, 98, 99] 2564 FINANCIAL_STATEMENT_CLASSIFY_LIST = [97, 98, 99]
2564 # 财务报表sheet名称 2565 # 财务报表sheet名称
2565 FINANCIAL_SHEET_NAME = "财务报表"
...\ No newline at end of file ...\ No newline at end of file
2566 FINANCIAL_SHEET_NAME = "财务报表"
2567
2568 # 财报情况说明分类标签
2569 FINANCIAL_EXPLANATION_CLASSIFY_LIST = [100]
2570 # 财报情况说明sheet名称
2571 FINANCIAL_EXPLANATION_SHEET_NAME = "财报情况说明"
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -1725,7 +1725,9 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1725,7 +1725,9 @@ class Command(BaseCommand, LoggerMixin):
1725 contract_result = {} 1725 contract_result = {}
1726 contract_result_compare = {} 1726 contract_result_compare = {}
1727 # 添加财报三个报表的处理 1727 # 添加财报三个报表的处理
1728 financial_statement_dict = {"code": {}, "stamp": {}} 1728 financial_statement_dict = {}
1729 # 添加财报情况说明的处理
1730 financial_explanation_dict = {}
1729 res_list = [] 1731 res_list = []
1730 interest_keyword = Keywords.objects.filter( 1732 interest_keyword = Keywords.objects.filter(
1731 type=KeywordsType.INTEREST.value, on_off=True).values_list('keyword', flat=True) 1733 type=KeywordsType.INTEREST.value, on_off=True).values_list('keyword', flat=True)
...@@ -1756,6 +1758,14 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1756,6 +1758,14 @@ class Command(BaseCommand, LoggerMixin):
1756 self.log_base, img_path)) 1758 self.log_base, img_path))
1757 continue 1759 continue
1758 elif classify in consts.FINANCIAL_STATEMENT_CLASSIFY_LIST: 1760 elif classify in consts.FINANCIAL_STATEMENT_CLASSIFY_LIST:
1761 # 添加到 res_list 中
1762 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_FINANCIAL_STATEMENT))
1763 # 只要分类为财报三个报表的,就在 financial_statement_dict 中添加对应的 code 和 stamp 两个dict
1764 if "code" not in financial_statement_dict:
1765 financial_statement_dict["code"] = {}
1766 if "stamp" not in financial_statement_dict:
1767 financial_statement_dict["stamp"] = {}
1768
1759 financial_statement_table_name = None 1769 financial_statement_table_name = None
1760 if classify == 97: 1770 if classify == 97:
1761 financial_statement_table_name = "balance_sheet" 1771 financial_statement_table_name = "balance_sheet"
...@@ -1770,6 +1780,21 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1770,6 +1780,21 @@ class Command(BaseCommand, LoggerMixin):
1770 if "stamp" in ocr_data: 1780 if "stamp" in ocr_data:
1771 stamp = ocr_data.get("stamp", "") 1781 stamp = ocr_data.get("stamp", "")
1772 financial_statement_dict["stamp"][financial_statement_table_name] = stamp 1782 financial_statement_dict["stamp"][financial_statement_table_name] = stamp
1783 elif classify in consts.FINANCIAL_EXPLANATION_CLASSIFY_LIST:
1784 # 添加到 res_list 中
1785 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_FINANCIAL_STATEMENT))
1786 # 只要分类为财报情况说明的,就在 financial_explanation_dict 中添加对应的 title 和 stamp 两个dict
1787 if "title" not in financial_explanation_dict:
1788 financial_explanation_dict["title"] = {}
1789 if "stamp" not in financial_explanation_dict:
1790 financial_explanation_dict["stamp"] = {}
1791
1792 if "title" in ocr_data:
1793 title = ocr_data.get("title", "")
1794 financial_explanation_dict["title"] = title
1795 if "stamp" in ocr_data:
1796 stamp = ocr_data.get("stamp", "")
1797 financial_explanation_dict["stamp"] = stamp
1773 elif classify in consts.OTHER_CLASSIFY_SET: # 其他类 1798 elif classify in consts.OTHER_CLASSIFY_SET: # 其他类
1774 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_OTHER)) 1799 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_OTHER))
1775 continue 1800 continue
...@@ -1956,7 +1981,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1956,7 +1981,7 @@ class Command(BaseCommand, LoggerMixin):
1956 # src_excel_path = os.path.join(doc_data_path, 'src.xlsx') 1981 # src_excel_path = os.path.join(doc_data_path, 'src.xlsx')
1957 # wb.save(src_excel_path) 1982 # wb.save(src_excel_path)
1958 #need_follow表示在上传edms时文件名是否要添加"关注"两字 1983 #need_follow表示在上传edms时文件名是否要添加"关注"两字
1959 count_list, need_follow = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme, contract_result, doc.metadata, financial_statement_dict) 1984 count_list, need_follow = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme, contract_result, doc.metadata, financial_statement_dict, financial_explanation_dict)
1960 wb.save(excel_path) 1985 wb.save(excel_path)
1961 1986
1962 except Exception as e: 1987 except Exception as e:
...@@ -2057,46 +2082,64 @@ class Command(BaseCommand, LoggerMixin): ...@@ -2057,46 +2082,64 @@ class Command(BaseCommand, LoggerMixin):
2057 license_summary[consts.BS_CLASSIFY] = bs_rebuild 2082 license_summary[consts.BS_CLASSIFY] = bs_rebuild
2058 2083
2059 # 比对 2084 # 比对
2060 if len(license_summary) > 0 and doc.document_scheme != consts.DOC_SCHEME_LIST[2]: 2085 if len(license_summary) > 0:
2086 if doc.document_scheme != consts.DOC_SCHEME_LIST[2]:
2087 # if len(license_summary) > 0 and doc.document_scheme != consts.DOC_SCHEME_LIST[2]:
2088 try:
2089 is_ca = True if doc.document_scheme == consts.DOC_SCHEME_LIST[0] else False
2090 # 更新OCR累计识别结果表
2091 if business_type == consts.HIL_PREFIX:
2092 result_class = HILOCRResult if is_ca else HILSEOCRResult
2093 res_obj = atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict)
2094 else:
2095 result_class = AFCOCRResult if is_ca else AFCSEOCRResult
2096 res_obj = atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict)
2097
2098 except Exception as e:
2099 self.online_log.error(
2100 '{0} [process error (ocr result save)] [task={1}] [error={2}]'.format(
2101 self.log_base, task_str, traceback.format_exc()))
2102 else:
2103 self.online_log.info('{0} [ocr result save success] [task={1}] [res_id={2}]'.format(
2104 self.log_base, task_str, res_obj.id))
2105 # 触发比对
2106 try:
2107 # 是否fsm
2108 cms_status_class = HILCmsStatusInfo if business_type in consts.HIL_SET else AFCCmsStatusInfo
2109 cms_status_info = cms_status_class.objects.filter(application_id=doc.application_id).first()
2110 is_fsm = cms_status_info is not None and cms_status_info.is_fsm == 1
2111 self.online_log.info('{0} [isfsm] [task={1}] [true or false={2}]'.format(
2112 self.log_base, task_str, is_fsm))
2113 if is_fsm:
2114 fsm_compare.apply_async((doc.application_id, business_type, None, res_obj.id, is_ca, True),
2115 queue='queue_compare')
2116 else:
2117 # pass
2118 compare.apply_async((doc.application_id, business_type, None, res_obj.id,
2119 is_ca, True), queue='queue_compare')
2120 except Exception as e:
2121 self.online_log.error(
2122 '{0} [process error (comparison info send)] [task={1}] [error={2}]'.format(
2123 self.log_base, task_str, traceback.format_exc()))
2124 else:
2125 self.online_log.info('{0} [comparison info send success] [task={1}] '
2126 '[res_id={2}]'.format(self.log_base, task_str, res_obj.id))
2127 else:
2128 # license_summary 为空
2129 self.online_log.info('{0} [task={1}] [no license_summary]'.format(self.log_base, task_str))
2061 try: 2130 try:
2062 is_ca = True if doc.document_scheme == consts.DOC_SCHEME_LIST[0] else False 2131 is_ca = True if doc.document_scheme == consts.DOC_SCHEME_LIST[0] else False
2063 # 更新OCR累计识别结果表 2132 # 更新OCR累计识别结果表
2064 if business_type == consts.HIL_PREFIX: 2133 if business_type == consts.HIL_PREFIX:
2065 result_class = HILOCRResult if is_ca else HILSEOCRResult 2134 result_class = HILOCRResult if is_ca else HILSEOCRResult
2066 res_obj = atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict) 2135 res_obj = atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict)
2067 else: 2136 else:
2068 result_class = AFCOCRResult if is_ca else AFCSEOCRResult 2137 result_class = AFCOCRResult if is_ca else AFCSEOCRResult
2069 res_obj = atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict) 2138 res_obj = atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict)
2070
2071 except Exception as e: 2139 except Exception as e:
2072 self.online_log.error( 2140 self.online_log.error(
2073 '{0} [process error (ocr result save)] [task={1}] [error={2}]'.format( 2141 '{0} [process error (ocr result save)] [task={1}] [error={2}]'.format(
2074 self.log_base, task_str, traceback.format_exc())) 2142 self.log_base, task_str, traceback.format_exc()))
2075 else:
2076 self.online_log.info('{0} [ocr result save success] [task={1}] [res_id={2}]'.format(
2077 self.log_base, task_str, res_obj.id))
2078 # 触发比对
2079 try:
2080 # 是否fsm
2081 cms_status_class = HILCmsStatusInfo if business_type in consts.HIL_SET else AFCCmsStatusInfo
2082 cms_status_info = cms_status_class.objects.filter(application_id=doc.application_id).first()
2083 is_fsm = cms_status_info is not None and cms_status_info.is_fsm == 1
2084 self.online_log.info('{0} [isfsm] [task={1}] [true or false={2}]'.format(
2085 self.log_base, task_str, is_fsm))
2086 if is_fsm:
2087 fsm_compare.apply_async((doc.application_id, business_type, None, res_obj.id, is_ca, True),
2088 queue='queue_compare')
2089 else:
2090 # pass
2091 compare.apply_async((doc.application_id, business_type, None, res_obj.id,
2092 is_ca, True), queue='queue_compare')
2093 except Exception as e:
2094 self.online_log.error(
2095 '{0} [process error (comparison info send)] [task={1}] [error={2}]'.format(
2096 self.log_base, task_str, traceback.format_exc()))
2097 else:
2098 self.online_log.info('{0} [comparison info send success] [task={1}] '
2099 '[res_id={2}]'.format(self.log_base, task_str, res_obj.id))
2100 2143
2101 # DDA处理 2144 # DDA处理
2102 if do_dda: 2145 if do_dda:
...@@ -2443,7 +2486,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -2443,7 +2486,7 @@ class Command(BaseCommand, LoggerMixin):
2443 self.online_log.info('{0} [stop safely]'.format(self.log_base)) 2486 self.online_log.info('{0} [stop safely]'.format(self.log_base))
2444 2487
2445 @transaction.atomic 2488 @transaction.atomic
2446 def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task_str, financial_statement_dict): 2489 def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task_str, financial_statement_dict, financial_explanation_dict):
2447 with transaction.atomic('afc'): 2490 with transaction.atomic('afc'):
2448 res_obj = result_class.objects.using('afc').select_for_update().filter(application_id=doc.application_id).first() 2491 res_obj = result_class.objects.using('afc').select_for_update().filter(application_id=doc.application_id).first()
2449 self.online_log.info('{0} [sql lock AFC application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) 2492 self.online_log.info('{0} [sql lock AFC application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id))
...@@ -2451,7 +2494,14 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task ...@@ -2451,7 +2494,14 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task
2451 res_obj = result_class() 2494 res_obj = result_class()
2452 res_obj.application_id = doc.application_id 2495 res_obj.application_id = doc.application_id
2453 self.online_log.info('{0} [res_obj is None application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) 2496 self.online_log.info('{0} [res_obj is None application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id))
2454 res_obj.fs_ocr = json.dumps([financial_statement_dict]) 2497 # 财务报表存入数据库
2498 if res_obj is not None:
2499 if financial_statement_dict:
2500 res_obj.fs_ocr = json.dumps([financial_statement_dict])
2501 # 财报情况说明存入数据库
2502 if res_obj is not None:
2503 if financial_explanation_dict:
2504 res_obj.fss_ocr = json.dumps([financial_explanation_dict])
2455 for classify, field in consts.RESULT_MAPPING.items(): 2505 for classify, field in consts.RESULT_MAPPING.items():
2456 if not hasattr(res_obj, field): 2506 if not hasattr(res_obj, field):
2457 continue 2507 continue
...@@ -2477,7 +2527,7 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task ...@@ -2477,7 +2527,7 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task
2477 return res_obj 2527 return res_obj
2478 2528
2479 @transaction.atomic 2529 @transaction.atomic
2480 def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict): 2530 def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict):
2481 with transaction.atomic('default'): 2531 with transaction.atomic('default'):
2482 res_obj = result_class.objects.using('default').select_for_update().filter(application_id=doc.application_id).first() 2532 res_obj = result_class.objects.using('default').select_for_update().filter(application_id=doc.application_id).first()
2483 self.online_log.info('{0} [sql lock HIL application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) 2533 self.online_log.info('{0} [sql lock HIL application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id))
...@@ -2485,7 +2535,14 @@ def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, tas ...@@ -2485,7 +2535,14 @@ def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, tas
2485 res_obj = result_class() 2535 res_obj = result_class()
2486 res_obj.application_id = doc.application_id 2536 res_obj.application_id = doc.application_id
2487 self.online_log.info('{0} [res_obj is None application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) 2537 self.online_log.info('{0} [res_obj is None application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id))
2488 res_obj.fs_ocr = json.dumps([financial_statement_dict]) 2538 # 财务报表三个表存入数据库
2539 if res_obj is not None:
2540 if financial_statement_dict:
2541 res_obj.fs_ocr = json.dumps([financial_statement_dict])
2542 # 财报情况说明存入数据库
2543 if res_obj is not None:
2544 if financial_explanation_dict:
2545 res_obj.fss_ocr = json.dumps([financial_explanation_dict])
2489 for classify, field in consts.RESULT_MAPPING.items(): 2546 for classify, field in consts.RESULT_MAPPING.items():
2490 if not hasattr(res_obj, field): 2547 if not hasattr(res_obj, field):
2491 continue 2548 continue
......
...@@ -829,6 +829,10 @@ class BSWorkbook(Workbook): ...@@ -829,6 +829,10 @@ class BSWorkbook(Workbook):
829 829
830 830
831 def financial_rebuild(self, financial_statement_dict): 831 def financial_rebuild(self, financial_statement_dict):
832 # 如果 financial_statement_dict 为空,则不创建表
833 if not financial_statement_dict:
834 return
835 # 如果 financial_statement_dict 不为空,则创建表
832 ws = self.create_sheet(consts.FINANCIAL_SHEET_NAME) 836 ws = self.create_sheet(consts.FINANCIAL_SHEET_NAME)
833 for fin_key, fin_value in financial_statement_dict.items(): 837 for fin_key, fin_value in financial_statement_dict.items():
834 table_str = "识别码" 838 table_str = "识别码"
...@@ -849,6 +853,27 @@ class BSWorkbook(Workbook): ...@@ -849,6 +853,27 @@ class BSWorkbook(Workbook):
849 ws.append(row) 853 ws.append(row)
850 854
851 855
def financial_explanation_rebuild(self, financial_explanation_dict):
    """Build the financial-statement-explanation worksheet.

    Does nothing when ``financial_explanation_dict`` is falsy. Otherwise a
    sheet named ``consts.FINANCIAL_EXPLANATION_SHEET_NAME`` is created and
    one two-cell row is appended per dict entry: a label built from the
    entry's key, followed by ``str(value)``.
    """
    # No explanation data was collected: do not create the sheet.
    if not financial_explanation_dict:
        return

    sheet = self.create_sheet(consts.FINANCIAL_EXPLANATION_SHEET_NAME)

    # Label suffix per key; any other key falls back to the same
    # company-name suffix that "title" uses.
    suffix_by_key = {"title": "公司名称", "stamp": "印章"}
    for key, value in financial_explanation_dict.items():
        suffix = suffix_by_key.get(key, "公司名称")
        sheet.append(["财报情况说明" + suffix, str(value)])
875
876
852 @staticmethod 877 @staticmethod
853 def remove_yuan(amount_key_set, key, src_str): 878 def remove_yuan(amount_key_set, key, src_str):
854 if key in amount_key_set and isinstance(src_str, str): 879 if key in amount_key_set and isinstance(src_str, str):
...@@ -948,7 +973,7 @@ class BSWorkbook(Workbook): ...@@ -948,7 +973,7 @@ class BSWorkbook(Workbook):
948 if len(self.sheetnames) > 1: 973 if len(self.sheetnames) > 1:
949 self.remove(self.get_sheet_by_name('Sheet')) 974 self.remove(self.get_sheet_by_name('Sheet'))
950 975
951 def rebuild(self, bs_summary, license_summary, res_list, document_scheme, contract_result, metadata, financial_statement_dict): 976 def rebuild(self, bs_summary, license_summary, res_list, document_scheme, contract_result, metadata, financial_statement_dict, financial_explanation_dict):
952 res_count_tuple = self.res_sheet(res_list) 977 res_count_tuple = self.res_sheet(res_list)
953 978
954 count_list = [(consts.MODEL_FIELD_BS, len(bs_summary))] 979 count_list = [(consts.MODEL_FIELD_BS, len(bs_summary))]
...@@ -957,11 +982,13 @@ class BSWorkbook(Workbook): ...@@ -957,11 +982,13 @@ class BSWorkbook(Workbook):
957 self.contract_rebuild(contract_result) 982 self.contract_rebuild(contract_result)
958 self.bs_rebuild(bs_summary, res_count_tuple, metadata) 983 self.bs_rebuild(bs_summary, res_count_tuple, metadata)
959 self.financial_rebuild(financial_statement_dict) 984 self.financial_rebuild(financial_statement_dict)
985 self.financial_explanation_rebuild(financial_explanation_dict)
960 else: 986 else:
961 self.bs_rebuild(bs_summary, res_count_tuple, metadata) 987 self.bs_rebuild(bs_summary, res_count_tuple, metadata)
962 self.license_rebuild(license_summary, document_scheme, count_list) 988 self.license_rebuild(license_summary, document_scheme, count_list)
963 self.contract_rebuild(contract_result, True) 989 self.contract_rebuild(contract_result, True)
964 self.financial_rebuild(financial_statement_dict) 990 self.financial_rebuild(financial_statement_dict)
991 self.financial_explanation_rebuild(financial_explanation_dict)
965 self.move_res_sheet() 992 self.move_res_sheet()
966 self.remove_base_sheet() 993 self.remove_base_sheet()
967 return count_list, self.need_follow 994 return count_list, self.need_follow
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!