c811f8ce by 冯轩

Merge branch 'feature/CHINARPA-4659'

2 parents c576745b d2a1f3f2
...@@ -98,6 +98,7 @@ RES_SHEET_HEADER = ('页码', '图片序号', '检测图片序号', '结果') ...@@ -98,6 +98,7 @@ RES_SHEET_HEADER = ('页码', '图片序号', '检测图片序号', '结果')
98 RES_SUCCESS = '识别成功' 98 RES_SUCCESS = '识别成功'
99 RES_SUCCESS_OTHER = '识别成功(其他类)' 99 RES_SUCCESS_OTHER = '识别成功(其他类)'
100 RES_SUCCESS_EMPTY = '识别成功(空数据)' 100 RES_SUCCESS_EMPTY = '识别成功(空数据)'
101 RES_SUCCESS_FINANCIAL_STATEMENT = '识别成功(财务报表类)'
101 RES_FAILED = '识别失败' 102 RES_FAILED = '识别失败'
102 RES_FAILED_1 = '识别失败(阶段1)' 103 RES_FAILED_1 = '识别失败(阶段1)'
103 RES_FAILED_2 = '识别失败(阶段2)' 104 RES_FAILED_2 = '识别失败(阶段2)'
...@@ -2532,4 +2533,14 @@ FSM_ACTIVITED_STATUS = { ...@@ -2532,4 +2533,14 @@ FSM_ACTIVITED_STATUS = {
2532 "APIPN": "Activated-Invoice Passed-Non PT", 2533 "APIPN": "Activated-Invoice Passed-Non PT",
2533 "APIPP": "Activated-Invoice Passed-PT Doc Required", 2534 "APIPP": "Activated-Invoice Passed-PT Doc Required",
2534 "APARD": "Activated-Review done", 2535 "APARD": "Activated-Review done",
2535 }
...\ No newline at end of file ...\ No newline at end of file
2536 }
2537
2538 # 财务报表分类标签
2539 FINANCIAL_STATEMENT_CLASSIFY_LIST = [97, 98, 99]
2540 # 财务报表sheet名称
2541 FINANCIAL_SHEET_NAME = "财务报表"
2542
2543 # 财报情况说明分类标签
2544 FINANCIAL_EXPLANATION_CLASSIFY_LIST = [100]
2545 # 财报情况说明sheet名称
2546 FINANCIAL_EXPLANATION_SHEET_NAME = "财报情况说明"
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -1724,6 +1724,10 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1724,6 +1724,10 @@ class Command(BaseCommand, LoggerMixin):
1724 license_summary = {} 1724 license_summary = {}
1725 contract_result = {} 1725 contract_result = {}
1726 contract_result_compare = {} 1726 contract_result_compare = {}
1727 # 添加财报三个报表的处理
1728 financial_statement_dict = {}
1729 # 添加财报情况说明的处理
1730 financial_explanation_dict = {}
1727 res_list = [] 1731 res_list = []
1728 interest_keyword = Keywords.objects.filter( 1732 interest_keyword = Keywords.objects.filter(
1729 type=KeywordsType.INTEREST.value, on_off=True).values_list('keyword', flat=True) 1733 type=KeywordsType.INTEREST.value, on_off=True).values_list('keyword', flat=True)
...@@ -1753,6 +1757,44 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1753,6 +1757,44 @@ class Command(BaseCommand, LoggerMixin):
1753 self.online_log.warn('{0} [ocr_1 res error] [img={1}]'.format( 1757 self.online_log.warn('{0} [ocr_1 res error] [img={1}]'.format(
1754 self.log_base, img_path)) 1758 self.log_base, img_path))
1755 continue 1759 continue
1760 elif classify in consts.FINANCIAL_STATEMENT_CLASSIFY_LIST:
1761 # 添加到 res_list 中
1762 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_FINANCIAL_STATEMENT))
1763 # 只要分类为财报三个报表的,就在 financial_statement_dict 中添加对应的 code 和 stamp 两个dict
1764 if "code" not in financial_statement_dict:
1765 financial_statement_dict["code"] = {}
1766 if "stamp" not in financial_statement_dict:
1767 financial_statement_dict["stamp"] = {}
1768
1769 financial_statement_table_name = None
1770 if classify == 97:
1771 financial_statement_table_name = "balance_sheet"
1772 elif classify == 98:
1773 financial_statement_table_name = "income_statement"
1774 elif classify == 99:
1775 financial_statement_table_name = "cash_flow_statement"
1776 if financial_statement_table_name is not None:
1777 if "id_code" in ocr_data:
1778 id_code = ocr_data.get("id_code", "")
1779 financial_statement_dict["code"][financial_statement_table_name] = id_code
1780 if "stamp" in ocr_data:
1781 stamp = ocr_data.get("stamp", "")
1782 financial_statement_dict["stamp"][financial_statement_table_name] = stamp
1783 elif classify in consts.FINANCIAL_EXPLANATION_CLASSIFY_LIST:
1784 # 添加到 res_list 中
1785 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_FINANCIAL_STATEMENT))
1786 # 只要分类为财报情况说明的,就在 financial_explanation_dict 中添加对应的 title 和 stamp 两个dict
1787 if "title" not in financial_explanation_dict:
1788 financial_explanation_dict["title"] = {}
1789 if "stamp" not in financial_explanation_dict:
1790 financial_explanation_dict["stamp"] = {}
1791
1792 if "title" in ocr_data:
1793 title = ocr_data.get("title", "")
1794 financial_explanation_dict["title"] = title
1795 if "stamp" in ocr_data:
1796 stamp = ocr_data.get("stamp", "")
1797 financial_explanation_dict["stamp"] = stamp
1756 elif classify in consts.OTHER_CLASSIFY_SET: # 其他类 1798 elif classify in consts.OTHER_CLASSIFY_SET: # 其他类
1757 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_OTHER)) 1799 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_OTHER))
1758 continue 1800 continue
...@@ -1939,7 +1981,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1939,7 +1981,7 @@ class Command(BaseCommand, LoggerMixin):
1939 # src_excel_path = os.path.join(doc_data_path, 'src.xlsx') 1981 # src_excel_path = os.path.join(doc_data_path, 'src.xlsx')
1940 # wb.save(src_excel_path) 1982 # wb.save(src_excel_path)
1941 #need_follow表示在上传edms时文件名是否要添加"关注"两字 1983 #need_follow表示在上传edms时文件名是否要添加"关注"两字
1942 count_list, need_follow = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme, contract_result, doc.metadata) 1984 count_list, need_follow = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme, contract_result, doc.metadata, financial_statement_dict, financial_explanation_dict)
1943 wb.save(excel_path) 1985 wb.save(excel_path)
1944 1986
1945 except Exception as e: 1987 except Exception as e:
...@@ -2040,46 +2082,64 @@ class Command(BaseCommand, LoggerMixin): ...@@ -2040,46 +2082,64 @@ class Command(BaseCommand, LoggerMixin):
2040 license_summary[consts.BS_CLASSIFY] = bs_rebuild 2082 license_summary[consts.BS_CLASSIFY] = bs_rebuild
2041 2083
2042 # 比对 2084 # 比对
2043 if len(license_summary) > 0 and doc.document_scheme != consts.DOC_SCHEME_LIST[2]: 2085 if len(license_summary) > 0:
2086 if doc.document_scheme != consts.DOC_SCHEME_LIST[2]:
2087 # if len(license_summary) > 0 and doc.document_scheme != consts.DOC_SCHEME_LIST[2]:
2088 try:
2089 is_ca = True if doc.document_scheme == consts.DOC_SCHEME_LIST[0] else False
2090 # 更新OCR累计识别结果表
2091 if business_type == consts.HIL_PREFIX:
2092 result_class = HILOCRResult if is_ca else HILSEOCRResult
2093 res_obj = atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict)
2094 else:
2095 result_class = AFCOCRResult if is_ca else AFCSEOCRResult
2096 res_obj = atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict)
2097
2098 except Exception as e:
2099 self.online_log.error(
2100 '{0} [process error (ocr result save)] [task={1}] [error={2}]'.format(
2101 self.log_base, task_str, traceback.format_exc()))
2102 else:
2103 self.online_log.info('{0} [ocr result save success] [task={1}] [res_id={2}]'.format(
2104 self.log_base, task_str, res_obj.id))
2105 # 触发比对
2106 try:
2107 # 是否fsm
2108 cms_status_class = HILCmsStatusInfo if business_type in consts.HIL_SET else AFCCmsStatusInfo
2109 cms_status_info = cms_status_class.objects.filter(application_id=doc.application_id).first()
2110 is_fsm = cms_status_info is not None and cms_status_info.is_fsm == 1
2111 self.online_log.info('{0} [isfsm] [task={1}] [true or false={2}]'.format(
2112 self.log_base, task_str, is_fsm))
2113 if is_fsm:
2114 fsm_compare.apply_async((doc.application_id, business_type, None, res_obj.id, is_ca, True),
2115 queue='queue_compare')
2116 else:
2117 # pass
2118 compare.apply_async((doc.application_id, business_type, None, res_obj.id,
2119 is_ca, True), queue='queue_compare')
2120 except Exception as e:
2121 self.online_log.error(
2122 '{0} [process error (comparison info send)] [task={1}] [error={2}]'.format(
2123 self.log_base, task_str, traceback.format_exc()))
2124 else:
2125 self.online_log.info('{0} [comparison info send success] [task={1}] '
2126 '[res_id={2}]'.format(self.log_base, task_str, res_obj.id))
2127 else:
2128 # license_summary 为空
2129 self.online_log.info('{0} [task={1}] [no license_summary]'.format(self.log_base, task_str))
2044 try: 2130 try:
2045 is_ca = True if doc.document_scheme == consts.DOC_SCHEME_LIST[0] else False 2131 is_ca = True if doc.document_scheme == consts.DOC_SCHEME_LIST[0] else False
2046 # 更新OCR累计识别结果表 2132 # 更新OCR累计识别结果表
2047 if business_type == consts.HIL_PREFIX: 2133 if business_type == consts.HIL_PREFIX:
2048 result_class = HILOCRResult if is_ca else HILSEOCRResult 2134 result_class = HILOCRResult if is_ca else HILSEOCRResult
2049 res_obj = atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str) 2135 res_obj = atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict)
2050 else: 2136 else:
2051 result_class = AFCOCRResult if is_ca else AFCSEOCRResult 2137 result_class = AFCOCRResult if is_ca else AFCSEOCRResult
2052 res_obj = atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str) 2138 res_obj = atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict)
2053
2054 except Exception as e: 2139 except Exception as e:
2055 self.online_log.error( 2140 self.online_log.error(
2056 '{0} [process error (ocr result save)] [task={1}] [error={2}]'.format( 2141 '{0} [process error (ocr result save)] [task={1}] [error={2}]'.format(
2057 self.log_base, task_str, traceback.format_exc())) 2142 self.log_base, task_str, traceback.format_exc()))
2058 else:
2059 self.online_log.info('{0} [ocr result save success] [task={1}] [res_id={2}]'.format(
2060 self.log_base, task_str, res_obj.id))
2061 # 触发比对
2062 try:
2063 # 是否fsm
2064 cms_status_class = HILCmsStatusInfo if business_type in consts.HIL_SET else AFCCmsStatusInfo
2065 cms_status_info = cms_status_class.objects.filter(application_id=doc.application_id).first()
2066 is_fsm = cms_status_info is not None and cms_status_info.is_fsm == 1
2067 self.online_log.info('{0} [isfsm] [task={1}] [true or false={2}]'.format(
2068 self.log_base, task_str, is_fsm))
2069 if is_fsm:
2070 fsm_compare.apply_async((doc.application_id, business_type, None, res_obj.id, is_ca, True),
2071 queue='queue_compare')
2072 else:
2073 # pass
2074 compare.apply_async((doc.application_id, business_type, None, res_obj.id,
2075 is_ca, True), queue='queue_compare')
2076 except Exception as e:
2077 self.online_log.error(
2078 '{0} [process error (comparison info send)] [task={1}] [error={2}]'.format(
2079 self.log_base, task_str, traceback.format_exc()))
2080 else:
2081 self.online_log.info('{0} [comparison info send success] [task={1}] '
2082 '[res_id={2}]'.format(self.log_base, task_str, res_obj.id))
2083 2143
2084 # DDA处理 2144 # DDA处理
2085 if do_dda: 2145 if do_dda:
...@@ -2426,7 +2486,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -2426,7 +2486,7 @@ class Command(BaseCommand, LoggerMixin):
2426 self.online_log.info('{0} [stop safely]'.format(self.log_base)) 2486 self.online_log.info('{0} [stop safely]'.format(self.log_base))
2427 2487
2428 @transaction.atomic 2488 @transaction.atomic
2429 def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task_str): 2489 def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task_str, financial_statement_dict, financial_explanation_dict):
2430 with transaction.atomic('afc'): 2490 with transaction.atomic('afc'):
2431 res_obj = result_class.objects.using('afc').select_for_update().filter(application_id=doc.application_id).first() 2491 res_obj = result_class.objects.using('afc').select_for_update().filter(application_id=doc.application_id).first()
2432 self.online_log.info('{0} [sql lock AFC application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) 2492 self.online_log.info('{0} [sql lock AFC application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id))
...@@ -2434,6 +2494,14 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task ...@@ -2434,6 +2494,14 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task
2434 res_obj = result_class() 2494 res_obj = result_class()
2435 res_obj.application_id = doc.application_id 2495 res_obj.application_id = doc.application_id
2436 self.online_log.info('{0} [res_obj is None application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) 2496 self.online_log.info('{0} [res_obj is None application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id))
2497 # 财务报表存入数据库
2498 if res_obj is not None:
2499 if financial_statement_dict:
2500 res_obj.fs_ocr = json.dumps([financial_statement_dict])
2501 # 财报情况说明存入数据库
2502 if res_obj is not None:
2503 if financial_explanation_dict:
2504 res_obj.fss_ocr = json.dumps([financial_explanation_dict])
2437 for classify, field in consts.RESULT_MAPPING.items(): 2505 for classify, field in consts.RESULT_MAPPING.items():
2438 if not hasattr(res_obj, field): 2506 if not hasattr(res_obj, field):
2439 continue 2507 continue
...@@ -2459,7 +2527,7 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task ...@@ -2459,7 +2527,7 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task
2459 return res_obj 2527 return res_obj
2460 2528
2461 @transaction.atomic 2529 @transaction.atomic
2462 def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str): 2530 def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict):
2463 with transaction.atomic('default'): 2531 with transaction.atomic('default'):
2464 res_obj = result_class.objects.using('default').select_for_update().filter(application_id=doc.application_id).first() 2532 res_obj = result_class.objects.using('default').select_for_update().filter(application_id=doc.application_id).first()
2465 self.online_log.info('{0} [sql lock HIL application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) 2533 self.online_log.info('{0} [sql lock HIL application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id))
...@@ -2467,6 +2535,14 @@ def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, tas ...@@ -2467,6 +2535,14 @@ def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, tas
2467 res_obj = result_class() 2535 res_obj = result_class()
2468 res_obj.application_id = doc.application_id 2536 res_obj.application_id = doc.application_id
2469 self.online_log.info('{0} [res_obj is None application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) 2537 self.online_log.info('{0} [res_obj is None application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id))
2538 # 财务报表三个表存入数据库
2539 if res_obj is not None:
2540 if financial_statement_dict:
2541 res_obj.fs_ocr = json.dumps([financial_statement_dict])
2542 # 财报情况说明存入数据库
2543 if res_obj is not None:
2544 if financial_explanation_dict:
2545 res_obj.fss_ocr = json.dumps([financial_explanation_dict])
2470 for classify, field in consts.RESULT_MAPPING.items(): 2546 for classify, field in consts.RESULT_MAPPING.items():
2471 if not hasattr(res_obj, field): 2547 if not hasattr(res_obj, field):
2472 continue 2548 continue
......
...@@ -827,6 +827,53 @@ class BSWorkbook(Workbook): ...@@ -827,6 +827,53 @@ class BSWorkbook(Workbook):
827 ws.append(row) 827 ws.append(row)
828 ws.append((None, )) 828 ws.append((None, ))
829 829
830
def financial_rebuild(self, financial_statement_dict):
    """
    Desc:
        Rebuild the financial-statement sheet.

    Creates a sheet named ``consts.FINANCIAL_SHEET_NAME`` and appends one
    two-cell row per recognised statement entry: a label made of the
    statement name plus the field suffix, and the stringified value.

    Args:
        financial_statement_dict: mapping of field kind to a per-statement
            mapping. Each value must support ``.items()``.
            NOTE(review): presumably shaped like
            ``{"code": {...}, "stamp": {...}}`` keyed by
            ``balance_sheet`` / ``income_statement`` /
            ``cash_flow_statement`` -- confirm against the caller.

    Returns:
        None. When ``financial_statement_dict`` is falsy no sheet is created.
    """
    # Nothing was collected, so the sheet is not created at all.
    if not financial_statement_dict:
        return

    # Statement key -> label prefix written in the first column.
    statement_titles = {
        "balance_sheet": "资产负债表",
        "income_statement": "利润表",
        "cash_flow_statement": "现金流量表",
    }

    sheet = self.create_sheet(consts.FINANCIAL_SHEET_NAME)
    for field_kind, per_statement in financial_statement_dict.items():
        # Only "stamp" has its own suffix; "code" and every other key
        # use the identification-code suffix.
        suffix = "印章" if field_kind == "stamp" else "识别码"

        for statement_key, field_value in per_statement.items():
            title = statement_titles.get(statement_key)
            if title is None:
                # Unrecognised statement keys produce no row.
                continue
            sheet.append([title + suffix, str(field_value)])
854
855
def financial_explanation_rebuild(self, financial_explanation_dict):
    """
    Desc:
        Rebuild the financial-report explanation sheet.

    Creates a sheet named ``consts.FINANCIAL_EXPLANATION_SHEET_NAME`` and
    appends one two-cell row per entry of ``financial_explanation_dict``:
    a label and the entry's value as text.

    Args:
        financial_explanation_dict: mapping of field kind to its value.
            ``"title"`` is labelled as the company name and ``"stamp"`` as
            the stamp; any other key falls back to the company-name label.

    Returns:
        None. When ``financial_explanation_dict`` is falsy no sheet is
        created.
    """
    # Nothing was collected, so the sheet is not created at all.
    if not financial_explanation_dict:
        return

    # Field kind -> label suffix written in the first column.
    field_labels = {"title": "公司名称", "stamp": "印章"}

    ws = self.create_sheet(consts.FINANCIAL_EXPLANATION_SHEET_NAME)
    for fin_key, fin_value in financial_explanation_dict.items():
        table_str = field_labels.get(fin_key, "公司名称")

        # A field that was never recognised can arrive as an empty dict
        # placeholder or as None. Write an empty cell for those instead of
        # the literal text "{}" / "None".
        is_empty_placeholder = isinstance(fin_value, dict) and not fin_value
        if fin_value is None or is_empty_placeholder:
            cell_text = ""
        else:
            cell_text = str(fin_value)

        ws.append(["财报情况说明" + table_str, cell_text])
875
876
830 @staticmethod 877 @staticmethod
831 def remove_yuan(amount_key_set, key, src_str): 878 def remove_yuan(amount_key_set, key, src_str):
832 if key in amount_key_set and isinstance(src_str, str): 879 if key in amount_key_set and isinstance(src_str, str):
...@@ -926,7 +973,7 @@ class BSWorkbook(Workbook): ...@@ -926,7 +973,7 @@ class BSWorkbook(Workbook):
926 if len(self.sheetnames) > 1: 973 if len(self.sheetnames) > 1:
927 self.remove(self.get_sheet_by_name('Sheet')) 974 self.remove(self.get_sheet_by_name('Sheet'))
928 975
929 def rebuild(self, bs_summary, license_summary, res_list, document_scheme, contract_result, metadata): 976 def rebuild(self, bs_summary, license_summary, res_list, document_scheme, contract_result, metadata, financial_statement_dict, financial_explanation_dict):
930 res_count_tuple = self.res_sheet(res_list) 977 res_count_tuple = self.res_sheet(res_list)
931 978
932 count_list = [(consts.MODEL_FIELD_BS, len(bs_summary))] 979 count_list = [(consts.MODEL_FIELD_BS, len(bs_summary))]
...@@ -934,10 +981,14 @@ class BSWorkbook(Workbook): ...@@ -934,10 +981,14 @@ class BSWorkbook(Workbook):
934 self.license_rebuild(license_summary, document_scheme, count_list) 981 self.license_rebuild(license_summary, document_scheme, count_list)
935 self.contract_rebuild(contract_result) 982 self.contract_rebuild(contract_result)
936 self.bs_rebuild(bs_summary, res_count_tuple, metadata) 983 self.bs_rebuild(bs_summary, res_count_tuple, metadata)
984 self.financial_rebuild(financial_statement_dict)
985 self.financial_explanation_rebuild(financial_explanation_dict)
937 else: 986 else:
938 self.bs_rebuild(bs_summary, res_count_tuple, metadata) 987 self.bs_rebuild(bs_summary, res_count_tuple, metadata)
939 self.license_rebuild(license_summary, document_scheme, count_list) 988 self.license_rebuild(license_summary, document_scheme, count_list)
940 self.contract_rebuild(contract_result, True) 989 self.contract_rebuild(contract_result, True)
990 self.financial_rebuild(financial_statement_dict)
991 self.financial_explanation_rebuild(financial_explanation_dict)
941 self.move_res_sheet() 992 self.move_res_sheet()
942 self.remove_base_sheet() 993 self.remove_base_sheet()
943 return count_list, self.need_follow 994 return count_list, self.need_follow
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!