Merge branch 'feature/CHINARPA-4659' into feature/uat-tmp
Showing 3 changed files with 98 additions and 8 deletions
... | @@ -98,6 +98,7 @@ RES_SHEET_HEADER = ('页码', '图片序号', '检测图片序号', '结果') | ... | @@ -98,6 +98,7 @@ RES_SHEET_HEADER = ('页码', '图片序号', '检测图片序号', '结果') |
98 | RES_SUCCESS = '识别成功' | 98 | RES_SUCCESS = '识别成功' |
99 | RES_SUCCESS_OTHER = '识别成功(其他类)' | 99 | RES_SUCCESS_OTHER = '识别成功(其他类)' |
100 | RES_SUCCESS_EMPTY = '识别成功(空数据)' | 100 | RES_SUCCESS_EMPTY = '识别成功(空数据)' |
101 | RES_SUCCESS_FINANCIAL_STATEMENT = '识别成功(财务报表类)' | ||
101 | RES_FAILED = '识别失败' | 102 | RES_FAILED = '识别失败' |
102 | RES_FAILED_1 = '识别失败(阶段1)' | 103 | RES_FAILED_1 = '识别失败(阶段1)' |
103 | RES_FAILED_2 = '识别失败(阶段2)' | 104 | RES_FAILED_2 = '识别失败(阶段2)' |
... | @@ -2563,3 +2564,8 @@ FSM_ACTIVITED_STATUS = { | ... | @@ -2563,3 +2564,8 @@ FSM_ACTIVITED_STATUS = { |
2563 | FINANCIAL_STATEMENT_CLASSIFY_LIST = [97, 98, 99] | 2564 | FINANCIAL_STATEMENT_CLASSIFY_LIST = [97, 98, 99] |
2564 | # 财务报表sheet名称 | 2565 | # 财务报表sheet名称 |
2565 | FINANCIAL_SHEET_NAME = "财务报表" | 2566 | FINANCIAL_SHEET_NAME = "财务报表" |
2567 | |||
2568 | # 财报情况说明分类标签 | ||
2569 | FINANCIAL_EXPLANATION_CLASSIFY_LIST = [100] | ||
2570 | # 财报情况说明sheet名称 | ||
2571 | FINANCIAL_EXPLANATION_SHEET_NAME = "财报情况说明" | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
... | @@ -1725,7 +1725,9 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1725,7 +1725,9 @@ class Command(BaseCommand, LoggerMixin): |
1725 | contract_result = {} | 1725 | contract_result = {} |
1726 | contract_result_compare = {} | 1726 | contract_result_compare = {} |
1727 | # 添加财报三个报表的处理 | 1727 | # 添加财报三个报表的处理 |
1728 | financial_statement_dict = {"code": {}, "stamp": {}} | 1728 | financial_statement_dict = {} |
1729 | # 添加财报情况说明的处理 | ||
1730 | financial_explanation_dict = {} | ||
1729 | res_list = [] | 1731 | res_list = [] |
1730 | interest_keyword = Keywords.objects.filter( | 1732 | interest_keyword = Keywords.objects.filter( |
1731 | type=KeywordsType.INTEREST.value, on_off=True).values_list('keyword', flat=True) | 1733 | type=KeywordsType.INTEREST.value, on_off=True).values_list('keyword', flat=True) |
... | @@ -1756,6 +1758,14 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1756,6 +1758,14 @@ class Command(BaseCommand, LoggerMixin): |
1756 | self.log_base, img_path)) | 1758 | self.log_base, img_path)) |
1757 | continue | 1759 | continue |
1758 | elif classify in consts.FINANCIAL_STATEMENT_CLASSIFY_LIST: | 1760 | elif classify in consts.FINANCIAL_STATEMENT_CLASSIFY_LIST: |
1761 | # 添加到 res_list 中 | ||
1762 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_FINANCIAL_STATEMENT)) | ||
1763 | # 只要分类为财报三个报表的,就在 financial_statement_dict 中添加对应的 code 和 stamp 两个dict | ||
1764 | if "code" not in financial_statement_dict: | ||
1765 | financial_statement_dict["code"] = {} | ||
1766 | if "stamp" not in financial_statement_dict: | ||
1767 | financial_statement_dict["stamp"] = {} | ||
1768 | |||
1759 | financial_statement_table_name = None | 1769 | financial_statement_table_name = None |
1760 | if classify == 97: | 1770 | if classify == 97: |
1761 | financial_statement_table_name = "balance_sheet" | 1771 | financial_statement_table_name = "balance_sheet" |
... | @@ -1770,6 +1780,21 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1770,6 +1780,21 @@ class Command(BaseCommand, LoggerMixin): |
1770 | if "stamp" in ocr_data: | 1780 | if "stamp" in ocr_data: |
1771 | stamp = ocr_data.get("stamp", "") | 1781 | stamp = ocr_data.get("stamp", "") |
1772 | financial_statement_dict["stamp"][financial_statement_table_name] = stamp | 1782 | financial_statement_dict["stamp"][financial_statement_table_name] = stamp |
1783 | elif classify in consts.FINANCIAL_EXPLANATION_CLASSIFY_LIST: | ||
1784 | # 添加到 res_list 中 | ||
1785 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_FINANCIAL_STATEMENT)) | ||
1786 | # 只要分类为财报情况说明的,就在 financial_explanation_dict 中添加对应的 title 和 stamp 两个dict | ||
1787 | if "title" not in financial_explanation_dict: | ||
1788 | financial_explanation_dict["title"] = {} | ||
1789 | if "stamp" not in financial_explanation_dict: | ||
1790 | financial_explanation_dict["stamp"] = {} | ||
1791 | |||
1792 | if "title" in ocr_data: | ||
1793 | title = ocr_data.get("title", "") | ||
1794 | financial_explanation_dict["title"] = title | ||
1795 | if "stamp" in ocr_data: | ||
1796 | stamp = ocr_data.get("stamp", "") | ||
1797 | financial_explanation_dict["stamp"] = stamp | ||
1773 | elif classify in consts.OTHER_CLASSIFY_SET: # 其他类 | 1798 | elif classify in consts.OTHER_CLASSIFY_SET: # 其他类 |
1774 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_OTHER)) | 1799 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_OTHER)) |
1775 | continue | 1800 | continue |
... | @@ -1956,7 +1981,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1956,7 +1981,7 @@ class Command(BaseCommand, LoggerMixin): |
1956 | # src_excel_path = os.path.join(doc_data_path, 'src.xlsx') | 1981 | # src_excel_path = os.path.join(doc_data_path, 'src.xlsx') |
1957 | # wb.save(src_excel_path) | 1982 | # wb.save(src_excel_path) |
1958 | #need_follow表示在上传edms时文件名是否要添加"关注"两字 | 1983 | #need_follow表示在上传edms时文件名是否要添加"关注"两字 |
1959 | count_list, need_follow = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme, contract_result, doc.metadata, financial_statement_dict) | 1984 | count_list, need_follow = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme, contract_result, doc.metadata, financial_statement_dict, financial_explanation_dict) |
1960 | wb.save(excel_path) | 1985 | wb.save(excel_path) |
1961 | 1986 | ||
1962 | except Exception as e: | 1987 | except Exception as e: |
... | @@ -2057,16 +2082,18 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -2057,16 +2082,18 @@ class Command(BaseCommand, LoggerMixin): |
2057 | license_summary[consts.BS_CLASSIFY] = bs_rebuild | 2082 | license_summary[consts.BS_CLASSIFY] = bs_rebuild |
2058 | 2083 | ||
2059 | # 比对 | 2084 | # 比对 |
2060 | if len(license_summary) > 0 and doc.document_scheme != consts.DOC_SCHEME_LIST[2]: | 2085 | if len(license_summary) > 0: |
2086 | if doc.document_scheme != consts.DOC_SCHEME_LIST[2]: | ||
2087 | # if len(license_summary) > 0 and doc.document_scheme != consts.DOC_SCHEME_LIST[2]: | ||
2061 | try: | 2088 | try: |
2062 | is_ca = True if doc.document_scheme == consts.DOC_SCHEME_LIST[0] else False | 2089 | is_ca = True if doc.document_scheme == consts.DOC_SCHEME_LIST[0] else False |
2063 | # 更新OCR累计识别结果表 | 2090 | # 更新OCR累计识别结果表 |
2064 | if business_type == consts.HIL_PREFIX: | 2091 | if business_type == consts.HIL_PREFIX: |
2065 | result_class = HILOCRResult if is_ca else HILSEOCRResult | 2092 | result_class = HILOCRResult if is_ca else HILSEOCRResult |
2066 | res_obj = atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict) | 2093 | res_obj = atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict) |
2067 | else: | 2094 | else: |
2068 | result_class = AFCOCRResult if is_ca else AFCSEOCRResult | 2095 | result_class = AFCOCRResult if is_ca else AFCSEOCRResult |
2069 | res_obj = atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict) | 2096 | res_obj = atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict) |
2070 | 2097 | ||
2071 | except Exception as e: | 2098 | except Exception as e: |
2072 | self.online_log.error( | 2099 | self.online_log.error( |
... | @@ -2097,6 +2124,22 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -2097,6 +2124,22 @@ class Command(BaseCommand, LoggerMixin): |
2097 | else: | 2124 | else: |
2098 | self.online_log.info('{0} [comparison info send success] [task={1}] ' | 2125 | self.online_log.info('{0} [comparison info send success] [task={1}] ' |
2099 | '[res_id={2}]'.format(self.log_base, task_str, res_obj.id)) | 2126 | '[res_id={2}]'.format(self.log_base, task_str, res_obj.id)) |
2127 | else: | ||
2128 | # license_summary 为空 | ||
2129 | self.online_log.info('{0} [task={1}] [no license_summary]'.format(self.log_base, task_str)) | ||
2130 | try: | ||
2131 | is_ca = True if doc.document_scheme == consts.DOC_SCHEME_LIST[0] else False | ||
2132 | # 更新OCR累计识别结果表 | ||
2133 | if business_type == consts.HIL_PREFIX: | ||
2134 | result_class = HILOCRResult if is_ca else HILSEOCRResult | ||
2135 | res_obj = atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict) | ||
2136 | else: | ||
2137 | result_class = AFCOCRResult if is_ca else AFCSEOCRResult | ||
2138 | res_obj = atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict) | ||
2139 | except Exception as e: | ||
2140 | self.online_log.error( | ||
2141 | '{0} [process error (ocr result save)] [task={1}] [error={2}]'.format( | ||
2142 | self.log_base, task_str, traceback.format_exc())) | ||
2100 | 2143 | ||
2101 | # DDA处理 | 2144 | # DDA处理 |
2102 | if do_dda: | 2145 | if do_dda: |
... | @@ -2443,7 +2486,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -2443,7 +2486,7 @@ class Command(BaseCommand, LoggerMixin): |
2443 | self.online_log.info('{0} [stop safely]'.format(self.log_base)) | 2486 | self.online_log.info('{0} [stop safely]'.format(self.log_base)) |
2444 | 2487 | ||
2445 | @transaction.atomic | 2488 | @transaction.atomic |
2446 | def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task_str, financial_statement_dict): | 2489 | def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task_str, financial_statement_dict, financial_explanation_dict): |
2447 | with transaction.atomic('afc'): | 2490 | with transaction.atomic('afc'): |
2448 | res_obj = result_class.objects.using('afc').select_for_update().filter(application_id=doc.application_id).first() | 2491 | res_obj = result_class.objects.using('afc').select_for_update().filter(application_id=doc.application_id).first() |
2449 | self.online_log.info('{0} [sql lock AFC application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) | 2492 | self.online_log.info('{0} [sql lock AFC application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) |
... | @@ -2451,7 +2494,14 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task | ... | @@ -2451,7 +2494,14 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task |
2451 | res_obj = result_class() | 2494 | res_obj = result_class() |
2452 | res_obj.application_id = doc.application_id | 2495 | res_obj.application_id = doc.application_id |
2453 | self.online_log.info('{0} [res_obj is None application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) | 2496 | self.online_log.info('{0} [res_obj is None application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) |
2497 | # 财务报表存入数据库 | ||
2498 | if res_obj is not None: | ||
2499 | if financial_statement_dict: | ||
2454 | res_obj.fs_ocr = json.dumps([financial_statement_dict]) | 2500 | res_obj.fs_ocr = json.dumps([financial_statement_dict]) |
2501 | # 财报情况说明存入数据库 | ||
2502 | if res_obj is not None: | ||
2503 | if financial_explanation_dict: | ||
2504 | res_obj.fss_ocr = json.dumps([financial_explanation_dict]) | ||
2455 | for classify, field in consts.RESULT_MAPPING.items(): | 2505 | for classify, field in consts.RESULT_MAPPING.items(): |
2456 | if not hasattr(res_obj, field): | 2506 | if not hasattr(res_obj, field): |
2457 | continue | 2507 | continue |
... | @@ -2477,7 +2527,7 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task | ... | @@ -2477,7 +2527,7 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task |
2477 | return res_obj | 2527 | return res_obj |
2478 | 2528 | ||
2479 | @transaction.atomic | 2529 | @transaction.atomic |
2480 | def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict): | 2530 | def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict): |
2481 | with transaction.atomic('default'): | 2531 | with transaction.atomic('default'): |
2482 | res_obj = result_class.objects.using('default').select_for_update().filter(application_id=doc.application_id).first() | 2532 | res_obj = result_class.objects.using('default').select_for_update().filter(application_id=doc.application_id).first() |
2483 | self.online_log.info('{0} [sql lock HIL application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) | 2533 | self.online_log.info('{0} [sql lock HIL application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) |
... | @@ -2485,7 +2535,14 @@ def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, tas | ... | @@ -2485,7 +2535,14 @@ def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, tas |
2485 | res_obj = result_class() | 2535 | res_obj = result_class() |
2486 | res_obj.application_id = doc.application_id | 2536 | res_obj.application_id = doc.application_id |
2487 | self.online_log.info('{0} [res_obj is None application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) | 2537 | self.online_log.info('{0} [res_obj is None application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) |
2538 | # 财务报表三个表存入数据库 | ||
2539 | if res_obj is not None: | ||
2540 | if financial_statement_dict: | ||
2488 | res_obj.fs_ocr = json.dumps([financial_statement_dict]) | 2541 | res_obj.fs_ocr = json.dumps([financial_statement_dict]) |
2542 | # 财报情况说明存入数据库 | ||
2543 | if res_obj is not None: | ||
2544 | if financial_explanation_dict: | ||
2545 | res_obj.fss_ocr = json.dumps([financial_explanation_dict]) | ||
2489 | for classify, field in consts.RESULT_MAPPING.items(): | 2546 | for classify, field in consts.RESULT_MAPPING.items(): |
2490 | if not hasattr(res_obj, field): | 2547 | if not hasattr(res_obj, field): |
2491 | continue | 2548 | continue | ... | ... |
... | @@ -829,6 +829,10 @@ class BSWorkbook(Workbook): | ... | @@ -829,6 +829,10 @@ class BSWorkbook(Workbook): |
829 | 829 | ||
830 | 830 | ||
831 | def financial_rebuild(self, financial_statement_dict): | 831 | def financial_rebuild(self, financial_statement_dict): |
832 | # 如果 financial_statement_dict 为空,则不创建表 | ||
833 | if not financial_statement_dict: | ||
834 | return | ||
835 | # 如果 financial_statement_dict 不为空,则创建表 | ||
832 | ws = self.create_sheet(consts.FINANCIAL_SHEET_NAME) | 836 | ws = self.create_sheet(consts.FINANCIAL_SHEET_NAME) |
833 | for fin_key, fin_value in financial_statement_dict.items(): | 837 | for fin_key, fin_value in financial_statement_dict.items(): |
834 | table_str = "识别码" | 838 | table_str = "识别码" |
... | @@ -849,6 +853,27 @@ class BSWorkbook(Workbook): | ... | @@ -849,6 +853,27 @@ class BSWorkbook(Workbook): |
849 | ws.append(row) | 853 | ws.append(row) |
850 | 854 | ||
851 | 855 | ||
856 | def financial_explanation_rebuild(self, financial_explanation_dict): | ||
857 | """ | ||
858 | Desc: | ||
859 | 重构财报情况说明sheet | ||
860 | """ | ||
861 | # 如果 financial_explanation_dict 为空,则不创建sheet | ||
862 | if not financial_explanation_dict: | ||
863 | return | ||
864 | # 如果 financial_explanation_dict 不为空, 则创建sheet | ||
865 | ws = self.create_sheet(consts.FINANCIAL_EXPLANATION_SHEET_NAME) | ||
866 | for fin_key, fin_value in financial_explanation_dict.items(): | ||
867 | table_str = "公司名称" | ||
868 | if fin_key == "title": | ||
869 | table_str = "公司名称" | ||
870 | elif fin_key == "stamp": | ||
871 | table_str = "印章" | ||
872 | |||
873 | row = ["财报情况说明" + table_str, str(fin_value)] | ||
874 | ws.append(row) | ||
875 | |||
876 | |||
852 | @staticmethod | 877 | @staticmethod |
853 | def remove_yuan(amount_key_set, key, src_str): | 878 | def remove_yuan(amount_key_set, key, src_str): |
854 | if key in amount_key_set and isinstance(src_str, str): | 879 | if key in amount_key_set and isinstance(src_str, str): |
... | @@ -948,7 +973,7 @@ class BSWorkbook(Workbook): | ... | @@ -948,7 +973,7 @@ class BSWorkbook(Workbook): |
948 | if len(self.sheetnames) > 1: | 973 | if len(self.sheetnames) > 1: |
949 | self.remove(self.get_sheet_by_name('Sheet')) | 974 | self.remove(self.get_sheet_by_name('Sheet')) |
950 | 975 | ||
951 | def rebuild(self, bs_summary, license_summary, res_list, document_scheme, contract_result, metadata, financial_statement_dict): | 976 | def rebuild(self, bs_summary, license_summary, res_list, document_scheme, contract_result, metadata, financial_statement_dict, financial_explanation_dict): |
952 | res_count_tuple = self.res_sheet(res_list) | 977 | res_count_tuple = self.res_sheet(res_list) |
953 | 978 | ||
954 | count_list = [(consts.MODEL_FIELD_BS, len(bs_summary))] | 979 | count_list = [(consts.MODEL_FIELD_BS, len(bs_summary))] |
... | @@ -957,11 +982,13 @@ class BSWorkbook(Workbook): | ... | @@ -957,11 +982,13 @@ class BSWorkbook(Workbook): |
957 | self.contract_rebuild(contract_result) | 982 | self.contract_rebuild(contract_result) |
958 | self.bs_rebuild(bs_summary, res_count_tuple, metadata) | 983 | self.bs_rebuild(bs_summary, res_count_tuple, metadata) |
959 | self.financial_rebuild(financial_statement_dict) | 984 | self.financial_rebuild(financial_statement_dict) |
985 | self.financial_explanation_rebuild(financial_explanation_dict) | ||
960 | else: | 986 | else: |
961 | self.bs_rebuild(bs_summary, res_count_tuple, metadata) | 987 | self.bs_rebuild(bs_summary, res_count_tuple, metadata) |
962 | self.license_rebuild(license_summary, document_scheme, count_list) | 988 | self.license_rebuild(license_summary, document_scheme, count_list) |
963 | self.contract_rebuild(contract_result, True) | 989 | self.contract_rebuild(contract_result, True) |
964 | self.financial_rebuild(financial_statement_dict) | 990 | self.financial_rebuild(financial_statement_dict) |
991 | self.financial_explanation_rebuild(financial_explanation_dict) | ||
965 | self.move_res_sheet() | 992 | self.move_res_sheet() |
966 | self.remove_base_sheet() | 993 | self.remove_base_sheet() |
967 | return count_list, self.need_follow | 994 | return count_list, self.need_follow | ... | ... |
-
Please register or sign in to post a comment