Merge branch 'feature/CHINARPA-4659'
Showing
3 changed files
with
145 additions
and
7 deletions
| ... | @@ -98,6 +98,7 @@ RES_SHEET_HEADER = ('页码', '图片序号', '检测图片序号', '结果') | ... | @@ -98,6 +98,7 @@ RES_SHEET_HEADER = ('页码', '图片序号', '检测图片序号', '结果') |
| 98 | RES_SUCCESS = '识别成功' | 98 | RES_SUCCESS = '识别成功' |
| 99 | RES_SUCCESS_OTHER = '识别成功(其他类)' | 99 | RES_SUCCESS_OTHER = '识别成功(其他类)' |
| 100 | RES_SUCCESS_EMPTY = '识别成功(空数据)' | 100 | RES_SUCCESS_EMPTY = '识别成功(空数据)' |
| 101 | RES_SUCCESS_FINANCIAL_STATEMENT = '识别成功(财务报表类)' | ||
| 101 | RES_FAILED = '识别失败' | 102 | RES_FAILED = '识别失败' |
| 102 | RES_FAILED_1 = '识别失败(阶段1)' | 103 | RES_FAILED_1 = '识别失败(阶段1)' |
| 103 | RES_FAILED_2 = '识别失败(阶段2)' | 104 | RES_FAILED_2 = '识别失败(阶段2)' |
| ... | @@ -2533,3 +2534,13 @@ FSM_ACTIVITED_STATUS = { | ... | @@ -2533,3 +2534,13 @@ FSM_ACTIVITED_STATUS = { |
| 2533 | "APIPP": "Activated-Invoice Passed-PT Doc Required", | 2534 | "APIPP": "Activated-Invoice Passed-PT Doc Required", |
| 2534 | "APARD": "Activated-Review done", | 2535 | "APARD": "Activated-Review done", |
| 2535 | } | 2536 | } |
| 2537 | |||
| 2538 | # Classification labels treated as financial statements (the consumer in this change maps 97 -> balance sheet, 98 -> income statement, 99 -> cash-flow statement) | ||
| 2539 | FINANCIAL_STATEMENT_CLASSIFY_LIST = [97, 98, 99] | ||
| 2540 | # Name of the workbook sheet that holds the financial-statement results | ||
| 2541 | FINANCIAL_SHEET_NAME = "财务报表" | ||
| 2542 | |||
| 2543 | # Classification labels treated as the financial-report explanation document | ||
| 2544 | FINANCIAL_EXPLANATION_CLASSIFY_LIST = [100] | ||
| 2545 | # Name of the workbook sheet that holds the financial-report explanation results | ||
| 2546 | FINANCIAL_EXPLANATION_SHEET_NAME = "财报情况说明" | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
| ... | @@ -1724,6 +1724,10 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1724,6 +1724,10 @@ class Command(BaseCommand, LoggerMixin): |
| 1724 | license_summary = {} | 1724 | license_summary = {} |
| 1725 | contract_result = {} | 1725 | contract_result = {} |
| 1726 | contract_result_compare = {} | 1726 | contract_result_compare = {} |
| 1727 | # 添加财报三个报表的处理 | ||
| 1728 | financial_statement_dict = {} | ||
| 1729 | # 添加财报情况说明的处理 | ||
| 1730 | financial_explanation_dict = {} | ||
| 1727 | res_list = [] | 1731 | res_list = [] |
| 1728 | interest_keyword = Keywords.objects.filter( | 1732 | interest_keyword = Keywords.objects.filter( |
| 1729 | type=KeywordsType.INTEREST.value, on_off=True).values_list('keyword', flat=True) | 1733 | type=KeywordsType.INTEREST.value, on_off=True).values_list('keyword', flat=True) |
| ... | @@ -1753,6 +1757,44 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1753,6 +1757,44 @@ class Command(BaseCommand, LoggerMixin): |
| 1753 | self.online_log.warn('{0} [ocr_1 res error] [img={1}]'.format( | 1757 | self.online_log.warn('{0} [ocr_1 res error] [img={1}]'.format( |
| 1754 | self.log_base, img_path)) | 1758 | self.log_base, img_path)) |
| 1755 | continue | 1759 | continue |
| 1760 | elif classify in consts.FINANCIAL_STATEMENT_CLASSIFY_LIST: | ||
| 1761 | # 添加到 res_list 中 | ||
| 1762 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_FINANCIAL_STATEMENT)) | ||
| 1763 | # 只要分类为财报三个报表的,就在 financial_statement_dict 中添加对应的 code 和 stamp 两个dict | ||
| 1764 | if "code" not in financial_statement_dict: | ||
| 1765 | financial_statement_dict["code"] = {} | ||
| 1766 | if "stamp" not in financial_statement_dict: | ||
| 1767 | financial_statement_dict["stamp"] = {} | ||
| 1768 | |||
| 1769 | financial_statement_table_name = None | ||
| 1770 | if classify == 97: | ||
| 1771 | financial_statement_table_name = "balance_sheet" | ||
| 1772 | elif classify == 98: | ||
| 1773 | financial_statement_table_name = "income_statement" | ||
| 1774 | elif classify == 99: | ||
| 1775 | financial_statement_table_name = "cash_flow_statement" | ||
| 1776 | if financial_statement_table_name is not None: | ||
| 1777 | if "id_code" in ocr_data: | ||
| 1778 | id_code = ocr_data.get("id_code", "") | ||
| 1779 | financial_statement_dict["code"][financial_statement_table_name] = id_code | ||
| 1780 | if "stamp" in ocr_data: | ||
| 1781 | stamp = ocr_data.get("stamp", "") | ||
| 1782 | financial_statement_dict["stamp"][financial_statement_table_name] = stamp | ||
| 1783 | elif classify in consts.FINANCIAL_EXPLANATION_CLASSIFY_LIST: | ||
| 1784 | # 添加到 res_list 中 | ||
| 1785 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_FINANCIAL_STATEMENT)) | ||
| 1786 | # 只要分类为财报情况说明的,就在 financial_explanation_dict 中添加对应的 title 和 stamp 两个dict | ||
| 1787 | if "title" not in financial_explanation_dict: | ||
| 1788 | financial_explanation_dict["title"] = {} | ||
| 1789 | if "stamp" not in financial_explanation_dict: | ||
| 1790 | financial_explanation_dict["stamp"] = {} | ||
| 1791 | |||
| 1792 | if "title" in ocr_data: | ||
| 1793 | title = ocr_data.get("title", "") | ||
| 1794 | financial_explanation_dict["title"] = title | ||
| 1795 | if "stamp" in ocr_data: | ||
| 1796 | stamp = ocr_data.get("stamp", "") | ||
| 1797 | financial_explanation_dict["stamp"] = stamp | ||
| 1756 | elif classify in consts.OTHER_CLASSIFY_SET: # 其他类 | 1798 | elif classify in consts.OTHER_CLASSIFY_SET: # 其他类 |
| 1757 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_OTHER)) | 1799 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_OTHER)) |
| 1758 | continue | 1800 | continue |
| ... | @@ -1939,7 +1981,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1939,7 +1981,7 @@ class Command(BaseCommand, LoggerMixin): |
| 1939 | # src_excel_path = os.path.join(doc_data_path, 'src.xlsx') | 1981 | # src_excel_path = os.path.join(doc_data_path, 'src.xlsx') |
| 1940 | # wb.save(src_excel_path) | 1982 | # wb.save(src_excel_path) |
| 1941 | #need_follow表示在上传edms时文件名是否要添加"关注"两字 | 1983 | #need_follow表示在上传edms时文件名是否要添加"关注"两字 |
| 1942 | count_list, need_follow = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme, contract_result, doc.metadata) | 1984 | count_list, need_follow = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme, contract_result, doc.metadata, financial_statement_dict, financial_explanation_dict) |
| 1943 | wb.save(excel_path) | 1985 | wb.save(excel_path) |
| 1944 | 1986 | ||
| 1945 | except Exception as e: | 1987 | except Exception as e: |
| ... | @@ -2040,16 +2082,18 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -2040,16 +2082,18 @@ class Command(BaseCommand, LoggerMixin): |
| 2040 | license_summary[consts.BS_CLASSIFY] = bs_rebuild | 2082 | license_summary[consts.BS_CLASSIFY] = bs_rebuild |
| 2041 | 2083 | ||
| 2042 | # 比对 | 2084 | # 比对 |
| 2043 | if len(license_summary) > 0 and doc.document_scheme != consts.DOC_SCHEME_LIST[2]: | 2085 | if len(license_summary) > 0: |
| 2086 | if doc.document_scheme != consts.DOC_SCHEME_LIST[2]: | ||
| 2087 | # if len(license_summary) > 0 and doc.document_scheme != consts.DOC_SCHEME_LIST[2]: | ||
| 2044 | try: | 2088 | try: |
| 2045 | is_ca = True if doc.document_scheme == consts.DOC_SCHEME_LIST[0] else False | 2089 | is_ca = True if doc.document_scheme == consts.DOC_SCHEME_LIST[0] else False |
| 2046 | # 更新OCR累计识别结果表 | 2090 | # 更新OCR累计识别结果表 |
| 2047 | if business_type == consts.HIL_PREFIX: | 2091 | if business_type == consts.HIL_PREFIX: |
| 2048 | result_class = HILOCRResult if is_ca else HILSEOCRResult | 2092 | result_class = HILOCRResult if is_ca else HILSEOCRResult |
| 2049 | res_obj = atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str) | 2093 | res_obj = atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict) |
| 2050 | else: | 2094 | else: |
| 2051 | result_class = AFCOCRResult if is_ca else AFCSEOCRResult | 2095 | result_class = AFCOCRResult if is_ca else AFCSEOCRResult |
| 2052 | res_obj = atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str) | 2096 | res_obj = atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict) |
| 2053 | 2097 | ||
| 2054 | except Exception as e: | 2098 | except Exception as e: |
| 2055 | self.online_log.error( | 2099 | self.online_log.error( |
| ... | @@ -2080,6 +2124,22 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -2080,6 +2124,22 @@ class Command(BaseCommand, LoggerMixin): |
| 2080 | else: | 2124 | else: |
| 2081 | self.online_log.info('{0} [comparison info send success] [task={1}] ' | 2125 | self.online_log.info('{0} [comparison info send success] [task={1}] ' |
| 2082 | '[res_id={2}]'.format(self.log_base, task_str, res_obj.id)) | 2126 | '[res_id={2}]'.format(self.log_base, task_str, res_obj.id)) |
| 2127 | else: | ||
| 2128 | # license_summary 为空 | ||
| 2129 | self.online_log.info('{0} [task={1}] [no license_summary]'.format(self.log_base, task_str)) | ||
| 2130 | try: | ||
| 2131 | is_ca = True if doc.document_scheme == consts.DOC_SCHEME_LIST[0] else False | ||
| 2132 | # 更新OCR累计识别结果表 | ||
| 2133 | if business_type == consts.HIL_PREFIX: | ||
| 2134 | result_class = HILOCRResult if is_ca else HILSEOCRResult | ||
| 2135 | res_obj = atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict) | ||
| 2136 | else: | ||
| 2137 | result_class = AFCOCRResult if is_ca else AFCSEOCRResult | ||
| 2138 | res_obj = atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict) | ||
| 2139 | except Exception as e: | ||
| 2140 | self.online_log.error( | ||
| 2141 | '{0} [process error (ocr result save)] [task={1}] [error={2}]'.format( | ||
| 2142 | self.log_base, task_str, traceback.format_exc())) | ||
| 2083 | 2143 | ||
| 2084 | # DDA处理 | 2144 | # DDA处理 |
| 2085 | if do_dda: | 2145 | if do_dda: |
| ... | @@ -2426,7 +2486,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -2426,7 +2486,7 @@ class Command(BaseCommand, LoggerMixin): |
| 2426 | self.online_log.info('{0} [stop safely]'.format(self.log_base)) | 2486 | self.online_log.info('{0} [stop safely]'.format(self.log_base)) |
| 2427 | 2487 | ||
| 2428 | @transaction.atomic | 2488 | @transaction.atomic |
| 2429 | def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task_str): | 2489 | def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task_str, financial_statement_dict, financial_explanation_dict): |
| 2430 | with transaction.atomic('afc'): | 2490 | with transaction.atomic('afc'): |
| 2431 | res_obj = result_class.objects.using('afc').select_for_update().filter(application_id=doc.application_id).first() | 2491 | res_obj = result_class.objects.using('afc').select_for_update().filter(application_id=doc.application_id).first() |
| 2432 | self.online_log.info('{0} [sql lock AFC application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) | 2492 | self.online_log.info('{0} [sql lock AFC application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) |
| ... | @@ -2434,6 +2494,14 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task | ... | @@ -2434,6 +2494,14 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task |
| 2434 | res_obj = result_class() | 2494 | res_obj = result_class() |
| 2435 | res_obj.application_id = doc.application_id | 2495 | res_obj.application_id = doc.application_id |
| 2436 | self.online_log.info('{0} [res_obj is None application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) | 2496 | self.online_log.info('{0} [res_obj is None application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) |
| 2497 | # 财务报表存入数据库 | ||
| 2498 | if res_obj is not None: | ||
| 2499 | if financial_statement_dict: | ||
| 2500 | res_obj.fs_ocr = json.dumps([financial_statement_dict]) | ||
| 2501 | # 财报情况说明存入数据库 | ||
| 2502 | if res_obj is not None: | ||
| 2503 | if financial_explanation_dict: | ||
| 2504 | res_obj.fss_ocr = json.dumps([financial_explanation_dict]) | ||
| 2437 | for classify, field in consts.RESULT_MAPPING.items(): | 2505 | for classify, field in consts.RESULT_MAPPING.items(): |
| 2438 | if not hasattr(res_obj, field): | 2506 | if not hasattr(res_obj, field): |
| 2439 | continue | 2507 | continue |
| ... | @@ -2459,7 +2527,7 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task | ... | @@ -2459,7 +2527,7 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task |
| 2459 | return res_obj | 2527 | return res_obj |
| 2460 | 2528 | ||
| 2461 | @transaction.atomic | 2529 | @transaction.atomic |
| 2462 | def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str): | 2530 | def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict): |
| 2463 | with transaction.atomic('default'): | 2531 | with transaction.atomic('default'): |
| 2464 | res_obj = result_class.objects.using('default').select_for_update().filter(application_id=doc.application_id).first() | 2532 | res_obj = result_class.objects.using('default').select_for_update().filter(application_id=doc.application_id).first() |
| 2465 | self.online_log.info('{0} [sql lock HIL application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) | 2533 | self.online_log.info('{0} [sql lock HIL application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) |
| ... | @@ -2467,6 +2535,14 @@ def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, tas | ... | @@ -2467,6 +2535,14 @@ def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, tas |
| 2467 | res_obj = result_class() | 2535 | res_obj = result_class() |
| 2468 | res_obj.application_id = doc.application_id | 2536 | res_obj.application_id = doc.application_id |
| 2469 | self.online_log.info('{0} [res_obj is None application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) | 2537 | self.online_log.info('{0} [res_obj is None application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id)) |
| 2538 | # 财务报表三个表存入数据库 | ||
| 2539 | if res_obj is not None: | ||
| 2540 | if financial_statement_dict: | ||
| 2541 | res_obj.fs_ocr = json.dumps([financial_statement_dict]) | ||
| 2542 | # 财报情况说明存入数据库 | ||
| 2543 | if res_obj is not None: | ||
| 2544 | if financial_explanation_dict: | ||
| 2545 | res_obj.fss_ocr = json.dumps([financial_explanation_dict]) | ||
| 2470 | for classify, field in consts.RESULT_MAPPING.items(): | 2546 | for classify, field in consts.RESULT_MAPPING.items(): |
| 2471 | if not hasattr(res_obj, field): | 2547 | if not hasattr(res_obj, field): |
| 2472 | continue | 2548 | continue | ... | ... |
| ... | @@ -827,6 +827,53 @@ class BSWorkbook(Workbook): | ... | @@ -827,6 +827,53 @@ class BSWorkbook(Workbook): |
| 827 | ws.append(row) | 827 | ws.append(row) |
| 828 | ws.append((None, )) | 828 | ws.append((None, )) |
| 829 | 829 | ||
| 830 | |||
def financial_rebuild(self, financial_statement_dict):
    """Write the financial-statement OCR fields to a dedicated sheet.

    ``financial_statement_dict`` maps a field kind (``"code"`` or
    ``"stamp"``) to a dict keyed by statement table name
    (``"balance_sheet"``, ``"income_statement"``, ``"cash_flow_statement"``).
    One ``[label, str(value)]`` row is appended for every recognised table
    entry; entries under any other table name are ignored.

    Nothing is created when ``financial_statement_dict`` is empty.
    """
    if not financial_statement_dict:
        return

    # Row-label prefix for each statement table that gets written out.
    statement_titles = {
        "balance_sheet": "资产负债表",
        "income_statement": "利润表",
        "cash_flow_statement": "现金流量表",
    }

    sheet = self.create_sheet(consts.FINANCIAL_SHEET_NAME)
    for field_kind, per_table in financial_statement_dict.items():
        # Only "stamp" has its own suffix; any other kind (including
        # "code") is labelled as the identification code.
        suffix = "印章" if field_kind == "stamp" else "识别码"
        for table_name, value in per_table.items():
            title = statement_titles.get(table_name)
            if title is None:
                continue
            sheet.append([title + suffix, str(value)])
| 854 | |||
| 855 | |||
def financial_explanation_rebuild(self, financial_explanation_dict):
    """Write the financial-report explanation fields to a dedicated sheet.

    Appends one ``[label, str(value)]`` row per entry of
    ``financial_explanation_dict``. The ``"stamp"`` entry is labelled as the
    seal; every other key (including ``"title"``) is labelled as the
    company name.

    Nothing is created when ``financial_explanation_dict`` is empty.
    """
    if not financial_explanation_dict:
        return

    sheet = self.create_sheet(consts.FINANCIAL_EXPLANATION_SHEET_NAME)
    for field_kind, value in financial_explanation_dict.items():
        suffix = "印章" if field_kind == "stamp" else "公司名称"
        # NOTE(review): the producer visible in this change seeds "title" and
        # "stamp" with an empty dict before filling them from the OCR data,
        # so a missing OCR field would be rendered here as the text "{}" --
        # confirm that is the intended cell content.
        sheet.append(["财报情况说明" + suffix, str(value)])
| 875 | |||
| 876 | |||
| 830 | @staticmethod | 877 | @staticmethod |
| 831 | def remove_yuan(amount_key_set, key, src_str): | 878 | def remove_yuan(amount_key_set, key, src_str): |
| 832 | if key in amount_key_set and isinstance(src_str, str): | 879 | if key in amount_key_set and isinstance(src_str, str): |
| ... | @@ -926,7 +973,7 @@ class BSWorkbook(Workbook): | ... | @@ -926,7 +973,7 @@ class BSWorkbook(Workbook): |
| 926 | if len(self.sheetnames) > 1: | 973 | if len(self.sheetnames) > 1: |
| 927 | self.remove(self.get_sheet_by_name('Sheet')) | 974 | self.remove(self.get_sheet_by_name('Sheet')) |
| 928 | 975 | ||
| 929 | def rebuild(self, bs_summary, license_summary, res_list, document_scheme, contract_result, metadata): | 976 | def rebuild(self, bs_summary, license_summary, res_list, document_scheme, contract_result, metadata, financial_statement_dict, financial_explanation_dict): |
| 930 | res_count_tuple = self.res_sheet(res_list) | 977 | res_count_tuple = self.res_sheet(res_list) |
| 931 | 978 | ||
| 932 | count_list = [(consts.MODEL_FIELD_BS, len(bs_summary))] | 979 | count_list = [(consts.MODEL_FIELD_BS, len(bs_summary))] |
| ... | @@ -934,10 +981,14 @@ class BSWorkbook(Workbook): | ... | @@ -934,10 +981,14 @@ class BSWorkbook(Workbook): |
| 934 | self.license_rebuild(license_summary, document_scheme, count_list) | 981 | self.license_rebuild(license_summary, document_scheme, count_list) |
| 935 | self.contract_rebuild(contract_result) | 982 | self.contract_rebuild(contract_result) |
| 936 | self.bs_rebuild(bs_summary, res_count_tuple, metadata) | 983 | self.bs_rebuild(bs_summary, res_count_tuple, metadata) |
| 984 | self.financial_rebuild(financial_statement_dict) | ||
| 985 | self.financial_explanation_rebuild(financial_explanation_dict) | ||
| 937 | else: | 986 | else: |
| 938 | self.bs_rebuild(bs_summary, res_count_tuple, metadata) | 987 | self.bs_rebuild(bs_summary, res_count_tuple, metadata) |
| 939 | self.license_rebuild(license_summary, document_scheme, count_list) | 988 | self.license_rebuild(license_summary, document_scheme, count_list) |
| 940 | self.contract_rebuild(contract_result, True) | 989 | self.contract_rebuild(contract_result, True) |
| 990 | self.financial_rebuild(financial_statement_dict) | ||
| 991 | self.financial_explanation_rebuild(financial_explanation_dict) | ||
| 941 | self.move_res_sheet() | 992 | self.move_res_sheet() |
| 942 | self.remove_base_sheet() | 993 | self.remove_base_sheet() |
| 943 | return count_list, self.need_follow | 994 | return count_list, self.need_follow | ... | ... |
-
Please register or sign in to post a comment