7f9efd41 by 冯轩

Merge branch 'feature/CHINARPA-4659' into feature/uat-tmp

2 parents 040c5381 d2a1f3f2
......@@ -98,6 +98,7 @@ RES_SHEET_HEADER = ('页码', '图片序号', '检测图片序号', '结果')
# Per-image result labels. The OCR loop appends them as the last element of
# each res_list tuple (visible for the OTHER and FINANCIAL_STATEMENT labels).
# Recognition succeeded.
RES_SUCCESS = '识别成功'
# Recognition succeeded; the image was classified into the "other" category.
RES_SUCCESS_OTHER = '识别成功(其他类)'
# Recognition succeeded but produced empty data.
RES_SUCCESS_EMPTY = '识别成功(空数据)'
# Recognition succeeded; the image was classified as a financial statement.
RES_SUCCESS_FINANCIAL_STATEMENT = '识别成功(财务报表类)'
# Recognition failed.
RES_FAILED = '识别失败'
# Recognition failed in stage 1 / stage 2.
# NOTE(review): what each stage covers is not visible here — confirm with the pipeline code.
RES_FAILED_1 = '识别失败(阶段1)'
RES_FAILED_2 = '识别失败(阶段2)'
......@@ -2562,4 +2563,9 @@ FSM_ACTIVITED_STATUS = {
# Classification labels treated as financial-statement pages by the OCR loop
# (label 97 is mapped there to the "balance_sheet" table).
FINANCIAL_STATEMENT_CLASSIFY_LIST = [97, 98, 99]
# Name of the workbook sheet that receives the financial-statement results.
FINANCIAL_SHEET_NAME = "财务报表"
\ No newline at end of file
FINANCIAL_SHEET_NAME = "财务报表"
# Classification labels treated as financial-statement explanation pages.
FINANCIAL_EXPLANATION_CLASSIFY_LIST = [100]
# Name of the workbook sheet that receives the financial-statement
# explanation results.
FINANCIAL_EXPLANATION_SHEET_NAME = "财报情况说明"
\ No newline at end of file
......
......@@ -1725,7 +1725,9 @@ class Command(BaseCommand, LoggerMixin):
contract_result = {}
contract_result_compare = {}
# 添加财报三个报表的处理
financial_statement_dict = {"code": {}, "stamp": {}}
financial_statement_dict = {}
# 添加财报情况说明的处理
financial_explanation_dict = {}
res_list = []
interest_keyword = Keywords.objects.filter(
type=KeywordsType.INTEREST.value, on_off=True).values_list('keyword', flat=True)
......@@ -1756,6 +1758,14 @@ class Command(BaseCommand, LoggerMixin):
self.log_base, img_path))
continue
elif classify in consts.FINANCIAL_STATEMENT_CLASSIFY_LIST:
# 添加到 res_list 中
res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_FINANCIAL_STATEMENT))
# 只要分类为财报三个报表的,就在 financial_statement_dict 中添加对应的 code 和 stamp 两个dict
if "code" not in financial_statement_dict:
financial_statement_dict["code"] = {}
if "stamp" not in financial_statement_dict:
financial_statement_dict["stamp"] = {}
financial_statement_table_name = None
if classify == 97:
financial_statement_table_name = "balance_sheet"
......@@ -1770,6 +1780,21 @@ class Command(BaseCommand, LoggerMixin):
if "stamp" in ocr_data:
stamp = ocr_data.get("stamp", "")
financial_statement_dict["stamp"][financial_statement_table_name] = stamp
elif classify in consts.FINANCIAL_EXPLANATION_CLASSIFY_LIST:
# 添加到 res_list 中
res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_FINANCIAL_STATEMENT))
# 只要分类为财报情况说明的,就在 financial_explanation_dict 中添加对应的 title 和 stamp 两个dict
if "title" not in financial_explanation_dict:
financial_explanation_dict["title"] = {}
if "stamp" not in financial_explanation_dict:
financial_explanation_dict["stamp"] = {}
if "title" in ocr_data:
title = ocr_data.get("title", "")
financial_explanation_dict["title"] = title
if "stamp" in ocr_data:
stamp = ocr_data.get("stamp", "")
financial_explanation_dict["stamp"] = stamp
elif classify in consts.OTHER_CLASSIFY_SET: # 其他类
res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_OTHER))
continue
......@@ -1956,7 +1981,7 @@ class Command(BaseCommand, LoggerMixin):
# src_excel_path = os.path.join(doc_data_path, 'src.xlsx')
# wb.save(src_excel_path)
#need_follow表示在上传edms时文件名是否要添加"关注"两字
count_list, need_follow = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme, contract_result, doc.metadata, financial_statement_dict)
count_list, need_follow = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme, contract_result, doc.metadata, financial_statement_dict, financial_explanation_dict)
wb.save(excel_path)
except Exception as e:
......@@ -2057,46 +2082,64 @@ class Command(BaseCommand, LoggerMixin):
license_summary[consts.BS_CLASSIFY] = bs_rebuild
# 比对
if len(license_summary) > 0 and doc.document_scheme != consts.DOC_SCHEME_LIST[2]:
if len(license_summary) > 0:
if doc.document_scheme != consts.DOC_SCHEME_LIST[2]:
# if len(license_summary) > 0 and doc.document_scheme != consts.DOC_SCHEME_LIST[2]:
try:
is_ca = True if doc.document_scheme == consts.DOC_SCHEME_LIST[0] else False
# 更新OCR累计识别结果表
if business_type == consts.HIL_PREFIX:
result_class = HILOCRResult if is_ca else HILSEOCRResult
res_obj = atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict)
else:
result_class = AFCOCRResult if is_ca else AFCSEOCRResult
res_obj = atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict)
except Exception as e:
self.online_log.error(
'{0} [process error (ocr result save)] [task={1}] [error={2}]'.format(
self.log_base, task_str, traceback.format_exc()))
else:
self.online_log.info('{0} [ocr result save success] [task={1}] [res_id={2}]'.format(
self.log_base, task_str, res_obj.id))
# 触发比对
try:
# 是否fsm
cms_status_class = HILCmsStatusInfo if business_type in consts.HIL_SET else AFCCmsStatusInfo
cms_status_info = cms_status_class.objects.filter(application_id=doc.application_id).first()
is_fsm = cms_status_info is not None and cms_status_info.is_fsm == 1
self.online_log.info('{0} [isfsm] [task={1}] [true or false={2}]'.format(
self.log_base, task_str, is_fsm))
if is_fsm:
fsm_compare.apply_async((doc.application_id, business_type, None, res_obj.id, is_ca, True),
queue='queue_compare')
else:
# pass
compare.apply_async((doc.application_id, business_type, None, res_obj.id,
is_ca, True), queue='queue_compare')
except Exception as e:
self.online_log.error(
'{0} [process error (comparison info send)] [task={1}] [error={2}]'.format(
self.log_base, task_str, traceback.format_exc()))
else:
self.online_log.info('{0} [comparison info send success] [task={1}] '
'[res_id={2}]'.format(self.log_base, task_str, res_obj.id))
else:
# license_summary 为空
self.online_log.info('{0} [task={1}] [no license_summary]'.format(self.log_base, task_str))
try:
is_ca = True if doc.document_scheme == consts.DOC_SCHEME_LIST[0] else False
# 更新OCR累计识别结果表
if business_type == consts.HIL_PREFIX:
result_class = HILOCRResult if is_ca else HILSEOCRResult
res_obj = atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict)
res_obj = atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict)
else:
result_class = AFCOCRResult if is_ca else AFCSEOCRResult
res_obj = atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict)
res_obj = atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict)
except Exception as e:
self.online_log.error(
'{0} [process error (ocr result save)] [task={1}] [error={2}]'.format(
self.log_base, task_str, traceback.format_exc()))
else:
self.online_log.info('{0} [ocr result save success] [task={1}] [res_id={2}]'.format(
self.log_base, task_str, res_obj.id))
# 触发比对
try:
# 是否fsm
cms_status_class = HILCmsStatusInfo if business_type in consts.HIL_SET else AFCCmsStatusInfo
cms_status_info = cms_status_class.objects.filter(application_id=doc.application_id).first()
is_fsm = cms_status_info is not None and cms_status_info.is_fsm == 1
self.online_log.info('{0} [isfsm] [task={1}] [true or false={2}]'.format(
self.log_base, task_str, is_fsm))
if is_fsm:
fsm_compare.apply_async((doc.application_id, business_type, None, res_obj.id, is_ca, True),
queue='queue_compare')
else:
# pass
compare.apply_async((doc.application_id, business_type, None, res_obj.id,
is_ca, True), queue='queue_compare')
except Exception as e:
self.online_log.error(
'{0} [process error (comparison info send)] [task={1}] [error={2}]'.format(
self.log_base, task_str, traceback.format_exc()))
else:
self.online_log.info('{0} [comparison info send success] [task={1}] '
'[res_id={2}]'.format(self.log_base, task_str, res_obj.id))
# DDA处理
if do_dda:
......@@ -2443,7 +2486,7 @@ class Command(BaseCommand, LoggerMixin):
self.online_log.info('{0} [stop safely]'.format(self.log_base))
@transaction.atomic
def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task_str, financial_statement_dict):
def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task_str, financial_statement_dict, financial_explanation_dict):
with transaction.atomic('afc'):
res_obj = result_class.objects.using('afc').select_for_update().filter(application_id=doc.application_id).first()
self.online_log.info('{0} [sql lock AFC application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id))
......@@ -2451,7 +2494,14 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task
res_obj = result_class()
res_obj.application_id = doc.application_id
self.online_log.info('{0} [res_obj is None application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id))
res_obj.fs_ocr = json.dumps([financial_statement_dict])
# 财务报表存入数据库
if res_obj is not None:
if financial_statement_dict:
res_obj.fs_ocr = json.dumps([financial_statement_dict])
# 财报情况说明存入数据库
if res_obj is not None:
if financial_explanation_dict:
res_obj.fss_ocr = json.dumps([financial_explanation_dict])
for classify, field in consts.RESULT_MAPPING.items():
if not hasattr(res_obj, field):
continue
......@@ -2477,7 +2527,7 @@ def atomicSaveDBAFC(self,result_class,doc,license_summary,ic_merge,rp_merge,task
return res_obj
@transaction.atomic
def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict):
def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, task_str, financial_statement_dict, financial_explanation_dict):
with transaction.atomic('default'):
res_obj = result_class.objects.using('default').select_for_update().filter(application_id=doc.application_id).first()
self.online_log.info('{0} [sql lock HIL application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id))
......@@ -2485,7 +2535,14 @@ def atomicSaveDBHIL(self,result_class,doc,license_summary,ic_merge,rp_merge, tas
res_obj = result_class()
res_obj.application_id = doc.application_id
self.online_log.info('{0} [res_obj is None application_id={1} doc_id={2}]'.format(self.log_base, doc.application_id,doc.id))
res_obj.fs_ocr = json.dumps([financial_statement_dict])
# 财务报表三个表存入数据库
if res_obj is not None:
if financial_statement_dict:
res_obj.fs_ocr = json.dumps([financial_statement_dict])
# 财报情况说明存入数据库
if res_obj is not None:
if financial_explanation_dict:
res_obj.fss_ocr = json.dumps([financial_explanation_dict])
for classify, field in consts.RESULT_MAPPING.items():
if not hasattr(res_obj, field):
continue
......
......@@ -829,6 +829,10 @@ class BSWorkbook(Workbook):
def financial_rebuild(self, financial_statement_dict):
# 如果 financial_statement_dict 为空,则不创建表
if not financial_statement_dict:
return
# 如果 financial_statement_dict 不为空,则创建表
ws = self.create_sheet(consts.FINANCIAL_SHEET_NAME)
for fin_key, fin_value in financial_statement_dict.items():
table_str = "识别码"
......@@ -849,6 +853,27 @@ class BSWorkbook(Workbook):
ws.append(row)
def financial_explanation_rebuild(self, financial_explanation_dict):
    """Build the financial-statement explanation sheet.

    Creates the sheet named ``consts.FINANCIAL_EXPLANATION_SHEET_NAME`` and
    appends one two-column row per entry of ``financial_explanation_dict``:
    a label derived from the key, and the value rendered as text.

    Args:
        financial_explanation_dict: Mapping of field key to recognised
            value. The keys labelled explicitly are ``"title"`` and
            ``"stamp"``. A falsy mapping (empty or ``None``) means there is
            nothing to report, so no sheet is created.

    Returns:
        None. The sheet is added to this workbook as a side effect.
    """
    # Nothing was recognised: do not create an empty sheet.
    if not financial_explanation_dict:
        return

    field_labels = {"title": "公司名称", "stamp": "印章"}
    ws = self.create_sheet(consts.FINANCIAL_EXPLANATION_SHEET_NAME)
    for fin_key, fin_value in financial_explanation_dict.items():
        # Keys other than "title"/"stamp" keep the original fallback label.
        label = field_labels.get(fin_key, "公司名称")
        # The OCR loop seeds each field with an empty-dict placeholder and
        # only overwrites it when OCR returned that field. Render such
        # placeholders (and None) as a blank cell instead of "{}" / "None".
        if fin_value is None or (isinstance(fin_value, dict) and not fin_value):
            cell_text = ""
        else:
            cell_text = str(fin_value)
        ws.append(["财报情况说明" + label, cell_text])
@staticmethod
def remove_yuan(amount_key_set, key, src_str):
if key in amount_key_set and isinstance(src_str, str):
......@@ -948,7 +973,7 @@ class BSWorkbook(Workbook):
if len(self.sheetnames) > 1:
self.remove(self.get_sheet_by_name('Sheet'))
def rebuild(self, bs_summary, license_summary, res_list, document_scheme, contract_result, metadata, financial_statement_dict):
def rebuild(self, bs_summary, license_summary, res_list, document_scheme, contract_result, metadata, financial_statement_dict, financial_explanation_dict):
res_count_tuple = self.res_sheet(res_list)
count_list = [(consts.MODEL_FIELD_BS, len(bs_summary))]
......@@ -957,11 +982,13 @@ class BSWorkbook(Workbook):
self.contract_rebuild(contract_result)
self.bs_rebuild(bs_summary, res_count_tuple, metadata)
self.financial_rebuild(financial_statement_dict)
self.financial_explanation_rebuild(financial_explanation_dict)
else:
self.bs_rebuild(bs_summary, res_count_tuple, metadata)
self.license_rebuild(license_summary, document_scheme, count_list)
self.contract_rebuild(contract_result, True)
self.financial_rebuild(financial_statement_dict)
self.financial_explanation_rebuild(financial_explanation_dict)
self.move_res_sheet()
self.remove_base_sheet()
return count_list, self.need_follow
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!