c8bacd75 by 冯轩

Merge remote-tracking branch 'origin/feature_add_insurance_sec_page'

2 parents 83c49cee d62f4b3c
...@@ -2055,6 +2055,43 @@ class Command(BaseCommand, LoggerMixin): ...@@ -2055,6 +2055,43 @@ class Command(BaseCommand, LoggerMixin):
2055 # src_excel_path = os.path.join(doc_data_path, 'src.xlsx') 2055 # src_excel_path = os.path.join(doc_data_path, 'src.xlsx')
2056 # wb.save(src_excel_path) 2056 # wb.save(src_excel_path)
2057 #need_follow表示在上传edms时文件名是否要添加"关注"两字 2057 #need_follow表示在上传edms时文件名是否要添加"关注"两字
2058
2059 # 处理 保单 的后续页的章识别不到的问题
2060 if len(license_summary) > 0:
2061 # 如果是保单
2062 if consts.INSURANCE_CLASSIFY in license_summary.keys():
2063 # 获取 license_list
2064 license_list_tmp = license_summary.get(consts.INSURANCE_CLASSIFY, [])
2065 if len(license_list_tmp) > 0:
2066 # 章的有无
2067 stamp_flag_list = [0] * len(license_list_tmp)
2068 for license_list_tmp_idx, license_dict_tmp in enumerate(license_list_tmp):
2069 if isinstance(license_dict_tmp, dict):
2070 if "保单章" in license_dict_tmp.keys():
2071 if license_dict_tmp.get("保单章") is not None:
2072 if isinstance(license_dict_tmp.get("保单章"), str) and license_dict_tmp.get("保单章") == "有":
2073 stamp_flag_list[license_list_tmp_idx] = 1
2074 # 计算 stamp_flag_list 中的 sum
2075 stamp_flag_sum = sum(stamp_flag_list)
2076
2077 # 检查 license_list_tmp 中的每个元素中 [被保险人姓名] 的值是否为空, 若为空, 则不是第一页
2078 license_first_page_list = [0] * len(license_list_tmp)
2079 for license_list_tmp_idx, license_dict_tmp in enumerate(license_list_tmp):
2080 if isinstance(license_dict_tmp, dict):
2081 if "被保险人姓名" in license_dict_tmp.keys():
2082 if license_dict_tmp.get("被保险人姓名") is not None:
2083 if isinstance(license_dict_tmp.get("被保险人姓名"), str) and license_dict_tmp.get("被保险人姓名") != "" and "保险" not in license_dict_tmp.get("被保险人姓名"):
2084 license_first_page_list[license_list_tmp_idx] = 1
2085 break
2086 # 获取 license_first_page_list 中为 1 的索引
2087 license_first_page_idx = [i for i, x in enumerate(license_first_page_list) if x == 1]
2088 # 将 license_summary 中key为 INSURANCE_CLASSIFY 的 value 替换为 first_page 中的内容, 且只保留 1 页
2089 if len(license_first_page_idx) > 0:
2090 license_summary[consts.INSURANCE_CLASSIFY] = [license_list_tmp[license_first_page_idx[0]]]
2091 # 如果 sum > 0, 说明有章, 无论是在第几页, 接下来将第一页的内容提取出来,只保留第一页
2092 if stamp_flag_sum > 0:
2093 license_summary[consts.INSURANCE_CLASSIFY][0]["保单章"] = "有"
2094
2058 count_list, need_follow = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme, contract_result, doc.metadata, financial_statement_dict, financial_explanation_dict, down_payment_dict) 2095 count_list, need_follow = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme, contract_result, doc.metadata, financial_statement_dict, financial_explanation_dict, down_payment_dict)
2059 wb.save(excel_path) 2096 wb.save(excel_path)
2060 2097
......
...@@ -209,7 +209,8 @@ class Retriever: ...@@ -209,7 +209,8 @@ class Retriever:
209 if pno_str == '-1': 209 if pno_str == '-1':
210 is_last_pno = True 210 is_last_pno = True
211 pno_int_list = [int(pno_str) for pno_str in pdf_text_list.keys()] 211 pno_int_list = [int(pno_str) for pno_str in pdf_text_list.keys()]
212 pno_str = str(max(pno_int_list)) 212 if len(pno_int_list) > 0:
213 pno_str = str(max(pno_int_list))
213 214
214 # 搜索关键词 215 # 搜索关键词
215 key_text_info = dict() 216 key_text_info = dict()
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!