Merge remote-tracking branch 'origin/feature_add_insurance_sec_page'
Showing
2 changed files
with
39 additions
and
1 deletion
| ... | @@ -2055,6 +2055,43 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -2055,6 +2055,43 @@ class Command(BaseCommand, LoggerMixin): |
| 2055 | # src_excel_path = os.path.join(doc_data_path, 'src.xlsx') | 2055 | # src_excel_path = os.path.join(doc_data_path, 'src.xlsx') |
| 2056 | # wb.save(src_excel_path) | 2056 | # wb.save(src_excel_path) |
| 2057 | #need_follow表示在上传edms时文件名是否要添加"关注"两字 | 2057 | #need_follow表示在上传edms时文件名是否要添加"关注"两字 |
| 2058 | |||
| 2059 | # 处理 保单 的后续页的章识别不到的问题 | ||
| 2060 | if len(license_summary) > 0: | ||
| 2061 | # 如果是保单 | ||
| 2062 | if consts.INSURANCE_CLASSIFY in license_summary.keys(): | ||
| 2063 | # 获取 license_list | ||
| 2064 | license_list_tmp = license_summary.get(consts.INSURANCE_CLASSIFY, []) | ||
| 2065 | if len(license_list_tmp) > 0: | ||
| 2066 | # 章的有无 | ||
| 2067 | stamp_flag_list = [0] * len(license_list_tmp) | ||
| 2068 | for license_list_tmp_idx, license_dict_tmp in enumerate(license_list_tmp): | ||
| 2069 | if isinstance(license_dict_tmp, dict): | ||
| 2070 | if "保单章" in license_dict_tmp.keys(): | ||
| 2071 | if license_dict_tmp.get("保单章") is not None: | ||
| 2072 | if isinstance(license_dict_tmp.get("保单章"), str) and license_dict_tmp.get("保单章") == "有": | ||
| 2073 | stamp_flag_list[license_list_tmp_idx] = 1 | ||
| 2074 | # 计算 stamp_flag_list 中的 sum | ||
| 2075 | stamp_flag_sum = sum(stamp_flag_list) | ||
| 2076 | |||
| 2077 | # 检查 license_list_tmp 中的每个元素中 [被保险人姓名] 的值是否为空, 若为空, 则不是第一页 | ||
| 2078 | license_first_page_list = [0] * len(license_list_tmp) | ||
| 2079 | for license_list_tmp_idx, license_dict_tmp in enumerate(license_list_tmp): | ||
| 2080 | if isinstance(license_dict_tmp, dict): | ||
| 2081 | if "被保险人姓名" in license_dict_tmp.keys(): | ||
| 2082 | if license_dict_tmp.get("被保险人姓名") is not None: | ||
| 2083 | if isinstance(license_dict_tmp.get("被保险人姓名"), str) and license_dict_tmp.get("被保险人姓名") != "" and "保险" not in license_dict_tmp.get("被保险人姓名"): | ||
| 2084 | license_first_page_list[license_list_tmp_idx] = 1 | ||
| 2085 | break | ||
| 2086 | # 获取 license_first_page_list 中为 1 的索引 | ||
| 2087 | license_first_page_idx = [i for i, x in enumerate(license_first_page_list) if x == 1] | ||
| 2088 | # 将 license_summary 中key为 INSURANCE_CLASSIFY 的 value 替换为 first_page 中的内容, 且只保留 1 页 | ||
| 2089 | if len(license_first_page_idx) > 0: | ||
| 2090 | license_summary[consts.INSURANCE_CLASSIFY] = [license_list_tmp[license_first_page_idx[0]]] | ||
| 2091 | # 如果 sum > 0, 说明有章, 无论是在第几页, 接下来将第一页的内容提取出来,只保留第一页 | ||
| 2092 | if stamp_flag_sum > 0: | ||
| 2093 | license_summary[consts.INSURANCE_CLASSIFY][0]["保单章"] = "有" | ||
| 2094 | |||
| 2058 | count_list, need_follow = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme, contract_result, doc.metadata, financial_statement_dict, financial_explanation_dict, down_payment_dict) | 2095 | count_list, need_follow = wb.rebuild(merged_bs_summary, license_summary, res_list, doc.document_scheme, contract_result, doc.metadata, financial_statement_dict, financial_explanation_dict, down_payment_dict) |
| 2059 | wb.save(excel_path) | 2096 | wb.save(excel_path) |
| 2060 | 2097 | ... | ... |
| ... | @@ -209,7 +209,8 @@ class Retriever: | ... | @@ -209,7 +209,8 @@ class Retriever: |
| 209 | if pno_str == '-1': | 209 | if pno_str == '-1': |
| 210 | is_last_pno = True | 210 | is_last_pno = True |
| 211 | pno_int_list = [int(pno_str) for pno_str in pdf_text_list.keys()] | 211 | pno_int_list = [int(pno_str) for pno_str in pdf_text_list.keys()] |
| 212 | pno_str = str(max(pno_int_list)) | 212 | if len(pno_int_list) > 0: |
| 213 | pno_str = str(max(pno_int_list)) | ||
| 213 | 214 | ||
| 214 | # 搜索关键词 | 215 | # 搜索关键词 |
| 215 | key_text_info = dict() | 216 | key_text_info = dict() | ... | ... |
-
Please register or sign in to post a comment