diff --git a/src/apps/doc/consts.py b/src/apps/doc/consts.py index ac00fb8..4b5b3ba 100644 --- a/src/apps/doc/consts.py +++ b/src/apps/doc/consts.py @@ -2434,14 +2434,14 @@ ECONTRACT_KEYWORDS_MAP = { FSM_ECONTRACT_KEYWORDS_MAP = { AFC_PREFIX: [ - ('延长保修条款与条件', FSM_CONTRACT_WEP_CLASSIFY), + ('延长保修服务合约', FSM_CONTRACT_WEP_CLASSIFY), ('长悦保养套餐服务合约', FSM_CONTRACT_MSI_CLASSIFY), ('汽车销售合同补充合同', FSM_CONTRACT_SC2_CLASSIFY), ('汽车销售合同', FSM_CONTRACT_SC_CLASSIFY), ], HIL_PREFIX: [ - ('延长保修条款与条件', FSM_CONTRACT_WEP_CLASSIFY), - ('长悦保养套餐服务合同', FSM_CONTRACT_MSI_CLASSIFY), + ('延长保修服务合约', FSM_CONTRACT_WEP_CLASSIFY), + ('长悦保养套餐服务合约', FSM_CONTRACT_MSI_CLASSIFY), ('汽车销售合同补充合同', FSM_CONTRACT_SC2_CLASSIFY), ('汽车销售合同', FSM_CONTRACT_SC_CLASSIFY), ] diff --git a/src/common/fsm_econtract/fsm_contract_ocr.py b/src/common/fsm_econtract/fsm_contract_ocr.py index fa07b7a..69aa470 100644 --- a/src/common/fsm_econtract/fsm_contract_ocr.py +++ b/src/common/fsm_econtract/fsm_contract_ocr.py @@ -6,7 +6,7 @@ retriever_list = [Retriever(WEP_FIELD), Retriever(MSI_FIELD), Retriever(SC_FIELD def predict(pdf_info, file_type=0): retriever = retriever_list[file_type] - pdf_text_list, pdf_img_list = pdf_info_rebuild(pdf_info) + pdf_text_list, pdf_img_list = pdf_info_rebuild(pdf_info, file_type=file_type) return retriever.get_target_fields(pdf_text_list, pdf_img_list) diff --git a/src/common/fsm_econtract/tools.py b/src/common/fsm_econtract/tools.py index e50bcc3..cf34980 100644 --- a/src/common/fsm_econtract/tools.py +++ b/src/common/fsm_econtract/tools.py @@ -1,4 +1,4 @@ -def pdf_info_rebuild(pdf_info, fix_bbox=True): +def pdf_info_rebuild(pdf_info, fix_bbox=True, file_type=0): pdf_text_info = dict() pdf_img_info = dict() for pno_str, page_info in pdf_info.items(): @@ -11,7 +11,8 @@ def pdf_info_rebuild(pdf_info, fix_bbox=True): for span in line['spans']: bbox, text = span['bbox'], span['text'].strip() if len(text) != 0 and text not in text_set: - text_set.add(text) + if file_type != 3: # 
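Supplementary car-sales-contract agreement: identical total-price texts would be dropped by the de-duplication, so it is skipped for this file type + text_set.add(text) # bbox的高,不准 if fix_bbox and bbox[-1] - bbox[1] < span['size']: bbox[-1] = bbox[-1] + span['size']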