Merge branch 'feature/4058' into 'master'

Feature/4058 See merge request !24

Merge branch 'feature/4058' into 'master'
Feature/4058 See merge request !24
周伟奇
Showing 3 changed files with 7 additions and 6 deletions
src/apps/doc/consts.py
src/common/fsm_econtract/fsm_contract_ocr.py
src/common/fsm_econtract/tools.py
--- a/src/apps/doc/consts.py
View file @b8ef3e1
+++ b/src/apps/doc/consts.py
View file @b8ef3e1
@@ -2434,14 +2434,14 @@ ECONTRACT_KEYWORDS_MAP = {
 FSM_ECONTRACT_KEYWORDS_MAP = {
    AFC_PREFIX: [
-        ('延长保修条款与条件', FSM_CONTRACT_WEP_CLASSIFY),
+        ('延长保修服务合约', FSM_CONTRACT_WEP_CLASSIFY),
        ('长悦保养套餐服务合约', FSM_CONTRACT_MSI_CLASSIFY),
        ('汽车销售合同补充合同', FSM_CONTRACT_SC2_CLASSIFY),
        ('汽车销售合同', FSM_CONTRACT_SC_CLASSIFY),
    ],
    HIL_PREFIX: [
-        ('延长保修条款与条件', FSM_CONTRACT_WEP_CLASSIFY),
+        ('延长保修服务合约', FSM_CONTRACT_WEP_CLASSIFY),
-        ('长悦保养套餐服务合同', FSM_CONTRACT_MSI_CLASSIFY),
+        ('长悦保养套餐服务合约', FSM_CONTRACT_MSI_CLASSIFY),
        ('汽车销售合同补充合同', FSM_CONTRACT_SC2_CLASSIFY),
        ('汽车销售合同', FSM_CONTRACT_SC_CLASSIFY),
    ]
--- a/src/common/fsm_econtract/fsm_contract_ocr.py
View file @b8ef3e1
+++ b/src/common/fsm_econtract/fsm_contract_ocr.py
View file @b8ef3e1
@@ -6,7 +6,7 @@ retriever_list = [Retriever(WEP_FIELD), Retriever(MSI_FIELD), Retriever(SC_FIELD
 def predict(pdf_info, file_type=0):
    retriever =  retriever_list[file_type]
-    pdf_text_list, pdf_img_list = pdf_info_rebuild(pdf_info) 
+    pdf_text_list, pdf_img_list = pdf_info_rebuild(pdf_info, file_type=file_type) 
    return retriever.get_target_fields(pdf_text_list, pdf_img_list)
--- a/src/common/fsm_econtract/tools.py
View file @b8ef3e1
+++ b/src/common/fsm_econtract/tools.py
View file @b8ef3e1
-def pdf_info_rebuild(pdf_info, fix_bbox=True):
+def pdf_info_rebuild(pdf_info, fix_bbox=True, file_type=0):
    pdf_text_info = dict()
    pdf_img_info = dict()
    for pno_str, page_info in pdf_info.items():
@@ -11,7 +11,8 @@ def pdf_info_rebuild(pdf_info, fix_bbox=True):
                    for span in line['spans']:
                        bbox, text = span['bbox'], span['text'].strip()
                        if len(text) != 0 and text not in text_set:
-                            text_set.add(text)
+                            if file_type != 3:  # Supplementary car sales contract: identical total-price texts would be filtered out by de-duplication, so skip it
+                                text_set.add(text)
                            # The bbox height is not accurate
                            if fix_bbox and bbox[-1] - bbox[1] < span['size']:
                                bbox[-1] = bbox[-1] + span['size']