b8ef3e1f by 周伟奇

Merge branch 'feature/4058' into 'master'

Feature/4058

See merge request !24
2 parents 88e5fc6b 65b19211
...@@ -2434,14 +2434,14 @@ ECONTRACT_KEYWORDS_MAP = { ...@@ -2434,14 +2434,14 @@ ECONTRACT_KEYWORDS_MAP = {
2434 2434
2435 FSM_ECONTRACT_KEYWORDS_MAP = { 2435 FSM_ECONTRACT_KEYWORDS_MAP = {
2436 AFC_PREFIX: [ 2436 AFC_PREFIX: [
2437 ('延长保修条款与条件', FSM_CONTRACT_WEP_CLASSIFY), 2437 ('延长保修服务合约', FSM_CONTRACT_WEP_CLASSIFY),
2438 ('长悦保养套餐服务合约', FSM_CONTRACT_MSI_CLASSIFY), 2438 ('长悦保养套餐服务合约', FSM_CONTRACT_MSI_CLASSIFY),
2439 ('汽车销售合同补充合同', FSM_CONTRACT_SC2_CLASSIFY), 2439 ('汽车销售合同补充合同', FSM_CONTRACT_SC2_CLASSIFY),
2440 ('汽车销售合同', FSM_CONTRACT_SC_CLASSIFY), 2440 ('汽车销售合同', FSM_CONTRACT_SC_CLASSIFY),
2441 ], 2441 ],
2442 HIL_PREFIX: [ 2442 HIL_PREFIX: [
2443 ('延长保修条款与条件', FSM_CONTRACT_WEP_CLASSIFY), 2443 ('延长保修服务合约', FSM_CONTRACT_WEP_CLASSIFY),
2444 ('长悦保养套餐服务合', FSM_CONTRACT_MSI_CLASSIFY), 2444 ('长悦保养套餐服务合', FSM_CONTRACT_MSI_CLASSIFY),
2445 ('汽车销售合同补充合同', FSM_CONTRACT_SC2_CLASSIFY), 2445 ('汽车销售合同补充合同', FSM_CONTRACT_SC2_CLASSIFY),
2446 ('汽车销售合同', FSM_CONTRACT_SC_CLASSIFY), 2446 ('汽车销售合同', FSM_CONTRACT_SC_CLASSIFY),
2447 ] 2447 ]
......
...@@ -6,7 +6,7 @@ retriever_list = [Retriever(WEP_FIELD), Retriever(MSI_FIELD), Retriever(SC_FIELD ...@@ -6,7 +6,7 @@ retriever_list = [Retriever(WEP_FIELD), Retriever(MSI_FIELD), Retriever(SC_FIELD
6 6
7 def predict(pdf_info, file_type=0): 7 def predict(pdf_info, file_type=0):
8 retriever = retriever_list[file_type] 8 retriever = retriever_list[file_type]
9 pdf_text_list, pdf_img_list = pdf_info_rebuild(pdf_info) 9 pdf_text_list, pdf_img_list = pdf_info_rebuild(pdf_info, file_type=file_type)
10 return retriever.get_target_fields(pdf_text_list, pdf_img_list) 10 return retriever.get_target_fields(pdf_text_list, pdf_img_list)
11 11
12 12
......
1 def pdf_info_rebuild(pdf_info, fix_bbox=True): 1 def pdf_info_rebuild(pdf_info, fix_bbox=True, file_type=0):
2 pdf_text_info = dict() 2 pdf_text_info = dict()
3 pdf_img_info = dict() 3 pdf_img_info = dict()
4 for pno_str, page_info in pdf_info.items(): 4 for pno_str, page_info in pdf_info.items():
...@@ -11,7 +11,8 @@ def pdf_info_rebuild(pdf_info, fix_bbox=True): ...@@ -11,7 +11,8 @@ def pdf_info_rebuild(pdf_info, fix_bbox=True):
11 for span in line['spans']: 11 for span in line['spans']:
12 bbox, text = span['bbox'], span['text'].strip() 12 bbox, text = span['bbox'], span['text'].strip()
13 if len(text) != 0 and text not in text_set: 13 if len(text) != 0 and text not in text_set:
14 text_set.add(text) 14 if file_type != 3: # 汽车销售合同补充协议,相同的总价会被过滤,所以取消
15 text_set.add(text)
15 # bbox的高,不准 16 # bbox的高,不准
16 if fix_bbox and bbox[-1] - bbox[1] < span['size']: 17 if fix_bbox and bbox[-1] - bbox[1] < span['size']:
17 bbox[-1] = bbox[-1] + span['size'] 18 bbox[-1] = bbox[-1] + span['size']
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!