e2de024d by 周伟奇

Merge branch 'feature/fsm-contract' into fix/report_ca

2 parents dc481cd4 8d595a3e
...@@ -11,7 +11,7 @@ PAGE_SIZE_DEFAULT = 10 ...@@ -11,7 +11,7 @@ PAGE_SIZE_DEFAULT = 10
11 FIXED_APPLICATION_ID_PREFIX = 'CH-S' 11 FIXED_APPLICATION_ID_PREFIX = 'CH-S'
12 12
13 DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACTMANAGEMENT'] 13 DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACTMANAGEMENT']
14 DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT'] 14 DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT', 'OVP']
15 COMPARE_DOC_SCHEME_LIST = ['CA', 'SE'] 15 COMPARE_DOC_SCHEME_LIST = ['CA', 'SE']
16 16
17 HIL_PREFIX = 'HIL' 17 HIL_PREFIX = 'HIL'
......
...@@ -1476,7 +1476,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1476,7 +1476,8 @@ class Command(BaseCommand, LoggerMixin):
1476 1476
1477 # AFC合同 1477 # AFC合同
1478 if classify_1_str == str(consts.CONTRACT_CLASSIFY): 1478 if classify_1_str == str(consts.CONTRACT_CLASSIFY):
1479 ocr_result = afc_predict(pdf_handler.pdf_info) 1479 is_fsm = doc.data_source == consts.DATA_SOURCE_LIST[3]
1480 ocr_result = afc_predict(pdf_handler.pdf_info, is_fsm=is_fsm)
1480 page_res = {} 1481 page_res = {}
1481 for page_num, page_info in ocr_result.get('page_info', {}).items(): 1482 for page_num, page_info in ocr_result.get('page_info', {}).items():
1482 if isinstance(page_num, str) and page_num.startswith('page_'): 1483 if isinstance(page_num, str) and page_num.startswith('page_'):
...@@ -1499,8 +1500,9 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1499,8 +1500,9 @@ class Command(BaseCommand, LoggerMixin):
1499 } 1500 }
1500 # HIL合同 1501 # HIL合同
1501 elif classify_1_str in consts.HIL_CONTRACT_TYPE_MAP: 1502 elif classify_1_str in consts.HIL_CONTRACT_TYPE_MAP:
1503 is_fsm = doc.data_source == consts.DATA_SOURCE_LIST[3]
1502 file_type_1 = consts.HIL_CONTRACT_TYPE_MAP.get(classify_1_str) 1504 file_type_1 = consts.HIL_CONTRACT_TYPE_MAP.get(classify_1_str)
1503 ocr_result_1 = hil_predict(pdf_handler.pdf_info, file_type_1) 1505 ocr_result_1 = hil_predict(pdf_handler.pdf_info, file_type_1, is_fsm=is_fsm)
1504 rebuild_res_1 = {} 1506 rebuild_res_1 = {}
1505 page_res = {} 1507 page_res = {}
1506 for field_name, field_info in ocr_result_1.items(): 1508 for field_name, field_info in ocr_result_1.items():
...@@ -1526,8 +1528,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1526,8 +1528,8 @@ class Command(BaseCommand, LoggerMixin):
1526 'page_info': page_info 1528 'page_info': page_info
1527 } 1529 }
1528 # hmh 1530 # hmh
1529 else: 1531 # else:
1530 pass 1532 # pass
1531 1533
1532 1534
1533 contract_res = {} 1535 contract_res = {}
......
...@@ -36,6 +36,7 @@ class RequestTrigger(NamedEnum): ...@@ -36,6 +36,7 @@ class RequestTrigger(NamedEnum):
36 DOCUPLOAD = (3, 'Document Upload') 36 DOCUPLOAD = (3, 'Document Upload')
37 SUBMITING = (4, 'Submiting') 37 SUBMITING = (4, 'Submiting')
38 UPLOADING = (5, 'Uploading') 38 UPLOADING = (5, 'Uploading')
39 OVP = (6, 'OVP')
39 40
40 41
41 class FailureReason(NamedEnum): 42 class FailureReason(NamedEnum):
......
...@@ -602,12 +602,13 @@ class UploadDocView(GenericView, DocHandler): ...@@ -602,12 +602,13 @@ class UploadDocView(GenericView, DocHandler):
602 is_zip = False 602 is_zip = False
603 603
604 classify_1 = 0 604 classify_1 = 0
605 # 电子合同 605 # 电子合同 Econtract or OVP(FSM)
606 if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]: 606 if data_source == consts.DATA_SOURCE_LIST[2] or data_source == consts.DATA_SOURCE_LIST[3]:
607 for keyword, classify_1_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix): 607 if document_scheme == consts.DOC_SCHEME_LIST[1]:
608 if keyword in document_name: 608 for keyword, classify_1_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix):
609 classify_1 = classify_1_tmp 609 if keyword in document_name:
610 break 610 classify_1 = classify_1_tmp
611 break
611 # FSM合同:WEP/MSI/SC 612 # FSM合同:WEP/MSI/SC
612 elif data_source == consts.DATA_SOURCE_LIST[0] and document_scheme == consts.DOC_SCHEME_LIST[0]: 613 elif data_source == consts.DATA_SOURCE_LIST[0] and document_scheme == consts.DOC_SCHEME_LIST[0]:
613 for keyword, classify_1_tmp in consts.FSM_ECONTRACT_KEYWORDS_MAP.get(prefix): 614 for keyword, classify_1_tmp in consts.FSM_ECONTRACT_KEYWORDS_MAP.get(prefix):
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
6 # @Description : 6 # @Description :
7 7
8 from .get_char import Finder 8 from .get_char import Finder
9 from .get_char_fsm import Finder as FSMFinder
9 import numpy as np 10 import numpy as np
10 11
11 12
...@@ -23,7 +24,7 @@ def extract_info(ocr_results): ...@@ -23,7 +24,7 @@ def extract_info(ocr_results):
23 return {'page_1': {'合同编号': contract_no}} 24 return {'page_1': {'合同编号': contract_no}}
24 25
25 26
26 def predict(pdf_info, is_qrs=False): 27 def predict(pdf_info, is_qrs=False, is_fsm=False):
27 ocr_results = {} 28 ocr_results = {}
28 for pno in pdf_info: 29 for pno in pdf_info:
29 ocr_results[pno] = {} 30 ocr_results[pno] = {}
...@@ -50,7 +51,10 @@ def predict(pdf_info, is_qrs=False): ...@@ -50,7 +51,10 @@ def predict(pdf_info, is_qrs=False):
50 results = extract_info(ocr_results) 51 results = extract_info(ocr_results)
51 else: 52 else:
52 # 输入是整个 PDF 中的信息 53 # 输入是整个 PDF 中的信息
53 f = Finder(pdf_info, ocr_results=ocr_results) 54 if is_fsm:
55 f = FSMFinder(pdf_info, ocr_results=ocr_results)
56 else:
57 f = Finder(pdf_info, ocr_results=ocr_results)
54 results = f.get_info() 58 results = f.get_info()
55 return results 59 return results
56 60
......
...@@ -6,9 +6,10 @@ ...@@ -6,9 +6,10 @@
6 # @Description : 6 # @Description :
7 7
8 from .get_char import Finder 8 from .get_char import Finder
9 from .get_char_fsm import Finder as FSMFinder
9 10
10 11
11 def predict(pdf_info, file_cls): 12 def predict(pdf_info, file_cls, is_fsm=False):
12 """Summary 13 """Summary
13 14
14 Args: 15 Args:
...@@ -58,7 +59,11 @@ def predict(pdf_info, file_cls): ...@@ -58,7 +59,11 @@ def predict(pdf_info, file_cls):
58 pdf_info = dict() 59 pdf_info = dict()
59 for pno, page_info in enumerate(pdf_info_1): 60 for pno, page_info in enumerate(pdf_info_1):
60 pdf_info[str(pno)] = page_info 61 pdf_info[str(pno)] = page_info
61 f = Finder(pdf_info) 62
63 if is_fsm:
64 f = FSMFinder(pdf_info)
65 else:
66 f = Finder(pdf_info)
62 if file_cls == 0: 67 if file_cls == 0:
63 results = f.get_info() 68 results = f.get_info()
64 if file_cls == 1: 69 if file_cls == 1:
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!