Merge branch 'feature/fsm-contract' into fix/report_ca
Showing 8 changed files with 28 additions and 15 deletions
... | @@ -11,7 +11,7 @@ PAGE_SIZE_DEFAULT = 10 | ... | @@ -11,7 +11,7 @@ PAGE_SIZE_DEFAULT = 10 |
11 | FIXED_APPLICATION_ID_PREFIX = 'CH-S' | 11 | FIXED_APPLICATION_ID_PREFIX = 'CH-S' |
12 | 12 | ||
13 | DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACTMANAGEMENT'] | 13 | DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACTMANAGEMENT'] |
14 | DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT'] | 14 | DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT', 'OVP'] |
15 | COMPARE_DOC_SCHEME_LIST = ['CA', 'SE'] | 15 | COMPARE_DOC_SCHEME_LIST = ['CA', 'SE'] |
16 | 16 | ||
17 | HIL_PREFIX = 'HIL' | 17 | HIL_PREFIX = 'HIL' | ... | ... |
... | @@ -1476,7 +1476,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1476,7 +1476,8 @@ class Command(BaseCommand, LoggerMixin): |
1476 | 1476 | ||
1477 | # AFC合同 | 1477 | # AFC合同 |
1478 | if classify_1_str == str(consts.CONTRACT_CLASSIFY): | 1478 | if classify_1_str == str(consts.CONTRACT_CLASSIFY): |
1479 | ocr_result = afc_predict(pdf_handler.pdf_info) | 1479 | is_fsm = doc.data_source == consts.DATA_SOURCE_LIST[3] |
1480 | ocr_result = afc_predict(pdf_handler.pdf_info, is_fsm=is_fsm) | ||
1480 | page_res = {} | 1481 | page_res = {} |
1481 | for page_num, page_info in ocr_result.get('page_info', {}).items(): | 1482 | for page_num, page_info in ocr_result.get('page_info', {}).items(): |
1482 | if isinstance(page_num, str) and page_num.startswith('page_'): | 1483 | if isinstance(page_num, str) and page_num.startswith('page_'): |
... | @@ -1499,8 +1500,9 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1499,8 +1500,9 @@ class Command(BaseCommand, LoggerMixin): |
1499 | } | 1500 | } |
1500 | # HIL合同 | 1501 | # HIL合同 |
1501 | elif classify_1_str in consts.HIL_CONTRACT_TYPE_MAP: | 1502 | elif classify_1_str in consts.HIL_CONTRACT_TYPE_MAP: |
1503 | is_fsm = doc.data_source == consts.DATA_SOURCE_LIST[3] | ||
1502 | file_type_1 = consts.HIL_CONTRACT_TYPE_MAP.get(classify_1_str) | 1504 | file_type_1 = consts.HIL_CONTRACT_TYPE_MAP.get(classify_1_str) |
1503 | ocr_result_1 = hil_predict(pdf_handler.pdf_info, file_type_1) | 1505 | ocr_result_1 = hil_predict(pdf_handler.pdf_info, file_type_1, is_fsm=is_fsm) |
1504 | rebuild_res_1 = {} | 1506 | rebuild_res_1 = {} |
1505 | page_res = {} | 1507 | page_res = {} |
1506 | for field_name, field_info in ocr_result_1.items(): | 1508 | for field_name, field_info in ocr_result_1.items(): |
... | @@ -1526,8 +1528,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1526,8 +1528,8 @@ class Command(BaseCommand, LoggerMixin): |
1526 | 'page_info': page_info | 1528 | 'page_info': page_info |
1527 | } | 1529 | } |
1528 | # hmh | 1530 | # hmh |
1529 | else: | 1531 | # else: |
1530 | pass | 1532 | # pass |
1531 | 1533 | ||
1532 | 1534 | ||
1533 | contract_res = {} | 1535 | contract_res = {} | ... | ... |
... | @@ -36,6 +36,7 @@ class RequestTrigger(NamedEnum): | ... | @@ -36,6 +36,7 @@ class RequestTrigger(NamedEnum): |
36 | DOCUPLOAD = (3, 'Document Upload') | 36 | DOCUPLOAD = (3, 'Document Upload') |
37 | SUBMITING = (4, 'Submiting') | 37 | SUBMITING = (4, 'Submiting') |
38 | UPLOADING = (5, 'Uploading') | 38 | UPLOADING = (5, 'Uploading') |
39 | OVP = (6, 'OVP') | ||
39 | 40 | ||
40 | 41 | ||
41 | class FailureReason(NamedEnum): | 42 | class FailureReason(NamedEnum): | ... | ... |
... | @@ -602,12 +602,13 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -602,12 +602,13 @@ class UploadDocView(GenericView, DocHandler): |
602 | is_zip = False | 602 | is_zip = False |
603 | 603 | ||
604 | classify_1 = 0 | 604 | classify_1 = 0 |
605 | # 电子合同 | 605 | # 电子合同 Econtract or OVP(FSM) |
606 | if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]: | 606 | if data_source == consts.DATA_SOURCE_LIST[2] or data_source == consts.DATA_SOURCE_LIST[3]: |
607 | for keyword, classify_1_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix): | 607 | if document_scheme == consts.DOC_SCHEME_LIST[1]: |
608 | if keyword in document_name: | 608 | for keyword, classify_1_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix): |
609 | classify_1 = classify_1_tmp | 609 | if keyword in document_name: |
610 | break | 610 | classify_1 = classify_1_tmp |
611 | break | ||
611 | # FSM合同:WEP/MSI/SC | 612 | # FSM合同:WEP/MSI/SC |
612 | elif data_source == consts.DATA_SOURCE_LIST[0] and document_scheme == consts.DOC_SCHEME_LIST[0]: | 613 | elif data_source == consts.DATA_SOURCE_LIST[0] and document_scheme == consts.DOC_SCHEME_LIST[0]: |
613 | for keyword, classify_1_tmp in consts.FSM_ECONTRACT_KEYWORDS_MAP.get(prefix): | 614 | for keyword, classify_1_tmp in consts.FSM_ECONTRACT_KEYWORDS_MAP.get(prefix): | ... | ... |
... | @@ -6,6 +6,7 @@ | ... | @@ -6,6 +6,7 @@ |
6 | # @Description : | 6 | # @Description : |
7 | 7 | ||
8 | from .get_char import Finder | 8 | from .get_char import Finder |
9 | from .get_char_fsm import Finder as FSMFinder | ||
9 | import numpy as np | 10 | import numpy as np |
10 | 11 | ||
11 | 12 | ||
... | @@ -23,7 +24,7 @@ def extract_info(ocr_results): | ... | @@ -23,7 +24,7 @@ def extract_info(ocr_results): |
23 | return {'page_1': {'合同编号': contract_no}} | 24 | return {'page_1': {'合同编号': contract_no}} |
24 | 25 | ||
25 | 26 | ||
26 | def predict(pdf_info, is_qrs=False): | 27 | def predict(pdf_info, is_qrs=False, is_fsm=False): |
27 | ocr_results = {} | 28 | ocr_results = {} |
28 | for pno in pdf_info: | 29 | for pno in pdf_info: |
29 | ocr_results[pno] = {} | 30 | ocr_results[pno] = {} |
... | @@ -50,7 +51,10 @@ def predict(pdf_info, is_qrs=False): | ... | @@ -50,7 +51,10 @@ def predict(pdf_info, is_qrs=False): |
50 | results = extract_info(ocr_results) | 51 | results = extract_info(ocr_results) |
51 | else: | 52 | else: |
52 | # 输入是整个 PDF 中的信息 | 53 | # 输入是整个 PDF 中的信息 |
53 | f = Finder(pdf_info, ocr_results=ocr_results) | 54 | if is_fsm: |
55 | f = FSMFinder(pdf_info, ocr_results=ocr_results) | ||
56 | else: | ||
57 | f = Finder(pdf_info, ocr_results=ocr_results) | ||
54 | results = f.get_info() | 58 | results = f.get_info() |
55 | return results | 59 | return results |
56 | 60 | ... | ... |
This diff is collapsed.
Click to expand it.
This diff is collapsed.
Click to expand it.
... | @@ -6,9 +6,10 @@ | ... | @@ -6,9 +6,10 @@ |
6 | # @Description : | 6 | # @Description : |
7 | 7 | ||
8 | from .get_char import Finder | 8 | from .get_char import Finder |
9 | from .get_char_fsm import Finder as FSMFinder | ||
9 | 10 | ||
10 | 11 | ||
11 | def predict(pdf_info, file_cls): | 12 | def predict(pdf_info, file_cls, is_fsm=False): |
12 | """Summary | 13 | """Summary |
13 | 14 | ||
14 | Args: | 15 | Args: |
... | @@ -58,7 +59,11 @@ def predict(pdf_info, file_cls): | ... | @@ -58,7 +59,11 @@ def predict(pdf_info, file_cls): |
58 | pdf_info = dict() | 59 | pdf_info = dict() |
59 | for pno, page_info in enumerate(pdf_info_1): | 60 | for pno, page_info in enumerate(pdf_info_1): |
60 | pdf_info[str(pno)] = page_info | 61 | pdf_info[str(pno)] = page_info |
61 | f = Finder(pdf_info) | 62 | |
63 | if is_fsm: | ||
64 | f = FSMFinder(pdf_info) | ||
65 | else: | ||
66 | f = Finder(pdf_info) | ||
62 | if file_cls == 0: | 67 | if file_cls == 0: |
63 | results = f.get_info() | 68 | results = f.get_info() |
64 | if file_cls == 1: | 69 | if file_cls == 1: | ... | ... |
-
Please register or sign in to post a comment