add FSM AFC/HIL Contract
Showing 8 changed files with 28 additions and 15 deletions
| ... | @@ -11,7 +11,7 @@ PAGE_SIZE_DEFAULT = 10 | ... | @@ -11,7 +11,7 @@ PAGE_SIZE_DEFAULT = 10 |
| 11 | FIXED_APPLICATION_ID_PREFIX = 'CH-S' | 11 | FIXED_APPLICATION_ID_PREFIX = 'CH-S' |
| 12 | 12 | ||
| 13 | DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACTMANAGEMENT'] | 13 | DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACTMANAGEMENT'] |
| 14 | DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT'] | 14 | DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT', 'OVP'] |
| 15 | COMPARE_DOC_SCHEME_LIST = ['CA', 'SE'] | 15 | COMPARE_DOC_SCHEME_LIST = ['CA', 'SE'] |
| 16 | 16 | ||
| 17 | HIL_PREFIX = 'HIL' | 17 | HIL_PREFIX = 'HIL' | ... | ... |
| ... | @@ -1476,7 +1476,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1476,7 +1476,8 @@ class Command(BaseCommand, LoggerMixin): |
| 1476 | 1476 | ||
| 1477 | # AFC合同 | 1477 | # AFC合同 |
| 1478 | if classify_1_str == str(consts.CONTRACT_CLASSIFY): | 1478 | if classify_1_str == str(consts.CONTRACT_CLASSIFY): |
| 1479 | ocr_result = afc_predict(pdf_handler.pdf_info) | 1479 | is_fsm = doc.data_source == consts.DATA_SOURCE_LIST[3] |
| 1480 | ocr_result = afc_predict(pdf_handler.pdf_info, is_fsm=is_fsm) | ||
| 1480 | page_res = {} | 1481 | page_res = {} |
| 1481 | for page_num, page_info in ocr_result.get('page_info', {}).items(): | 1482 | for page_num, page_info in ocr_result.get('page_info', {}).items(): |
| 1482 | if isinstance(page_num, str) and page_num.startswith('page_'): | 1483 | if isinstance(page_num, str) and page_num.startswith('page_'): |
| ... | @@ -1499,8 +1500,9 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1499,8 +1500,9 @@ class Command(BaseCommand, LoggerMixin): |
| 1499 | } | 1500 | } |
| 1500 | # HIL合同 | 1501 | # HIL合同 |
| 1501 | elif classify_1_str in consts.HIL_CONTRACT_TYPE_MAP: | 1502 | elif classify_1_str in consts.HIL_CONTRACT_TYPE_MAP: |
| 1503 | is_fsm = doc.data_source == consts.DATA_SOURCE_LIST[3] | ||
| 1502 | file_type_1 = consts.HIL_CONTRACT_TYPE_MAP.get(classify_1_str) | 1504 | file_type_1 = consts.HIL_CONTRACT_TYPE_MAP.get(classify_1_str) |
| 1503 | ocr_result_1 = hil_predict(pdf_handler.pdf_info, file_type_1) | 1505 | ocr_result_1 = hil_predict(pdf_handler.pdf_info, file_type_1, is_fsm=is_fsm) |
| 1504 | rebuild_res_1 = {} | 1506 | rebuild_res_1 = {} |
| 1505 | page_res = {} | 1507 | page_res = {} |
| 1506 | for field_name, field_info in ocr_result_1.items(): | 1508 | for field_name, field_info in ocr_result_1.items(): |
| ... | @@ -1526,8 +1528,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1526,8 +1528,8 @@ class Command(BaseCommand, LoggerMixin): |
| 1526 | 'page_info': page_info | 1528 | 'page_info': page_info |
| 1527 | } | 1529 | } |
| 1528 | # hmh | 1530 | # hmh |
| 1529 | else: | 1531 | # else: |
| 1530 | pass | 1532 | # pass |
| 1531 | 1533 | ||
| 1532 | 1534 | ||
| 1533 | contract_res = {} | 1535 | contract_res = {} | ... | ... |
| ... | @@ -36,6 +36,7 @@ class RequestTrigger(NamedEnum): | ... | @@ -36,6 +36,7 @@ class RequestTrigger(NamedEnum): |
| 36 | DOCUPLOAD = (3, 'Document Upload') | 36 | DOCUPLOAD = (3, 'Document Upload') |
| 37 | SUBMITING = (4, 'Submiting') | 37 | SUBMITING = (4, 'Submiting') |
| 38 | UPLOADING = (5, 'Uploading') | 38 | UPLOADING = (5, 'Uploading') |
| 39 | OVP = (6, 'OVP') | ||
| 39 | 40 | ||
| 40 | 41 | ||
| 41 | class FailureReason(NamedEnum): | 42 | class FailureReason(NamedEnum): | ... | ... |
| ... | @@ -590,12 +590,13 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -590,12 +590,13 @@ class UploadDocView(GenericView, DocHandler): |
| 590 | is_zip = False | 590 | is_zip = False |
| 591 | 591 | ||
| 592 | classify_1 = 0 | 592 | classify_1 = 0 |
| 593 | # 电子合同 | 593 | # 电子合同 Econtract or OVP(FSM) |
| 594 | if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]: | 594 | if data_source == consts.DATA_SOURCE_LIST[2] or data_source == consts.DATA_SOURCE_LIST[3]: |
| 595 | for keyword, classify_1_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix): | 595 | if document_scheme == consts.DOC_SCHEME_LIST[1]: |
| 596 | if keyword in document_name: | 596 | for keyword, classify_1_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix): |
| 597 | classify_1 = classify_1_tmp | 597 | if keyword in document_name: |
| 598 | break | 598 | classify_1 = classify_1_tmp |
| 599 | break | ||
| 599 | # FSM合同:WEP/MSI/SC | 600 | # FSM合同:WEP/MSI/SC |
| 600 | elif data_source == consts.DATA_SOURCE_LIST[0] and document_scheme == consts.DOC_SCHEME_LIST[0]: | 601 | elif data_source == consts.DATA_SOURCE_LIST[0] and document_scheme == consts.DOC_SCHEME_LIST[0]: |
| 601 | for keyword, classify_1_tmp in consts.FSM_ECONTRACT_KEYWORDS_MAP.get(prefix): | 602 | for keyword, classify_1_tmp in consts.FSM_ECONTRACT_KEYWORDS_MAP.get(prefix): | ... | ... |
| ... | @@ -6,6 +6,7 @@ | ... | @@ -6,6 +6,7 @@ |
| 6 | # @Description : | 6 | # @Description : |
| 7 | 7 | ||
| 8 | from .get_char import Finder | 8 | from .get_char import Finder |
| 9 | from .get_char_fsm import Finder as FSMFinder | ||
| 9 | import numpy as np | 10 | import numpy as np |
| 10 | 11 | ||
| 11 | 12 | ||
| ... | @@ -23,7 +24,7 @@ def extract_info(ocr_results): | ... | @@ -23,7 +24,7 @@ def extract_info(ocr_results): |
| 23 | return {'page_1': {'合同编号': contract_no}} | 24 | return {'page_1': {'合同编号': contract_no}} |
| 24 | 25 | ||
| 25 | 26 | ||
| 26 | def predict(pdf_info, is_qrs=False): | 27 | def predict(pdf_info, is_qrs=False, is_fsm=False): |
| 27 | ocr_results = {} | 28 | ocr_results = {} |
| 28 | for pno in pdf_info: | 29 | for pno in pdf_info: |
| 29 | ocr_results[pno] = {} | 30 | ocr_results[pno] = {} |
| ... | @@ -50,7 +51,10 @@ def predict(pdf_info, is_qrs=False): | ... | @@ -50,7 +51,10 @@ def predict(pdf_info, is_qrs=False): |
| 50 | results = extract_info(ocr_results) | 51 | results = extract_info(ocr_results) |
| 51 | else: | 52 | else: |
| 52 | # 输入是整个 PDF 中的信息 | 53 | # 输入是整个 PDF 中的信息 |
| 53 | f = Finder(pdf_info, ocr_results=ocr_results) | 54 | if is_fsm: |
| 55 | f = FSMFinder(pdf_info, ocr_results=ocr_results) | ||
| 56 | else: | ||
| 57 | f = Finder(pdf_info, ocr_results=ocr_results) | ||
| 54 | results = f.get_info() | 58 | results = f.get_info() |
| 55 | return results | 59 | return results |
| 56 | 60 | ... | ... |
This diff is collapsed.
Click to expand it.
This diff is collapsed.
Click to expand it.
| ... | @@ -6,9 +6,10 @@ | ... | @@ -6,9 +6,10 @@ |
| 6 | # @Description : | 6 | # @Description : |
| 7 | 7 | ||
| 8 | from .get_char import Finder | 8 | from .get_char import Finder |
| 9 | from .get_char_fsm import Finder as FSMFinder | ||
| 9 | 10 | ||
| 10 | 11 | ||
| 11 | def predict(pdf_info, file_cls): | 12 | def predict(pdf_info, file_cls, is_fsm=False): |
| 12 | """Summary | 13 | """Summary |
| 13 | 14 | ||
| 14 | Args: | 15 | Args: |
| ... | @@ -58,7 +59,11 @@ def predict(pdf_info, file_cls): | ... | @@ -58,7 +59,11 @@ def predict(pdf_info, file_cls): |
| 58 | pdf_info = dict() | 59 | pdf_info = dict() |
| 59 | for pno, page_info in enumerate(pdf_info_1): | 60 | for pno, page_info in enumerate(pdf_info_1): |
| 60 | pdf_info[str(pno)] = page_info | 61 | pdf_info[str(pno)] = page_info |
| 61 | f = Finder(pdf_info) | 62 | |
| 63 | if is_fsm: | ||
| 64 | f = FSMFinder(pdf_info) | ||
| 65 | else: | ||
| 66 | f = Finder(pdf_info) | ||
| 62 | if file_cls == 0: | 67 | if file_cls == 0: |
| 63 | results = f.get_info() | 68 | results = f.get_info() |
| 64 | if file_cls == 1: | 69 | if file_cls == 1: | ... | ... |
-
Please register or sign in to post a comment