Merge branch 'feature/uat-tmp' into 'master'
Feature/uat tmp See merge request !18
Showing 19 changed files with 3341 additions and 48 deletions
... | @@ -10,8 +10,8 @@ PAGE_SIZE_DEFAULT = 10 | ... | @@ -10,8 +10,8 @@ PAGE_SIZE_DEFAULT = 10 |
10 | 10 | ||
11 | FIXED_APPLICATION_ID_PREFIX = 'CH-S' | 11 | FIXED_APPLICATION_ID_PREFIX = 'CH-S' |
12 | 12 | ||
13 | DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACTMANAGEMENT'] | 13 | DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACTMANAGEMENT', 'INSURANCE'] |
14 | DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT'] | 14 | DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT', 'OVP'] |
15 | COMPARE_DOC_SCHEME_LIST = ['CA', 'SE'] | 15 | COMPARE_DOC_SCHEME_LIST = ['CA', 'SE'] |
16 | 16 | ||
17 | HIL_PREFIX = 'HIL' | 17 | HIL_PREFIX = 'HIL' |
... | @@ -1057,7 +1057,25 @@ HIL_CONTRACT_2_CLASSIFY = 44 | ... | @@ -1057,7 +1057,25 @@ HIL_CONTRACT_2_CLASSIFY = 44 |
1057 | HIL_CONTRACT_3_CN_NAME = '车辆处置协议' | 1057 | HIL_CONTRACT_3_CN_NAME = '车辆处置协议' |
1058 | HIL_CONTRACT_3_CLASSIFY = 45 | 1058 | HIL_CONTRACT_3_CLASSIFY = 45 |
1059 | 1059 | ||
1060 | CONTRACT_SET = {CONTRACT_QRS_CLASSIFY, CONTRACT_CLASSIFY, HIL_CONTRACT_1_CLASSIFY, HIL_CONTRACT_2_CLASSIFY, HIL_CONTRACT_3_CLASSIFY} | 1060 | FSM_CONTRACT_WEP_CN_NAME = '延长保修合同' |
1061 | FSM_CONTRACT_WEP_CLASSIFY = 51 | ||
1062 | |||
1063 | FSM_CONTRACT_MSI_CN_NAME = '长悦保养合同' | ||
1064 | FSM_CONTRACT_MSI_CLASSIFY = 52 | ||
1065 | |||
1066 | FSM_CONTRACT_SC_CN_NAME = '汽车销售合同' | ||
1067 | FSM_CONTRACT_SC_CLASSIFY = 53 | ||
1068 | |||
1069 | CONTRACT_SET = { | ||
1070 | CONTRACT_QRS_CLASSIFY, | ||
1071 | CONTRACT_CLASSIFY, | ||
1072 | HIL_CONTRACT_1_CLASSIFY, | ||
1073 | HIL_CONTRACT_2_CLASSIFY, | ||
1074 | HIL_CONTRACT_3_CLASSIFY, | ||
1075 | FSM_CONTRACT_WEP_CLASSIFY, | ||
1076 | FSM_CONTRACT_MSI_CLASSIFY, | ||
1077 | FSM_CONTRACT_SC_CLASSIFY, | ||
1078 | } | ||
1061 | 1079 | ||
1062 | CONTRACT_MAP = { | 1080 | CONTRACT_MAP = { |
1063 | HIL_CONTRACT_1_CLASSIFY: HIL_CONTRACT_1_CN_NAME, | 1081 | HIL_CONTRACT_1_CLASSIFY: HIL_CONTRACT_1_CN_NAME, |
... | @@ -1065,8 +1083,13 @@ CONTRACT_MAP = { | ... | @@ -1065,8 +1083,13 @@ CONTRACT_MAP = { |
1065 | HIL_CONTRACT_3_CLASSIFY: HIL_CONTRACT_3_CN_NAME, | 1083 | HIL_CONTRACT_3_CLASSIFY: HIL_CONTRACT_3_CN_NAME, |
1066 | CONTRACT_CLASSIFY: CONTRACT_CN_NAME, | 1084 | CONTRACT_CLASSIFY: CONTRACT_CN_NAME, |
1067 | CONTRACT_QRS_CLASSIFY: CONTRACT_QRS_CN_NAME, | 1085 | CONTRACT_QRS_CLASSIFY: CONTRACT_QRS_CN_NAME, |
1086 | FSM_CONTRACT_WEP_CLASSIFY: FSM_CONTRACT_WEP_CN_NAME, | ||
1087 | FSM_CONTRACT_MSI_CLASSIFY: FSM_CONTRACT_MSI_CN_NAME, | ||
1088 | FSM_CONTRACT_SC_CLASSIFY: FSM_CONTRACT_SC_CN_NAME, | ||
1068 | } | 1089 | } |
1069 | 1090 | ||
1091 | FSM_CONTRACT_CLASSIFY_SET = {FSM_CONTRACT_WEP_CLASSIFY, FSM_CONTRACT_MSI_CLASSIFY, FSM_CONTRACT_SC_CLASSIFY} | ||
1092 | |||
1070 | # 保单 | 1093 | # 保单 |
1071 | INSURANCE_CN_NAME = '保单' | 1094 | INSURANCE_CN_NAME = '保单' |
1072 | INSURANCE_CLASSIFY = 42 | 1095 | INSURANCE_CLASSIFY = 42 |
... | @@ -1215,6 +1238,11 @@ BS_FIELD = 'bss_ocr' | ... | @@ -1215,6 +1238,11 @@ BS_FIELD = 'bss_ocr' |
1215 | HIL_CONTRACT_1_FIELD = 'hil_contract_1_ocr' | 1238 | HIL_CONTRACT_1_FIELD = 'hil_contract_1_ocr' |
1216 | HIL_CONTRACT_2_FIELD = 'hil_contract_2_ocr' | 1239 | HIL_CONTRACT_2_FIELD = 'hil_contract_2_ocr' |
1217 | HIL_CONTRACT_3_FIELD = 'hil_contract_3_ocr' | 1240 | HIL_CONTRACT_3_FIELD = 'hil_contract_3_ocr' |
1241 | FSM_CONTRACT_WEP_FIELD = 'fsm_wep_ocr' | ||
1242 | FSM_CONTRACT_MSI_FIELD = 'fsm_msi_ocr' | ||
1243 | FSM_CONTRACT_SC_FIELD = 'fsm_sc_ocr' | ||
1244 | |||
1245 | |||
1218 | BS_CLASSIFY = 10089 | 1246 | BS_CLASSIFY = 10089 |
1219 | 1247 | ||
1220 | RESULT_MAPPING = { | 1248 | RESULT_MAPPING = { |
... | @@ -1239,6 +1267,9 @@ RESULT_MAPPING = { | ... | @@ -1239,6 +1267,9 @@ RESULT_MAPPING = { |
1239 | HIL_CONTRACT_1_CLASSIFY: HIL_CONTRACT_1_FIELD, | 1267 | HIL_CONTRACT_1_CLASSIFY: HIL_CONTRACT_1_FIELD, |
1240 | HIL_CONTRACT_2_CLASSIFY: HIL_CONTRACT_2_FIELD, | 1268 | HIL_CONTRACT_2_CLASSIFY: HIL_CONTRACT_2_FIELD, |
1241 | HIL_CONTRACT_3_CLASSIFY: HIL_CONTRACT_3_FIELD, | 1269 | HIL_CONTRACT_3_CLASSIFY: HIL_CONTRACT_3_FIELD, |
1270 | FSM_CONTRACT_WEP_CLASSIFY: FSM_CONTRACT_WEP_FIELD, | ||
1271 | FSM_CONTRACT_MSI_CLASSIFY: FSM_CONTRACT_MSI_FIELD, | ||
1272 | FSM_CONTRACT_SC_CLASSIFY: FSM_CONTRACT_SC_FIELD, | ||
1242 | } | 1273 | } |
1243 | 1274 | ||
1244 | CA_ADD_COMPARE_FIELDS = (IC_OCR_FIELD, BL_OCR_FIELD, BS_FIELD) | 1275 | CA_ADD_COMPARE_FIELDS = (IC_OCR_FIELD, BL_OCR_FIELD, BS_FIELD) |
... | @@ -1511,6 +1542,9 @@ SE_AFC_CON_MAP = { | ... | @@ -1511,6 +1542,9 @@ SE_AFC_CON_MAP = { |
1511 | '还款账号': (2, 2, '还款账户', '账号'), | 1542 | '还款账号': (2, 2, '还款账户', '账号'), |
1512 | '户名': (2, 2, '还款账户', '户名'), | 1543 | '户名': (2, 2, '还款账户', '户名'), |
1513 | '开户行': (2, 2, '还款账户', '开户行'), | 1544 | '开户行': (2, 2, '还款账户', '开户行'), |
1545 | '收款账号': (2, 2, '借款人收款账户', '账号'), | ||
1546 | '收款户名': (2, 2, '借款人收款账户', '户名'), | ||
1547 | '收款开户行': (2, 2, '借款人收款账户', '开户行'), | ||
1514 | 1548 | ||
1515 | '借款人签字及时间': (1, 1, '借款人签字及时间', None), | 1549 | '借款人签字及时间': (1, 1, '借款人签字及时间', None), |
1516 | 1550 | ||
... | @@ -1550,9 +1584,12 @@ SE_HIL_CON_1_MAP = { | ... | @@ -1550,9 +1584,12 @@ SE_HIL_CON_1_MAP = { |
1550 | '融资成本总额': (5, 4, 7, '融资成本总额', None), | 1584 | '融资成本总额': (5, 4, 7, '融资成本总额', None), |
1551 | '租期': (5, 4, 7, '租期', None), | 1585 | '租期': (5, 4, 7, '租期', None), |
1552 | '还款计划表': (5, 5, 7, '付款计划表', None), | 1586 | '还款计划表': (5, 5, 7, '付款计划表', None), |
1553 | '还款账号': (5, 5, 7, '银行账户-银行账号', None), | 1587 | '还款账号': (5, 6, 7, '银行账户-银行账号', None), |
1554 | '户名': (5, 5, 7, '银行账户-户名', None), | 1588 | '户名': (5, 6, 7, '银行账户-户名', None), |
1555 | '开户行': (5, 5, 7, '银行账户-开户行', None), | 1589 | '开户行': (5, 6, 7, '银行账户-开户行', None), |
1590 | '收款账号': (5, 5, 7, '收款银行账户-银行账号', None), | ||
1591 | '收款户名': (5, 5, 7, '收款银行账户-户名', None), | ||
1592 | '收款开户行': (5, 5, 7, '收款银行账户-开户行', None), | ||
1556 | 'ASP项目详情': (5, 4, 7, '车辆附加产品明细表', None), | 1593 | 'ASP项目详情': (5, 4, 7, '车辆附加产品明细表', None), |
1557 | '承租人法定代表人或授权代表': (1, 1, 7, '承租人-法定代表人或授权代表', None), | 1594 | '承租人法定代表人或授权代表': (1, 1, 7, '承租人-法定代表人或授权代表', None), |
1558 | '共同承租人法定代表人或授权代表': (1, 1, 7, '共同承租人-法定代表人或授权代表', None), | 1595 | '共同承租人法定代表人或授权代表': (1, 1, 7, '共同承租人-法定代表人或授权代表', None), |
... | @@ -1608,6 +1645,39 @@ SE_HIL_CON_MAP = { | ... | @@ -1608,6 +1645,39 @@ SE_HIL_CON_MAP = { |
1608 | HIL_CONTRACT_3_CLASSIFY: SE_HIL_CON_3_MAP, | 1645 | HIL_CONTRACT_3_CLASSIFY: SE_HIL_CON_3_MAP, |
1609 | } | 1646 | } |
1610 | 1647 | ||
1648 | SE_FSM_WEP_MAP = { | ||
1649 | '客户姓名': (1, '客户姓名'), | ||
1650 | '证件类型': (1, '证件类型'), | ||
1651 | '证件号码': (1, '证件号码'), | ||
1652 | '合同价格(小写)': (1, '合同价格(小写)'), | ||
1653 | '客户签名': (1, '客户签名'), | ||
1654 | '签单日期': (1, '签单日期'), | ||
1655 | } | ||
1656 | |||
1657 | SE_FSM_MSI_MAP = { | ||
1658 | '客户姓名': (1, '客户姓名'), | ||
1659 | '证件类型': (1, '证件类型'), | ||
1660 | '证件号码': (1, '证件号码'), | ||
1661 | '合同价格(小写)': (1, '合同价格(小写)'), | ||
1662 | '客户签名': (2, '客户签名'), | ||
1663 | '签单日期': (2, '签单日期'), | ||
1664 | } | ||
1665 | |||
1666 | SE_FSM_SC_MAP = { | ||
1667 | '姓名': (1, '姓名'), | ||
1668 | '证件类型': (1, '证件类型'), | ||
1669 | '证件号码': (1, '证件号码'), | ||
1670 | '总价': (1, '总价'), | ||
1671 | '客户签名': (12, '客户签名'), | ||
1672 | '签单日期': (12, '签单日期'), | ||
1673 | } | ||
1674 | |||
1675 | SE_FSM_CON_MAP = { | ||
1676 | FSM_CONTRACT_WEP_CLASSIFY: SE_FSM_WEP_MAP, | ||
1677 | FSM_CONTRACT_MSI_CLASSIFY: SE_FSM_MSI_MAP, | ||
1678 | FSM_CONTRACT_SC_CLASSIFY: SE_FSM_SC_MAP, | ||
1679 | } | ||
1680 | |||
1611 | SE_AFC_CON_QRS_FIELD = ['合同编号'] | 1681 | SE_AFC_CON_QRS_FIELD = ['合同编号'] |
1612 | SE_AFC_CON_FIELD = ['合同编号-每页', '所购车辆价格-小写-重要条款', '车架号-重要条款', '贷款本金金额-重要条款', '贷款期限-重要条款', | 1682 | SE_AFC_CON_FIELD = ['合同编号-每页', '所购车辆价格-小写-重要条款', '车架号-重要条款', '贷款本金金额-重要条款', '贷款期限-重要条款', |
1613 | '车辆贷款本金金额-重要条款', '附加产品融资贷款本金总额-重要条款', '所购车辆价格', '车架号', '经销商', | 1683 | '车辆贷款本金金额-重要条款', '附加产品融资贷款本金总额-重要条款', '所购车辆价格', '车架号', '经销商', |
... | @@ -2314,29 +2384,42 @@ APPLICANT_TYPE_MAP = { | ... | @@ -2314,29 +2384,42 @@ APPLICANT_TYPE_MAP = { |
2314 | 2384 | ||
2315 | APPLICANT_TYPE_ORDER = ['Borrower', 'Co-Borrower', 'Guarantor', 'Mortgager'] | 2385 | APPLICANT_TYPE_ORDER = ['Borrower', 'Co-Borrower', 'Guarantor', 'Mortgager'] |
2316 | 2386 | ||
2317 | FILE_NAME_PREFIX_MAP = { | 2387 | # FILE_NAME_PREFIX_MAP = { |
2318 | AFC_PREFIX: [ | 2388 | # AFC_PREFIX: [ |
2319 | ((CONTRACT_CLASSIFY, 0), '{0}_电子签署-汽车抵押贷款合同'), | 2389 | # ((CONTRACT_CLASSIFY, 0), '{0}_电子签署-汽车抵押贷款合同'), |
2320 | ((HMH_CLASSIFY, 0), '{0}_电子签署-抵押登记豁免函'), | 2390 | # ((HMH_CLASSIFY, 0), '{0}_电子签署-抵押登记豁免函'), |
2321 | ], | 2391 | # ], |
2322 | HIL_PREFIX: [ | 2392 | # HIL_PREFIX: [ |
2323 | ((HIL_CONTRACT_1_CLASSIFY, HIL_CONTRACT_3_CLASSIFY), '{0}_电子签署-售后回租合同'), | 2393 | # ((HIL_CONTRACT_1_CLASSIFY, HIL_CONTRACT_3_CLASSIFY), '{0}_电子签署-售后回租合同'), |
2324 | ((HIL_CONTRACT_2_CLASSIFY, 0), '{0}_电子签署-汽车租赁抵押合同'), | 2394 | # ((HIL_CONTRACT_2_CLASSIFY, 0), '{0}_电子签署-汽车租赁抵押合同'), |
2325 | ((HMH_CLASSIFY, 0), '{0}_电子签署-抵押登记豁免函'), | 2395 | # ((HMH_CLASSIFY, 0), '{0}_电子签署-抵押登记豁免函'), |
2326 | ] | 2396 | # ] |
2327 | } | 2397 | # } |
2328 | 2398 | ||
2329 | ECONTRACT_KEYWORDS_MAP = { | 2399 | ECONTRACT_KEYWORDS_MAP = { |
2330 | AFC_PREFIX: [ | 2400 | AFC_PREFIX: [ |
2331 | ('抵押贷款合同', CONTRACT_CLASSIFY), | 2401 | ('抵押贷款合同', CONTRACT_CLASSIFY), |
2332 | ('送达地址确认书', CONTRACT_QRS_CLASSIFY), | 2402 | ('送达地址确认书', CONTRACT_QRS_CLASSIFY), |
2333 | # ('电子签署-抵押登记豁免函', HMH_CLASSIFY, 0), | 2403 | ('抵押登记豁免函', HMH_CLASSIFY), |
2334 | ], | 2404 | ], |
2335 | HIL_PREFIX: [ | 2405 | HIL_PREFIX: [ |
2336 | ('售后回租合同', HIL_CONTRACT_1_CLASSIFY), | 2406 | ('售后回租合同', HIL_CONTRACT_1_CLASSIFY), |
2337 | ('租赁抵押合同', HIL_CONTRACT_2_CLASSIFY), | 2407 | ('租赁抵押合同', HIL_CONTRACT_2_CLASSIFY), |
2338 | ('车辆处置协议', HIL_CONTRACT_3_CLASSIFY), | 2408 | ('车辆处置协议', HIL_CONTRACT_3_CLASSIFY), |
2339 | # ('电子签署-抵押登记豁免函', HMH_CLASSIFY, 0), | 2409 | ('抵押登记豁免函', HMH_CLASSIFY), |
2410 | ] | ||
2411 | } | ||
2412 | |||
2413 | FSM_ECONTRACT_KEYWORDS_MAP = { | ||
2414 | AFC_PREFIX: [ | ||
2415 | ('延长保修条款与条件', FSM_CONTRACT_WEP_CLASSIFY), | ||
2416 | ('长悦保养套餐服务合约', FSM_CONTRACT_MSI_CLASSIFY), | ||
2417 | ('汽车销售合同', FSM_CONTRACT_SC_CLASSIFY), | ||
2418 | ], | ||
2419 | HIL_PREFIX: [ | ||
2420 | ('延长保修条款与条件', FSM_CONTRACT_WEP_CLASSIFY), | ||
2421 | ('长悦保养套餐服务合同', FSM_CONTRACT_MSI_CLASSIFY), | ||
2422 | ('汽车销售合同', FSM_CONTRACT_SC_CLASSIFY), | ||
2340 | ] | 2423 | ] |
2341 | } | 2424 | } |
2342 | 2425 | ||
... | @@ -2346,6 +2429,12 @@ HIL_CONTRACT_TYPE_MAP = { | ... | @@ -2346,6 +2429,12 @@ HIL_CONTRACT_TYPE_MAP = { |
2346 | str(HIL_CONTRACT_3_CLASSIFY): 1, | 2429 | str(HIL_CONTRACT_3_CLASSIFY): 1, |
2347 | } | 2430 | } |
2348 | 2431 | ||
2432 | FSM_CONTRACT_TYPE_MAP = { | ||
2433 | str(FSM_CONTRACT_WEP_CLASSIFY): 0, | ||
2434 | str(FSM_CONTRACT_MSI_CLASSIFY): 1, | ||
2435 | str(FSM_CONTRACT_SC_CLASSIFY): 2, | ||
2436 | } | ||
2437 | |||
2349 | RESULT_MAP = { | 2438 | RESULT_MAP = { |
2350 | 0: None, | 2439 | 0: None, |
2351 | 1: True, | 2440 | 1: True, |
... | @@ -2379,3 +2468,26 @@ MPOS_MAP = { | ... | @@ -2379,3 +2468,26 @@ MPOS_MAP = { |
2379 | } | 2468 | } |
2380 | 2469 | ||
2381 | FOLDER_WSC_CLASSIFY = 199 | 2470 | FOLDER_WSC_CLASSIFY = 199 |
2471 | |||
2472 | |||
2473 | FSM_BEFORE_ACTIVITED_STATUS = { | ||
2474 | "APSVD": "Saved", | ||
2475 | "APEAE": "E-app Editing", | ||
2476 | "APADA": "Awaiting Dealer Action", | ||
2477 | "APAPR": "Acceptance Processing", | ||
2478 | "APPSB": "Pre-submit Processed", | ||
2479 | "APSBT": "Submitted", | ||
2480 | "APAPP": "Approved", | ||
2481 | "APHOC": "Held Offer-Docs", | ||
2482 | "APHOD": "Held Offer-Data", | ||
2483 | "APINI": "Initiated", | ||
2484 | "APSEP": "Settlement Processing" | ||
2485 | } | ||
2486 | |||
2487 | FSM_ACTIVITED_STATUS = { | ||
2488 | "APADF": "Activated-Document Follow up", | ||
2489 | "APASC": "Activated-Awaiting Settlement Check", | ||
2490 | "APIPN": "Activated-Invoice Passed-Non PT", | ||
2491 | "APIPP": "Activated-Invoice Passed-PT Doc Required", | ||
2492 | "APARD": "Activated-Review done", | ||
2493 | } | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
... | @@ -20,6 +20,8 @@ from common.tools.file_tools import get_pwd_list_from_str, extract_zip_or_rar, g | ... | @@ -20,6 +20,8 @@ from common.tools.file_tools import get_pwd_list_from_str, extract_zip_or_rar, g |
20 | from common.tools.pdf_to_img import PDFHandler | 20 | from common.tools.pdf_to_img import PDFHandler |
21 | from common.electronic_afc_contract.afc_contract_ocr import predict as afc_predict | 21 | from common.electronic_afc_contract.afc_contract_ocr import predict as afc_predict |
22 | from common.electronic_hil_contract.hil_contract_ocr import predict as hil_predict | 22 | from common.electronic_hil_contract.hil_contract_ocr import predict as hil_predict |
23 | from common.fsm_econtract.fsm_contract_ocr import predict as fsm_predict | ||
24 | from common.fsm_econtract.hmh_ocr import predict as hmh_predict | ||
23 | from apps.doc import consts | 25 | from apps.doc import consts |
24 | # from apps.doc.ocr.edms import EDMS, rh | 26 | # from apps.doc.ocr.edms import EDMS, rh |
25 | from apps.doc.ocr.ecm import ECM, rh | 27 | from apps.doc.ocr.ecm import ECM, rh |
... | @@ -40,8 +42,10 @@ from apps.doc.models import ( | ... | @@ -40,8 +42,10 @@ from apps.doc.models import ( |
40 | DDARecords, | 42 | DDARecords, |
41 | IDBCRecords, | 43 | IDBCRecords, |
42 | Configs, | 44 | Configs, |
45 | AFCCmsStatusInfo, | ||
46 | HILCmsStatusInfo, | ||
43 | ) | 47 | ) |
44 | from celery_compare.tasks import compare | 48 | from celery_compare.tasks import compare, fsm_compare |
45 | 49 | ||
46 | 50 | ||
47 | class Command(BaseCommand, LoggerMixin): | 51 | class Command(BaseCommand, LoggerMixin): |
... | @@ -996,7 +1000,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -996,7 +1000,7 @@ class Command(BaseCommand, LoggerMixin): |
996 | res.setdefault(consts.ALL_POSITION_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get( | 1000 | res.setdefault(consts.ALL_POSITION_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get( |
997 | consts.ALL_POSITION_KEY, {}).get(key1, []) | 1001 | consts.ALL_POSITION_KEY, {}).get(key1, []) |
998 | license_summary[classify] = [res] | 1002 | license_summary[classify] = [res] |
999 | else: | 1003 | elif classify in consts.SE_HIL_CON_MAP: |
1000 | res = {} | 1004 | res = {} |
1001 | for key, (pno1, pno2, end_idx, key1, key2) in consts.SE_HIL_CON_MAP[classify].items(): | 1005 | for key, (pno1, pno2, end_idx, key1, key2) in consts.SE_HIL_CON_MAP[classify].items(): |
1002 | if pno1 is None: | 1006 | if pno1 is None: |
... | @@ -1020,7 +1024,14 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1020,7 +1024,14 @@ class Command(BaseCommand, LoggerMixin): |
1020 | res[key] = tmp_res | 1024 | res[key] = tmp_res |
1021 | res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(img_pno), {}).get( | 1025 | res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(img_pno), {}).get( |
1022 | consts.IMG_PATH_KEY, '') | 1026 | consts.IMG_PATH_KEY, '') |
1027 | license_summary[classify] = [res] | ||
1023 | 1028 | ||
1029 | elif classify in consts.SE_FSM_CON_MAP: | ||
1030 | res = {} | ||
1031 | for key, (pno1, key1) in consts.SE_FSM_CON_MAP[classify].items(): | ||
1032 | res[key] = page_info_dict.get(str(pno1), {}).get(key1) | ||
1033 | res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(pno1), {}).get( | ||
1034 | consts.IMG_PATH_KEY, '') | ||
1024 | license_summary[classify] = [res] | 1035 | license_summary[classify] = [res] |
1025 | 1036 | ||
1026 | def rebuild_bs_summary(self, bs_summary, unknown_summary): | 1037 | def rebuild_bs_summary(self, bs_summary, unknown_summary): |
... | @@ -1442,7 +1453,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1442,7 +1453,7 @@ class Command(BaseCommand, LoggerMixin): |
1442 | self.log_base, traceback.format_exc())) | 1453 | self.log_base, traceback.format_exc())) |
1443 | error_list.append(1) | 1454 | error_list.append(1) |
1444 | return | 1455 | return |
1445 | else: # e-contract | 1456 | else: # e-contract or e-fsm-contract or e-hmh |
1446 | try: | 1457 | try: |
1447 | # pdf下载 处理 图片存储 识别 | 1458 | # pdf下载 处理 图片存储 识别 |
1448 | for times in range(consts.RETRY_TIMES): | 1459 | for times in range(consts.RETRY_TIMES): |
... | @@ -1472,8 +1483,10 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1472,8 +1483,10 @@ class Command(BaseCommand, LoggerMixin): |
1472 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( | 1483 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( |
1473 | self.log_base, traceback.format_exc())) | 1484 | self.log_base, traceback.format_exc())) |
1474 | 1485 | ||
1486 | # AFC合同 | ||
1475 | if classify_1_str == str(consts.CONTRACT_CLASSIFY): | 1487 | if classify_1_str == str(consts.CONTRACT_CLASSIFY): |
1476 | ocr_result = afc_predict(pdf_handler.pdf_info) | 1488 | is_fsm = doc.data_source == consts.DATA_SOURCE_LIST[3] |
1489 | ocr_result = afc_predict(pdf_handler.pdf_info, is_fsm=is_fsm) | ||
1477 | page_res = {} | 1490 | page_res = {} |
1478 | for page_num, page_info in ocr_result.get('page_info', {}).items(): | 1491 | for page_num, page_info in ocr_result.get('page_info', {}).items(): |
1479 | if isinstance(page_num, str) and page_num.startswith('page_'): | 1492 | if isinstance(page_num, str) and page_num.startswith('page_'): |
... | @@ -1483,6 +1496,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1483,6 +1496,7 @@ class Command(BaseCommand, LoggerMixin): |
1483 | 'page_num': page_num, | 1496 | 'page_num': page_num, |
1484 | 'page_info': page_info | 1497 | 'page_info': page_info |
1485 | } | 1498 | } |
1499 | # 送达地址确认书 | ||
1486 | elif classify_1_str == str(consts.CONTRACT_QRS_CLASSIFY): | 1500 | elif classify_1_str == str(consts.CONTRACT_QRS_CLASSIFY): |
1487 | ocr_result = afc_predict(pdf_handler.pdf_info, is_qrs=True) | 1501 | ocr_result = afc_predict(pdf_handler.pdf_info, is_qrs=True) |
1488 | page_num = 'page_1' | 1502 | page_num = 'page_1' |
... | @@ -1493,9 +1507,11 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1493,9 +1507,11 @@ class Command(BaseCommand, LoggerMixin): |
1493 | 'page_info': ocr_result.pop(page_num, {}) | 1507 | 'page_info': ocr_result.pop(page_num, {}) |
1494 | } | 1508 | } |
1495 | } | 1509 | } |
1496 | else: | 1510 | # HIL合同 |
1511 | elif classify_1_str in consts.HIL_CONTRACT_TYPE_MAP: | ||
1512 | is_fsm = doc.data_source == consts.DATA_SOURCE_LIST[3] | ||
1497 | file_type_1 = consts.HIL_CONTRACT_TYPE_MAP.get(classify_1_str) | 1513 | file_type_1 = consts.HIL_CONTRACT_TYPE_MAP.get(classify_1_str) |
1498 | ocr_result_1 = hil_predict(pdf_handler.pdf_info, file_type_1) | 1514 | ocr_result_1 = hil_predict(pdf_handler.pdf_info, file_type_1, is_fsm=is_fsm) |
1499 | rebuild_res_1 = {} | 1515 | rebuild_res_1 = {} |
1500 | page_res = {} | 1516 | page_res = {} |
1501 | for field_name, field_info in ocr_result_1.items(): | 1517 | for field_name, field_info in ocr_result_1.items(): |
... | @@ -1508,9 +1524,36 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1508,9 +1524,36 @@ class Command(BaseCommand, LoggerMixin): |
1508 | 'page_num': page_num, | 1524 | 'page_num': page_num, |
1509 | 'page_info': page_info | 1525 | 'page_info': page_info |
1510 | } | 1526 | } |
1527 | # FSM合同 WEP MSI SC | ||
1528 | elif classify_1_str in consts.FSM_CONTRACT_TYPE_MAP: | ||
1529 | file_type = consts.FSM_CONTRACT_TYPE_MAP.get(classify_1_str) | ||
1530 | ocr_result = fsm_predict(pdf_handler.pdf_info, file_type) | ||
1531 | page_res = {} | ||
1532 | for page_num, page_info in ocr_result.items(): | ||
1533 | if isinstance(page_num, str) and page_num.startswith('page_'): | ||
1534 | page_res[page_num] = { | ||
1535 | 'classify': int(classify_1_str), | ||
1536 | 'page_num': page_num, | ||
1537 | 'page_info': page_info | ||
1538 | } | ||
1539 | # hmh | ||
1540 | # else: | ||
1541 | # pass | ||
1542 | |||
1511 | 1543 | ||
1512 | contract_res = {} | 1544 | contract_res = {} |
1513 | for img_path_tmp, page_key in pdf_handler.img_path_pno_list: | 1545 | for img_path_tmp, page_key in pdf_handler.img_path_pno_list: |
1546 | if classify_1_str == str(consts.HMH_CLASSIFY): | ||
1547 | img_contract_res = { | ||
1548 | 'code': 1, | ||
1549 | 'data': [ | ||
1550 | { | ||
1551 | 'classify': consts.HMH_CLASSIFY, | ||
1552 | 'data': hmh_predict(pdf_handler.pdf_info) | ||
1553 | } | ||
1554 | ] | ||
1555 | } | ||
1556 | else: | ||
1514 | if page_key in page_res: | 1557 | if page_key in page_res: |
1515 | img_contract_res = { | 1558 | img_contract_res = { |
1516 | 'code': 1, | 1559 | 'code': 1, |
... | @@ -1966,6 +2009,9 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1966,6 +2009,9 @@ class Command(BaseCommand, LoggerMixin): |
1966 | report_list[5] = BSCheckResult.CHECK_FAILED.value | 2009 | report_list[5] = BSCheckResult.CHECK_FAILED.value |
1967 | 2010 | ||
1968 | finally: | 2011 | finally: |
2012 | self.online_log.info('{0} [task={1}] [license_summary={2}] ' | ||
2013 | '[contract_result_compare={3}]'.format(self.log_base, task_str, | ||
2014 | license_summary, contract_result_compare)) | ||
1969 | self.rebuild_contract(license_summary, contract_result_compare) | 2015 | self.rebuild_contract(license_summary, contract_result_compare) |
1970 | 2016 | ||
1971 | bs_rebuild = self.rebuild_bs(merged_bs_summary) | 2017 | bs_rebuild = self.rebuild_bs(merged_bs_summary) |
... | @@ -2015,6 +2061,16 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -2015,6 +2061,16 @@ class Command(BaseCommand, LoggerMixin): |
2015 | self.log_base, task_str, res_obj.id)) | 2061 | self.log_base, task_str, res_obj.id)) |
2016 | # 触发比对 | 2062 | # 触发比对 |
2017 | try: | 2063 | try: |
2064 | # 是否fsm | ||
2065 | cms_status_class = HILCmsStatusInfo if business_type in consts.HIL_SET else AFCCmsStatusInfo | ||
2066 | cms_status_info = cms_status_class.objects.filter(application_id=doc.application_id).first() | ||
2067 | is_fsm = cms_status_info is not None and cms_status_info.is_fsm == 1 | ||
2068 | self.online_log.info('{0} [isfsm] [task={1}] [true or false={2}]'.format( | ||
2069 | self.log_base, task_str, is_fsm)) | ||
2070 | if is_fsm: | ||
2071 | fsm_compare.apply_async((doc.application_id, business_type, None, res_obj.id, is_ca, True), | ||
2072 | queue='queue_compare') | ||
2073 | else: | ||
2018 | # pass | 2074 | # pass |
2019 | compare.apply_async((doc.application_id, business_type, None, res_obj.id, | 2075 | compare.apply_async((doc.application_id, business_type, None, res_obj.id, |
2020 | is_ca, True), queue='queue_compare') | 2076 | is_ca, True), queue='queue_compare') | ... | ... |
... | @@ -329,6 +329,11 @@ class AFCOCRResult(models.Model): | ... | @@ -329,6 +329,11 @@ class AFCOCRResult(models.Model): |
329 | hil_contract_2_ocr = models.TextField(null=True, verbose_name="HIL合同2") | 329 | hil_contract_2_ocr = models.TextField(null=True, verbose_name="HIL合同2") |
330 | hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3") | 330 | hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3") |
331 | qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书") | 331 | qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书") |
332 | fsm_wep_ocr = models.TextField(null=True, verbose_name="延长保修合同") | ||
333 | fsm_msi_ocr = models.TextField(null=True, verbose_name="长悦保养合同") | ||
334 | fsm_sc_ocr = models.TextField(null=True, verbose_name="汽车销售合同") | ||
335 | fsm_activited = models.IntegerField(null=False, default=0, verbose_name="fsm激活状态 1:激活") | ||
336 | |||
332 | 337 | ||
333 | update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间') | 338 | update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间') |
334 | create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间') | 339 | create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间') |
... | @@ -366,6 +371,11 @@ class HILOCRResult(models.Model): | ... | @@ -366,6 +371,11 @@ class HILOCRResult(models.Model): |
366 | hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3") | 371 | hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3") |
367 | qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书") | 372 | qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书") |
368 | 373 | ||
374 | fsm_wep_ocr = models.TextField(null=True, verbose_name="延长保修合同") | ||
375 | fsm_msi_ocr = models.TextField(null=True, verbose_name="长悦保养合同") | ||
376 | fsm_sc_ocr = models.TextField(null=True, verbose_name="汽车销售合同") | ||
377 | fsm_activited = models.IntegerField(null=False, default=0, verbose_name="fsm激活状态 1:激活") | ||
378 | |||
369 | update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间') | 379 | update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间') |
370 | create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间') | 380 | create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间') |
371 | 381 | ||
... | @@ -401,6 +411,11 @@ class AFCSEOCRResult(models.Model): | ... | @@ -401,6 +411,11 @@ class AFCSEOCRResult(models.Model): |
401 | hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3") | 411 | hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3") |
402 | qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书") | 412 | qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书") |
403 | 413 | ||
414 | fsm_wep_ocr = models.TextField(null=True, verbose_name="延长保修合同") | ||
415 | fsm_msi_ocr = models.TextField(null=True, verbose_name="长悦保养合同") | ||
416 | fsm_sc_ocr = models.TextField(null=True, verbose_name="汽车销售合同") | ||
417 | fsm_activited = models.IntegerField(null=False, default=0, verbose_name="fsm激活状态 1:激活") | ||
418 | |||
404 | update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间') | 419 | update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间') |
405 | create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间') | 420 | create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间') |
406 | 421 | ||
... | @@ -436,6 +451,10 @@ class HILSEOCRResult(models.Model): | ... | @@ -436,6 +451,10 @@ class HILSEOCRResult(models.Model): |
436 | hil_contract_2_ocr = models.TextField(null=True, verbose_name="HIL合同2") | 451 | hil_contract_2_ocr = models.TextField(null=True, verbose_name="HIL合同2") |
437 | hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3") | 452 | hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3") |
438 | qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书") | 453 | qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书") |
454 | fsm_wep_ocr = models.TextField(null=True, verbose_name="延长保修合同") | ||
455 | fsm_msi_ocr = models.TextField(null=True, verbose_name="长悦保养合同") | ||
456 | fsm_sc_ocr = models.TextField(null=True, verbose_name="汽车销售合同") | ||
457 | fsm_activited = models.IntegerField(null=False, default=0, verbose_name="fsm激活状态 1:激活") | ||
439 | 458 | ||
440 | update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间') | 459 | update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间') |
441 | create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间') | 460 | create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间') |
... | @@ -1042,3 +1061,41 @@ class AFCCompareReportNew(models.Model): | ... | @@ -1042,3 +1061,41 @@ class AFCCompareReportNew(models.Model): |
1042 | managed = False | 1061 | managed = False |
1043 | db_table = 'afc_compare_report_new' | 1062 | db_table = 'afc_compare_report_new' |
1044 | situ_db_label = 'afc' | 1063 | situ_db_label = 'afc' |
1064 | |||
1065 | |||
1066 | class NscInvoice(models.Model): | ||
1067 | id = models.AutoField(primary_key=True, verbose_name="id") # 主键 | ||
1068 | vin = models.CharField(max_length=64, verbose_name="车架号") # 索引 | ||
1069 | content = models.TextField(null=True, verbose_name="nsc发票信息") | ||
1070 | create_time = models.DateTimeField(verbose_name='创建时间') | ||
1071 | |||
1072 | class Meta: | ||
1073 | managed = False | ||
1074 | db_table = 'nsc_invoice' | ||
1075 | |||
1076 | |||
1077 | class AFCCmsStatusInfo(models.Model): | ||
1078 | id = models.AutoField(primary_key=True, verbose_name="id") # 主键 | ||
1079 | application_id = models.CharField(max_length=64, verbose_name="订单id") # 索引 | ||
1080 | business_type = models.CharField(max_length=64, verbose_name="业务类型") | ||
1081 | is_fsm = models.SmallIntegerField(null=False, default=0, verbose_name="是否fsm流程 1:是") | ||
1082 | update_time = models.DateTimeField(verbose_name='更新时间') | ||
1083 | create_time = models.DateTimeField(verbose_name='创建时间') | ||
1084 | |||
1085 | class Meta: | ||
1086 | managed = False | ||
1087 | db_table = 'afc_cms_status_info' | ||
1088 | situ_db_label = 'afc' | ||
1089 | |||
1090 | |||
1091 | class HILCmsStatusInfo(models.Model): | ||
1092 | id = models.AutoField(primary_key=True, verbose_name="id") # 主键 | ||
1093 | application_id = models.CharField(max_length=64, verbose_name="订单id") # 索引 | ||
1094 | business_type = models.CharField(max_length=64, verbose_name="业务类型") | ||
1095 | is_fsm = models.SmallIntegerField(null=False, default=0, verbose_name="是否fsm流程 1:是") | ||
1096 | update_time = models.DateTimeField(verbose_name='更新时间') | ||
1097 | create_time = models.DateTimeField(verbose_name='创建时间') | ||
1098 | |||
1099 | class Meta: | ||
1100 | managed = False | ||
1101 | db_table = 'hil_cms_status_info' | ... | ... |
... | @@ -27,6 +27,7 @@ class RequestTeam(NamedEnum): | ... | @@ -27,6 +27,7 @@ class RequestTeam(NamedEnum): |
27 | SETTLEMENT = (1, 'SETTLEMENT') | 27 | SETTLEMENT = (1, 'SETTLEMENT') |
28 | CONTRACTMANAGEMENT = (2, 'CONTRACTMANAGEMENT') | 28 | CONTRACTMANAGEMENT = (2, 'CONTRACTMANAGEMENT') |
29 | CONTROLLING = (3, 'CONTROLLING') | 29 | CONTROLLING = (3, 'CONTROLLING') |
30 | INSURANCE = (4, 'INSURANCE') | ||
30 | 31 | ||
31 | 32 | ||
32 | class RequestTrigger(NamedEnum): | 33 | class RequestTrigger(NamedEnum): |
... | @@ -36,6 +37,7 @@ class RequestTrigger(NamedEnum): | ... | @@ -36,6 +37,7 @@ class RequestTrigger(NamedEnum): |
36 | DOCUPLOAD = (3, 'Document Upload') | 37 | DOCUPLOAD = (3, 'Document Upload') |
37 | SUBMITING = (4, 'Submiting') | 38 | SUBMITING = (4, 'Submiting') |
38 | UPLOADING = (5, 'Uploading') | 39 | UPLOADING = (5, 'Uploading') |
40 | OVP = (6, 'OVP') | ||
39 | 41 | ||
40 | 42 | ||
41 | class FailureReason(NamedEnum): | 43 | class FailureReason(NamedEnum): | ... | ... |
... | @@ -34,6 +34,7 @@ class ECM: | ... | @@ -34,6 +34,7 @@ class ECM: |
34 | 'ACCEPTANCE': ('acceptance', conf.ECM_FOLDER_CA, conf.ECM_FOLDER_CA_HIL), | 34 | 'ACCEPTANCE': ('acceptance', conf.ECM_FOLDER_CA, conf.ECM_FOLDER_CA_HIL), |
35 | 'SETTLEMENT': (self.settlement_type, conf.ECM_FOLDER_SE, conf.ECM_FOLDER_SE_HIL), | 35 | 'SETTLEMENT': (self.settlement_type, conf.ECM_FOLDER_SE, conf.ECM_FOLDER_SE_HIL), |
36 | 'CONTRACTMANAGEMENT': ('contract_management', conf.ECM_FOLDER_CA, conf.ECM_FOLDER_CA_HIL), | 36 | 'CONTRACTMANAGEMENT': ('contract_management', conf.ECM_FOLDER_CA, conf.ECM_FOLDER_CA_HIL), |
37 | 'INSURANCE': ('insurance', conf.ECM_FOLDER_SE, conf.ECM_FOLDER_SE_HIL), | ||
37 | } | 38 | } |
38 | self.doc_base_map = { | 39 | self.doc_base_map = { |
39 | 'AFC': 'SF5_CN', | 40 | 'AFC': 'SF5_CN', | ... | ... |
... | @@ -808,10 +808,12 @@ class BSWorkbook(Workbook): | ... | @@ -808,10 +808,12 @@ class BSWorkbook(Workbook): |
808 | if field_str is not None: | 808 | if field_str is not None: |
809 | count_list.append((field_str, count)) | 809 | count_list.append((field_str, count)) |
810 | 810 | ||
811 | def contract_rebuild(self, contract_result_dict): | 811 | def contract_rebuild(self, contract_result_dict, is_ca=False): |
812 | for classify, contract_result in contract_result_dict.items(): | 812 | for classify, contract_result in contract_result_dict.items(): |
813 | if len(contract_result) == 0: | 813 | if len(contract_result) == 0: |
814 | continue | 814 | continue |
815 | if is_ca and classify not in consts.FSM_CONTRACT_CLASSIFY_SET: | ||
816 | continue | ||
815 | ws = self.create_sheet(consts.CONTRACT_MAP.get(classify)) | 817 | ws = self.create_sheet(consts.CONTRACT_MAP.get(classify)) |
816 | for i in range(30): | 818 | for i in range(30): |
817 | if str(i) in contract_result: | 819 | if str(i) in contract_result: |
... | @@ -934,6 +936,7 @@ class BSWorkbook(Workbook): | ... | @@ -934,6 +936,7 @@ class BSWorkbook(Workbook): |
934 | else: | 936 | else: |
935 | self.bs_rebuild(bs_summary, res_count_tuple, metadata) | 937 | self.bs_rebuild(bs_summary, res_count_tuple, metadata) |
936 | self.license_rebuild(license_summary, document_scheme, count_list) | 938 | self.license_rebuild(license_summary, document_scheme, count_list) |
939 | self.contract_rebuild(contract_result, True) | ||
937 | self.move_res_sheet() | 940 | self.move_res_sheet() |
938 | self.remove_base_sheet() | 941 | self.remove_base_sheet() |
939 | return count_list, self.need_follow | 942 | return count_list, self.need_follow | ... | ... |
... | @@ -48,14 +48,23 @@ from .models import ( | ... | @@ -48,14 +48,23 @@ from .models import ( |
48 | MposReport, | 48 | MposReport, |
49 | GenericOCRReport, | 49 | GenericOCRReport, |
50 | InterfaceReport, | 50 | InterfaceReport, |
51 | HILOCRResult, | ||
52 | HILSEOCRResult, | ||
53 | AFCOCRResult, | ||
54 | AFCSEOCRResult, | ||
55 | HILCmsStatusInfo, | ||
56 | AFCCmsStatusInfo | ||
51 | ) | 57 | ) |
52 | from .named_enum import ErrorType, AutoResult, WholeResult, RPAResult, SystemName | 58 | from .named_enum import ErrorType, AutoResult, WholeResult, RPAResult, SystemName, RequestTeam |
53 | from .mixins import DocHandler, MPOSHandler, PreSEHandler | 59 | from .mixins import DocHandler, MPOSHandler, PreSEHandler |
54 | from . import consts | 60 | from . import consts |
55 | from apps.account.authentication import OAuth2AuthenticationWithUser | 61 | from apps.account.authentication import OAuth2AuthenticationWithUser |
56 | from celery_compare.tasks import compare | 62 | from celery_compare.tasks import compare, fsm_compare |
63 | from prese.compare import get_empty_result | ||
57 | 64 | ||
58 | import time | 65 | import time |
66 | |||
67 | |||
59 | class CustomDate(fields.Date): | 68 | class CustomDate(fields.Date): |
60 | 69 | ||
61 | def _deserialize(self, value, attr, data, **kwargs): | 70 | def _deserialize(self, value, attr, data, **kwargs): |
... | @@ -248,6 +257,7 @@ se_compare_content = { | ... | @@ -248,6 +257,7 @@ se_compare_content = { |
248 | 'fsmSpecialCar': fields.Boolean(required=False), | 257 | 'fsmSpecialCar': fields.Boolean(required=False), |
249 | 'fsmBestPrice': fields.Boolean(required=False), | 258 | 'fsmBestPrice': fields.Boolean(required=False), |
250 | 'isAutoSettlement': fields.Boolean(required=False), | 259 | 'isAutoSettlement': fields.Boolean(required=False), |
260 | 'fsmLandingDealer': fields.Str(required=False, validate=validate.Length(max=1024)), | ||
251 | 261 | ||
252 | 'individualCusInfo': fields.List(fields.Nested(se_individual_args), | 262 | 'individualCusInfo': fields.List(fields.Nested(se_individual_args), |
253 | required=True, validate=validate.Length(min=1, max=4)), | 263 | required=True, validate=validate.Length(min=1, max=4)), |
... | @@ -551,6 +561,7 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -551,6 +561,7 @@ class UploadDocView(GenericView, DocHandler): |
551 | # authentication_classes = [] | 561 | # authentication_classes = [] |
552 | permission_classes = [IsAuthenticated] | 562 | permission_classes = [IsAuthenticated] |
553 | authentication_classes = [OAuth2AuthenticationWithUser] | 563 | authentication_classes = [OAuth2AuthenticationWithUser] |
564 | |||
554 | # required_scopes = ['write'] | 565 | # required_scopes = ['write'] |
555 | 566 | ||
556 | # 上传(接收)文件接口 | 567 | # 上传(接收)文件接口 |
... | @@ -563,6 +574,8 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -563,6 +574,8 @@ class UploadDocView(GenericView, DocHandler): |
563 | document = args.get('document') | 574 | document = args.get('document') |
564 | business_type = document.get('businessType') | 575 | business_type = document.get('businessType') |
565 | application_id = application_data.get('applicationId') | 576 | application_id = application_data.get('applicationId') |
577 | # 包含FSM 激活状态 | ||
578 | application_status = application_data.get('applicationStatus', '') | ||
566 | document_scheme = document.get('documentScheme') | 579 | document_scheme = document.get('documentScheme') |
567 | data_source = document.get('dataSource') | 580 | data_source = document.get('dataSource') |
568 | document_name = document.get('documentName', '') | 581 | document_name = document.get('documentName', '') |
... | @@ -571,6 +584,34 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -571,6 +584,34 @@ class UploadDocView(GenericView, DocHandler): |
571 | data_source = self.fix_data_source(data_source) | 584 | data_source = self.fix_data_source(data_source) |
572 | document_scheme = self.fix_scheme(document_scheme) | 585 | document_scheme = self.fix_scheme(document_scheme) |
573 | 586 | ||
587 | # fsm激活状态, 更新ocr_result 表fsm状态 | ||
588 | self.running_log.info('[doc upload applicationId-{0}] [applicationStatus-{1}, activated-{2}]' | ||
589 | .format(application_id, application_status, | ||
590 | True if consts.FSM_ACTIVITED_STATUS.get(application_status) else False)) | ||
591 | if consts.FSM_ACTIVITED_STATUS.get(application_status): | ||
592 | result_class = None | ||
593 | if business_type == consts.HIL_PREFIX: | ||
594 | if document_scheme == RequestTeam.ACCEPTANCE.name: | ||
595 | result_class = HILOCRResult | ||
596 | elif document_scheme == RequestTeam.SETTLEMENT.name or document_scheme == RequestTeam.INSURANCE.name: | ||
597 | result_class = HILSEOCRResult | ||
598 | elif business_type == consts.AFC_PREFIX: | ||
599 | if document_scheme == RequestTeam.ACCEPTANCE.name: | ||
600 | result_class = AFCOCRResult | ||
601 | elif document_scheme == RequestTeam.SETTLEMENT.name or document_scheme == RequestTeam.INSURANCE.name: | ||
602 | result_class = AFCSEOCRResult | ||
603 | |||
604 | ocr_result_obj = result_class.objects.filter(application_id=application_id).first() | ||
605 | if ocr_result_obj: | ||
606 | ocr_result_obj.fsm_activited = 1 | ||
607 | ocr_result_obj.save() | ||
608 | else: | ||
609 | ocr_result_obj = result_class() | ||
610 | ocr_result_obj.application_id = application_id | ||
611 | ocr_result_obj.fsm_activited = 1 | ||
612 | ocr_result_obj.save() | ||
613 | |||
614 | self.running_log.info('[doc upload applicationId-{0}] [ocr result saved]'.format(application_id)) | ||
574 | if data_source == consts.DATA_SOURCE_LIST[1]: | 615 | if data_source == consts.DATA_SOURCE_LIST[1]: |
575 | if document_name.endswith('-证书.pdf') or document_name.endswith('-证书'): | 616 | if document_name.endswith('-证书.pdf') or document_name.endswith('-证书'): |
576 | self.running_log.info('[doc upload success] [eapp license skip] [args={0}]'.format(args)) | 617 | self.running_log.info('[doc upload success] [eapp license skip] [args={0}]'.format(args)) |
... | @@ -602,13 +643,22 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -602,13 +643,22 @@ class UploadDocView(GenericView, DocHandler): |
602 | is_zip = False | 643 | is_zip = False |
603 | 644 | ||
604 | classify_1 = 0 | 645 | classify_1 = 0 |
605 | # 电子合同 | 646 | # 电子合同 Econtract or OVP(FSM) |
606 | if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]: | 647 | if data_source == consts.DATA_SOURCE_LIST[2] or data_source == consts.DATA_SOURCE_LIST[3]: |
648 | if document_scheme == consts.DOC_SCHEME_LIST[1]: | ||
607 | for keyword, classify_1_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix): | 649 | for keyword, classify_1_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix): |
608 | if keyword in document_name: | 650 | if keyword in document_name: |
609 | classify_1 = classify_1_tmp | 651 | classify_1 = classify_1_tmp |
610 | break | 652 | break |
611 | elif document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \ | 653 | # FSM合同:WEP/MSI/SC |
654 | elif data_source == consts.DATA_SOURCE_LIST[0] and document_scheme == consts.DOC_SCHEME_LIST[0]: | ||
655 | for keyword, classify_1_tmp in consts.FSM_ECONTRACT_KEYWORDS_MAP.get(prefix): | ||
656 | if keyword in document_name: | ||
657 | classify_1 = classify_1_tmp | ||
658 | break | ||
659 | |||
660 | |||
661 | if document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \ | ||
612 | or document_name.endswith('.RAR'): | 662 | or document_name.endswith('.RAR'): |
613 | is_zip = True | 663 | is_zip = True |
614 | 664 | ||
... | @@ -809,6 +859,9 @@ class CompareView(GenericView): | ... | @@ -809,6 +859,9 @@ class CompareView(GenericView): |
809 | ''' | 859 | ''' |
810 | 860 | ||
811 | 861 | ||
862 | pre_fsm_url = conf.PRE_FSM_URL | ||
863 | |||
864 | |||
812 | class SECompareView(GenericView, PreSEHandler): | 865 | class SECompareView(GenericView, PreSEHandler): |
813 | permission_classes = [IsAuthenticated] | 866 | permission_classes = [IsAuthenticated] |
814 | authentication_classes = [OAuth2AuthenticationWithUser] | 867 | authentication_classes = [OAuth2AuthenticationWithUser] |
... | @@ -829,7 +882,52 @@ class SECompareView(GenericView, PreSEHandler): | ... | @@ -829,7 +882,52 @@ class SECompareView(GenericView, PreSEHandler): |
829 | fsm_flag = content.get('fsmFlag', False) | 882 | fsm_flag = content.get('fsmFlag', False) |
830 | fsm_special_car = content.get('fsmSpecialCar', False) | 883 | fsm_special_car = content.get('fsmSpecialCar', False) |
831 | fsm_best_price = content.get('fsmBestPrice', False) | 884 | fsm_best_price = content.get('fsmBestPrice', False) |
885 | fsm_landing_dealer = content.get('fsmLandingDealer') | ||
832 | 886 | ||
887 | if fsm_special_car: | ||
888 | compare_result = { | ||
889 | "is_pass": False, | ||
890 | "particulars": [{ | ||
891 | "object_name": "", | ||
892 | "fields": [{ | ||
893 | "input": "", | ||
894 | "ocr": "", | ||
895 | "field_is_pass": False, | ||
896 | "comments": "此申请为FSM 特殊申请,暂不支持预放款流程" | ||
897 | }] | ||
898 | }] | ||
899 | } | ||
900 | elif fsm_best_price: | ||
901 | compare_result = { | ||
902 | "is_pass": False, | ||
903 | "particulars": [{ | ||
904 | "object_name": "", | ||
905 | "fields": [{ | ||
906 | "input": "", | ||
907 | "ocr": "", | ||
908 | "field_is_pass": False, | ||
909 | "comments": "此申请为FSM 特殊申请,暂不支持预放款流程" | ||
910 | }] | ||
911 | }] | ||
912 | } | ||
913 | elif fsm_flag and (not fsm_special_car or not fsm_best_price): | ||
914 | # 调用Java pre fsm接口 | ||
915 | try: | ||
916 | self.running_log.info("{0} request java pre fsm api, url:{1}, body:{2}".format(log_base, pre_fsm_url, json.dumps(content))) | ||
917 | headers = { | ||
918 | 'Content-Type': 'application/json' | ||
919 | } | ||
920 | resp = requests.post(pre_fsm_url, headers=headers, json=content) | ||
921 | self.running_log.info("{0} response from java pre fsm api, resp:{1}".format(log_base, resp.text)) | ||
922 | result = json.loads(resp.text) | ||
923 | compare_result = result.get("result") | ||
924 | if not compare_result: | ||
925 | compare_result = get_empty_result() | ||
926 | except Exception as e: | ||
927 | self.running_log.error("{0} pre fsm request to java error, url:{1}, param:{2}, errorMsg:{3}".format( | ||
928 | log_base, pre_fsm_url, json.dumps(content), traceback.format_exc())) | ||
929 | compare_result = get_empty_result() | ||
930 | elif not fsm_flag: | ||
833 | # 存库, 用于银行卡比对 | 931 | # 存库, 用于银行卡比对 |
834 | try: | 932 | try: |
835 | bank_class = HILbankVerification if business_type in consts.HIL_SET else AFCbankVerification | 933 | bank_class = HILbankVerification if business_type in consts.HIL_SET else AFCbankVerification |
... | @@ -853,7 +951,8 @@ class SECompareView(GenericView, PreSEHandler): | ... | @@ -853,7 +951,8 @@ class SECompareView(GenericView, PreSEHandler): |
853 | # preSettlement比对 | 951 | # preSettlement比对 |
854 | compare_result = self.pre_compare_entrance(content) | 952 | compare_result = self.pre_compare_entrance(content) |
855 | self.running_log.info('{0} [prese completed] [applicationEntity={1}] [application_id={2}] [uniq_seq={3}] ' | 953 | self.running_log.info('{0} [prese completed] [applicationEntity={1}] [application_id={2}] [uniq_seq={3}] ' |
856 | '[result={4}]'.format(log_base, business_type, application_id, uniq_seq, compare_result)) | 954 | '[result={4}]'.format(log_base, business_type, application_id, uniq_seq, |
955 | compare_result)) | ||
857 | 956 | ||
858 | try: | 957 | try: |
859 | end_time = time.time() | 958 | end_time = time.time() |
... | @@ -956,10 +1055,10 @@ class DocView(GenericView, DocHandler): | ... | @@ -956,10 +1055,10 @@ class DocView(GenericView, DocHandler): |
956 | application_id_query = Q(application_id__contains=application_id) if application_id is not None else Q() | 1055 | application_id_query = Q(application_id__contains=application_id) if application_id is not None else Q() |
957 | data_source_query = Q(data_source=data_source) if data_source is not None else Q() | 1056 | data_source_query = Q(data_source=data_source) if data_source is not None else Q() |
958 | upload_finish_time_query = Q(upload_finish_time__gte=upload_time_start, | 1057 | upload_finish_time_query = Q(upload_finish_time__gte=upload_time_start, |
959 | upload_finish_time__lt=upload_time_end + datetime.timedelta(days=1))\ | 1058 | upload_finish_time__lt=upload_time_end + datetime.timedelta(days=1)) \ |
960 | if upload_time_start is not None and upload_time_end is not None else Q() | 1059 | if upload_time_start is not None and upload_time_end is not None else Q() |
961 | create_time_query = Q(create_time__gte=create_time_start, | 1060 | create_time_query = Q(create_time__gte=create_time_start, |
962 | create_time__lt=create_time_end + datetime.timedelta(days=1))\ | 1061 | create_time__lt=create_time_end + datetime.timedelta(days=1)) \ |
963 | if create_time_start is not None and create_time_end is not None else Q() | 1062 | if create_time_start is not None and create_time_end is not None else Q() |
964 | query = application_id_query & status_query & data_source_query & upload_finish_time_query & create_time_query | 1063 | query = application_id_query & status_query & data_source_query & upload_finish_time_query & create_time_query |
965 | val_tuple = ('id', 'application_id', 'upload_finish_time', 'create_time', 'document_scheme', 'data_source', | 1064 | val_tuple = ('id', 'application_id', 'upload_finish_time', 'create_time', 'document_scheme', 'data_source', |
... | @@ -971,10 +1070,11 @@ class DocView(GenericView, DocHandler): | ... | @@ -971,10 +1070,11 @@ class DocView(GenericView, DocHandler): |
971 | if start_index >= total > 0: | 1070 | if start_index >= total > 0: |
972 | raise self.invalid_params('页数不存在') | 1071 | raise self.invalid_params('页数不存在') |
973 | 1072 | ||
974 | doc_queryset = doc_class.objects.filter(query).values(*val_tuple).order_by('-create_time')[start_index: end_index] | 1073 | doc_queryset = doc_class.objects.filter(query).values(*val_tuple).order_by('-create_time')[ |
1074 | start_index: end_index] | ||
975 | # doc_list = self.get_doc_list(doc_queryset, prefix) | 1075 | # doc_list = self.get_doc_list(doc_queryset, prefix) |
976 | for doc_dict in doc_queryset: | 1076 | for doc_dict in doc_queryset: |
977 | tmp_scheme = consts.COMPARE_DOC_SCHEME_LIST[0] if doc_dict['document_scheme'] == consts.DOC_SCHEME_LIST[0]\ | 1077 | tmp_scheme = consts.COMPARE_DOC_SCHEME_LIST[0] if doc_dict['document_scheme'] == consts.DOC_SCHEME_LIST[0] \ |
978 | else consts.COMPARE_DOC_SCHEME_LIST[1] | 1078 | else consts.COMPARE_DOC_SCHEME_LIST[1] |
979 | application_link = '{0}/showList/showList?entity={1}&scheme={2}&case_id={3}'.format( | 1079 | application_link = '{0}/showList/showList?entity={1}&scheme={2}&case_id={3}'.format( |
980 | conf.BASE_URL, prefix, tmp_scheme, doc_dict['application_id']) | 1080 | conf.BASE_URL, prefix, tmp_scheme, doc_dict['application_id']) |
... | @@ -1021,7 +1121,6 @@ class DocView(GenericView, DocHandler): | ... | @@ -1021,7 +1121,6 @@ class DocView(GenericView, DocHandler): |
1021 | # os.remove(tmp_save_path) | 1121 | # os.remove(tmp_save_path) |
1022 | # raise self.invalid_params(msg='invalid params: PDF file XSS') | 1122 | # raise self.invalid_params(msg='invalid params: PDF file XSS') |
1023 | 1123 | ||
1024 | |||
1025 | file.close() | 1124 | file.close() |
1026 | # 1. 上传信息记录 | 1125 | # 1. 上传信息记录 |
1027 | application_id = '{0}{1}'.format(consts.FIXED_APPLICATION_ID_PREFIX, metadata_version_id) | 1126 | application_id = '{0}{1}'.format(consts.FIXED_APPLICATION_ID_PREFIX, metadata_version_id) |
... | @@ -1104,7 +1203,8 @@ class CompareResultView(GenericView): | ... | @@ -1104,7 +1203,8 @@ class CompareResultView(GenericView): |
1104 | latest_compared_time = '' | 1203 | latest_compared_time = '' |
1105 | else: | 1204 | else: |
1106 | whole_result = consts.RESULT_Y if result_obj.ocr_auto_result_pass else consts.RESULT_N | 1205 | whole_result = consts.RESULT_Y if result_obj.ocr_auto_result_pass else consts.RESULT_N |
1107 | latest_compared_time = '' if result_obj.ocr_latest_comparison_time is None else result_obj.ocr_latest_comparison_time.strftime('%Y-%m-%d %H:%M') | 1206 | latest_compared_time = '' if result_obj.ocr_latest_comparison_time is None else result_obj.ocr_latest_comparison_time.strftime( |
1207 | '%Y-%m-%d %H:%M') | ||
1108 | 1208 | ||
1109 | source = consts.INFO_SOURCE[1] | 1209 | source = consts.INFO_SOURCE[1] |
1110 | version = comments = '' | 1210 | version = comments = '' |
... | @@ -1120,7 +1220,8 @@ class CompareResultView(GenericView): | ... | @@ -1120,7 +1220,8 @@ class CompareResultView(GenericView): |
1120 | 'source': source, | 1220 | 'source': source, |
1121 | 'version': version, | 1221 | 'version': version, |
1122 | 'comments': comments, | 1222 | 'comments': comments, |
1123 | 'result': [] if result_obj is None or not result_obj.ocr_auto_result else json.loads(result_obj.ocr_auto_result) | 1223 | 'result': [] if result_obj is None or not result_obj.ocr_auto_result else json.loads( |
1224 | result_obj.ocr_auto_result) | ||
1124 | } | 1225 | } |
1125 | 1226 | ||
1126 | return response.ok(data=compare_result) | 1227 | return response.ok(data=compare_result) |
... | @@ -1155,7 +1256,8 @@ class CompareResultView(GenericView): | ... | @@ -1155,7 +1256,8 @@ class CompareResultView(GenericView): |
1155 | 'id': 0 if result_obj is None else result_obj.id, | 1256 | 'id': 0 if result_obj is None else result_obj.id, |
1156 | 'application_id': case_id, | 1257 | 'application_id': case_id, |
1157 | 'entity': entity, | 1258 | 'entity': entity, |
1158 | 'scheme': consts.DOC_SCHEME_LIST[0] if scheme == consts.COMPARE_DOC_SCHEME_LIST[0] else consts.DOC_SCHEME_LIST[1], | 1259 | 'scheme': consts.DOC_SCHEME_LIST[0] if scheme == consts.COMPARE_DOC_SCHEME_LIST[0] else |
1260 | consts.DOC_SCHEME_LIST[1], | ||
1159 | 'whole_result': whole_result, | 1261 | 'whole_result': whole_result, |
1160 | 'latest_compared_time': '' if result_obj is None else result_obj.update_time.strftime('%Y-%m-%d %H:%M'), | 1262 | 'latest_compared_time': '' if result_obj is None else result_obj.update_time.strftime('%Y-%m-%d %H:%M'), |
1161 | 'source': source, | 1263 | 'source': source, |
... | @@ -1328,7 +1430,8 @@ class SECMSView(GenericView): | ... | @@ -1328,7 +1430,8 @@ class SECMSView(GenericView): |
1328 | 1430 | ||
1329 | args = request.data | 1431 | args = request.data |
1330 | cms_info = args.get('content', {}) | 1432 | cms_info = args.get('content', {}) |
1331 | business_type = consts.AFC_PREFIX if cms_info.get('financeCompany', '').startswith('宝马') else consts.HIL_PREFIX | 1433 | business_type = consts.AFC_PREFIX if cms_info.get('financeCompany', '').startswith( |
1434 | '宝马') else consts.HIL_PREFIX | ||
1332 | src_application_id = cms_info.get('settlemnetVerification', {}).get('applicationNo', '') | 1435 | src_application_id = cms_info.get('settlemnetVerification', {}).get('applicationNo', '') |
1333 | application_id = src_application_id[:src_application_id.rfind('-')] | 1436 | application_id = src_application_id[:src_application_id.rfind('-')] |
1334 | 1437 | ||
... | @@ -1363,6 +1466,32 @@ class SECMSView(GenericView): | ... | @@ -1363,6 +1466,32 @@ class SECMSView(GenericView): |
1363 | content=content_str, | 1466 | content=content_str, |
1364 | ) | 1467 | ) |
1365 | 1468 | ||
1469 | # 检查是否fsm流程(SE) | ||
1470 | fsm_contract = cms_info.get('FSMContract', False) | ||
1471 | fsm_best_price = cms_info.get('FSMBestPrice', False) | ||
1472 | if fsm_contract: | ||
1473 | # 记录fsm 流程的cms 提交 | ||
1474 | try: | ||
1475 | cms_status_class = HILCmsStatusInfo if business_type in consts.HIL_SET else AFCCmsStatusInfo | ||
1476 | cms_status_info = cms_status_class.objects.filter(application_id=application_id).first() | ||
1477 | if cms_status_info: | ||
1478 | cms_status_info.is_fsm = 1 | ||
1479 | cms_status_info.update_time = datetime.datetime.now() | ||
1480 | cms_status_info.save() | ||
1481 | else: | ||
1482 | cms_status_info = cms_status_class() | ||
1483 | cms_status_info.application_id = application_id | ||
1484 | cms_status_info.business_type = business_type | ||
1485 | cms_status_info.is_fsm = 1 | ||
1486 | cms_status_info.update_time = datetime.datetime.now() | ||
1487 | cms_status_info.create_time = datetime.datetime.now() | ||
1488 | cms_status_info.save() | ||
1489 | except Exception as e: | ||
1490 | self.exception_log.exception( | ||
1491 | '[cms view] [cms_status_info db save failed] [error={0}]'.format(traceback.format_exc())) | ||
1492 | fsm_compare.apply_async((application_id, business_type, None, None, False, True), | ||
1493 | queue='queue_compare') | ||
1494 | else: | ||
1366 | # 触发比对 | 1495 | # 触发比对 |
1367 | compare.apply_async((application_id, business_type, None, None, False, True), | 1496 | compare.apply_async((application_id, business_type, None, None, False, True), |
1368 | queue='queue_compare') | 1497 | queue='queue_compare') |
... | @@ -1458,7 +1587,7 @@ class AutoSettlementView(GenericView): | ... | @@ -1458,7 +1587,7 @@ class AutoSettlementView(GenericView): |
1458 | whole_result_query = Q(ocr_whole_result_pass=whole_result) if not isinstance(whole_result, str) else Q() | 1587 | whole_result_query = Q(ocr_whole_result_pass=whole_result) if not isinstance(whole_result, str) else Q() |
1459 | rpa_result_query = Q(rpa_result=rpa_result) if not isinstance(rpa_result, str) else Q() | 1588 | rpa_result_query = Q(rpa_result=rpa_result) if not isinstance(rpa_result, str) else Q() |
1460 | time1_query = Q(rpa_get_case_from_ocr_time__gte=get_case_from_ocr_time_start, | 1589 | time1_query = Q(rpa_get_case_from_ocr_time__gte=get_case_from_ocr_time_start, |
1461 | rpa_get_case_from_ocr_time__lt=get_case_from_ocr_time_end + datetime.timedelta(days=1))\ | 1590 | rpa_get_case_from_ocr_time__lt=get_case_from_ocr_time_end + datetime.timedelta(days=1)) \ |
1462 | if get_case_from_ocr_time_start is not None and get_case_from_ocr_time_end is not None else Q() | 1591 | if get_case_from_ocr_time_start is not None and get_case_from_ocr_time_end is not None else Q() |
1463 | time2_query = Q(rpa_activated_time__gte=activated_time_start, | 1592 | time2_query = Q(rpa_activated_time__gte=activated_time_start, |
1464 | rpa_activated_time__lt=activated_time_end + datetime.timedelta(days=1)) \ | 1593 | rpa_activated_time__lt=activated_time_end + datetime.timedelta(days=1)) \ | ... | ... |
... | @@ -7,6 +7,9 @@ import traceback | ... | @@ -7,6 +7,9 @@ import traceback |
7 | import numpy as np | 7 | import numpy as np |
8 | from datetime import datetime, timedelta | 8 | from datetime import datetime, timedelta |
9 | from collections import OrderedDict | 9 | from collections import OrderedDict |
10 | |||
11 | import requests | ||
12 | |||
10 | from . import app | 13 | from . import app |
11 | from settings import conf | 14 | from settings import conf |
12 | from apps.doc.models import ( | 15 | from apps.doc.models import ( |
... | @@ -3266,6 +3269,33 @@ def se_compare(application_id, application_entity, ocr_res_id, last_obj, ocr_res | ... | @@ -3266,6 +3269,33 @@ def se_compare(application_id, application_entity, ocr_res_id, last_obj, ocr_res |
3266 | 3269 | ||
3267 | 3270 | ||
@app.task
def fsm_compare(application_id, application_entity, uniq_seq, ocr_res_id, is_ca=True, is_cms=False):
    """Ask the Java FSM service to run the comparison for one application.

    The task only forwards the request over HTTP and logs the outcome; it
    returns nothing and never raises, so a failed call does not fail the task.

    Args:
        application_id: Application identifier sent as ``applicationId``.
        application_entity: Business type sent as ``businessType``.
        uniq_seq: Logged for traceability only; it is not part of the request body.
        ocr_res_id: OCR result id sent as ``ocrResId`` (may be None).
        is_ca: Sent as ``isCa``.
        is_cms: Sent as ``isCms``.
    """
    compare_log.info('{0} [receive fsm task] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}] [is_ca={5}] '
                     '[is_cms={6}]'.format(log_base, application_entity, application_id, uniq_seq, ocr_res_id,
                                           is_ca, is_cms))
    # Call the Java FSM comparison endpoint over HTTP. FSM is a settlement (SE)
    # flow; the CA case can be ignored for now.
    url = conf.FSM_URL
    body = {
        'applicationId': application_id,
        'businessType': application_entity,
        'ocrResId': ocr_res_id,
        'isCa': is_ca,
        'isCms': is_cms
    }
    headers = {
        'Content-Type': 'application/json'
    }
    # (connect, read) seconds. Without a timeout a stalled endpoint would pin
    # this worker forever; the read limit is generous because the remote
    # comparison may be slow -- NOTE(review): tune to the service's real latency.
    timeout = (10, 300)
    try:
        compare_log.info("request java fsm api, url:{0}, body:{1}".format(url, json.dumps(body)))
        resp = requests.post(url, headers=headers, json=body, timeout=timeout)
        compare_log.info("response from fsm api, resp:{0}".format(resp.text))
        if not resp.ok:
            # Surface HTTP-level failures instead of leaving them at info level.
            compare_log.error("fsm api returned http status {0}, url:{1}, param:{2}".format(
                resp.status_code, url, json.dumps(body)))
    except Exception:
        # Task boundary: log and swallow so the worker keeps running.
        compare_log.error("fsm full request to java error, url:{0}, param:{1}, errorMsg:{2}".format(
            url, json.dumps(body), traceback.format_exc()))
3296 | |||
3297 | |||
3298 | @app.task | ||
3269 | def compare(application_id, application_entity, uniq_seq, ocr_res_id, is_ca=True, is_cms=False): | 3299 | def compare(application_id, application_entity, uniq_seq, ocr_res_id, is_ca=True, is_cms=False): |
3270 | # POS: application_id, application_entity, uniq_seq, None | 3300 | # POS: application_id, application_entity, uniq_seq, None |
3271 | # OCR: application_id, business_type(application_entity), None, ocr_res_id | 3301 | # OCR: application_id, business_type(application_entity), None, ocr_res_id | ... | ... |
... | @@ -6,6 +6,7 @@ | ... | @@ -6,6 +6,7 @@ |
6 | # @Description : | 6 | # @Description : |
7 | 7 | ||
8 | from .get_char import Finder | 8 | from .get_char import Finder |
9 | from .get_char_fsm import Finder as FSMFinder | ||
9 | import numpy as np | 10 | import numpy as np |
10 | 11 | ||
11 | 12 | ||
... | @@ -23,7 +24,7 @@ def extract_info(ocr_results): | ... | @@ -23,7 +24,7 @@ def extract_info(ocr_results): |
23 | return {'page_1': {'合同编号': contract_no}} | 24 | return {'page_1': {'合同编号': contract_no}} |
24 | 25 | ||
25 | 26 | ||
26 | def predict(pdf_info, is_qrs=False): | 27 | def predict(pdf_info, is_qrs=False, is_fsm=False): |
27 | ocr_results = {} | 28 | ocr_results = {} |
28 | for pno in pdf_info: | 29 | for pno in pdf_info: |
29 | ocr_results[pno] = {} | 30 | ocr_results[pno] = {} |
... | @@ -50,6 +51,9 @@ def predict(pdf_info, is_qrs=False): | ... | @@ -50,6 +51,9 @@ def predict(pdf_info, is_qrs=False): |
50 | results = extract_info(ocr_results) | 51 | results = extract_info(ocr_results) |
51 | else: | 52 | else: |
52 | # 输入是整个 PDF 中的信息 | 53 | # 输入是整个 PDF 中的信息 |
54 | if is_fsm: | ||
55 | f = FSMFinder(pdf_info, ocr_results=ocr_results) | ||
56 | else: | ||
53 | f = Finder(pdf_info, ocr_results=ocr_results) | 57 | f = Finder(pdf_info, ocr_results=ocr_results) |
54 | results = f.get_info() | 58 | results = f.get_info() |
55 | return results | 59 | return results | ... | ... |
1 | import re | ||
2 | import numpy as np | ||
3 | from fuzzywuzzy import fuzz | ||
4 | from shapely.geometry import Polygon | ||
5 | |||
6 | |||
7 | class Finder: | ||
8 | |||
def __init__(self, pdf_info, ocr_results):
    """Keep the parsed PDF data and OCR output for later field extraction.

    Args:
        pdf_info: Per-page PDF structure, indexed by page key elsewhere in the class.
        ocr_results: Per-page mapping whose values are unpacked as (bbox, text) pairs.
    """
    # Blank field template; callers copy it before filling in a value.
    blank_field = dict.fromkeys(("words", "position"))
    self.item = blank_field
    self.is_asp = False
    self.ocr_results = ocr_results
    self.pdf_info = pdf_info
16 | |||
def gen_init_result(self, is_asp):
    """Build the empty, per-page result template and store it on ``self.init_result``.

    Every leaf is a fresh copy of ``self.item`` so that filling one field in
    place can never leak into another field.

    Args:
        is_asp: Selects the page layout. When true, ``page_7`` carries only the
            contract number and the signature fields live on ``page_8``;
            otherwise the signature fields are on ``page_7`` and there is no
            ``page_8``.
    """
    def blank():
        return self.item.copy()

    def amounts():
        # Loan principal breakdown shared by page_1 and page_2.
        return {"大写": blank(),
                "小写": blank(),
                "车辆贷款本金金额": blank(),
                "附加产品融资贷款本金总金额": blank(),
                }

    def party():
        return {"name": blank(), "id": blank()}

    def account():
        return {"账号": blank(), "户名": blank(), "开户行": blank()}

    def signature_page():
        # Contract number plus one signature/date pair per signer.
        page = {"合同编号": blank()}
        for signer in ("主借人签字", "共借人签字", "保证人1签字", "保证人2签字", "见证人签字"):
            page[signer] = {"签字": blank(), "日期": blank()}
        return page

    # Standardised output layout of the extraction algorithm.
    self.init_result = {
        "page_1": {"合同编号": blank(),
                   "所购车辆价格": blank(),
                   "车架号": blank(),
                   "贷款本金金额": amounts(),
                   "贷款期限": blank(),
                   "附加产品融资贷款本金总金额明细": blank(),
                   "借款人签字及时间": blank(),
                   },
        "page_2": {"合同编号": blank(),
                   "借款人及抵押人": party(),
                   "共同借款人及共同抵押人": party(),
                   "保证人1": party(),
                   "保证人2": party(),
                   "所购车辆价格": blank(),
                   "车架号": blank(),
                   "经销商": blank(),
                   "贷款本金金额": amounts(),
                   "贷款期限": blank(),
                   "标准利率": blank(),
                   "借款人收款账户": account(),
                   "还款账户": account(),
                   },
        "page_3": {"合同编号": blank(),
                   "还款计划表": blank(),
                   "车辆代理商": blank(),
                   },
        "page_4": {"合同编号": blank(),
                   "附加产品融资贷款本金总金额明细": blank(),
                   },
        "page_5": {"合同编号": blank(),
                   },
        "page_6": {"合同编号": blank(),
                   },
    }
    # Honour the argument rather than whatever self.is_asp currently holds.
    if is_asp:
        self.init_result["page_7"] = {"合同编号": blank(),
                                      }
        self.init_result["page_8"] = signature_page()
    else:
        self.init_result["page_7"] = signature_page()
113 | |||
114 | |||
def get_top_iou(self, poly, ocr_result):
    """Find the OCR entry whose box overlaps a query polygon the most.

    Args:
        poly: Flat sequence of x, y coordinates describing the query polygon.
        ocr_result: Container indexed by line id whose values unpack as
            (bbox, text); bbox is a flat coordinate sequence like ``poly``.

    Returns:
        ``[iou, key]`` for the entry with the highest intersection-over-union
        (the latest entry wins on ties), or the tuple ``(-1, -1)`` when no
        entry could be scored.
    """
    best = None
    for line_id in ocr_result:
        box, _ = ocr_result[line_id]
        candidate = Polygon(np.array(box).reshape((-1, 2)))
        query = Polygon(np.array(poly).reshape((-1, 2)))
        # Shapes that shapely reports as invalid cannot be scored; skip them.
        if not (candidate.is_valid and query.is_valid):
            continue
        overlap = candidate.intersection(query).area
        score = overlap / (candidate.area + query.area - overlap)
        # ">=" keeps the last of several equally good matches.
        if best is None or score >= best[0]:
            best = [score, line_id]
    if best is None:
        return -1, -1
    return best
136 | |||
def poly_to_rectangle(self, poly):
    """Reduce an 8-value quadrilateral to a 4-value ``[xmin, ymin, xmax, ymax]`` box.

    Each edge is read from one fixed position instead of being computed as a
    min/max: xmin from index 6, ymin from index 3, xmax from index 4 and ymax
    from index 7. ``poly`` must unpack into exactly eight values.
    """
    _x0, _y0, _x1, top, right, _y2, left, bottom = poly
    return [left, top, right, bottom]
141 | |||
def get_contract_no(self, page_num):
    """Read the contract number from one page of the OCR results.

    Scans every OCR line on the page for the '合同编号:' label (reportedly
    printed in the page's top-right corner; this lookup matches on text only).
    If several lines match, the last one wins.

    Args:
        page_num (string): Key of the page inside ``self.ocr_results``.

    Returns:
        dict: A copy of ``self.item`` with 'words' set to the text after the
        last ':' and 'position' set to the line's rectangle; both stay
        None when the label is not found.
    """
    label = '合同编号:'
    found = self.item.copy()
    page_lines = self.ocr_results[page_num]
    for line_id in page_lines:
        box, content = page_lines[line_id]
        if label not in content:
            continue
        number = content.split(':')[-1]
        rect = self.poly_to_rectangle(box)
        found['words'], found['position'] = number, rect
    return found
163 | |||
def get_vehicle_price(self, page_num='0'):
    """Read the purchased-vehicle price from the OCR result of one page.

    Args:
        page_num (str): Key of the page inside ``self.ocr_results``.

    Returns:
        dict: A copy of ``self.item``. For an OCR entry containing
        '所购车辆价格为人民币', 'words' is the text after the last '币' and
        'position' is the converted polygon; the last matching entry wins.
    """
    result = self.item.copy()
    for polygon, content in self.ocr_results[page_num].values():
        if '所购车辆价格为人民币' not in content:
            continue
        result['words'] = content.split('币')[-1]
        result['position'] = self.poly_to_rectangle(polygon)
    return result
176 | |||
def get_vin(self, page_num='0'):
    """Read the vehicle identification number from one page's OCR result.

    Args:
        page_num (str): Key of the page inside ``self.ocr_results``.

    Returns:
        dict: A copy of ``self.item``. For an OCR entry containing '车架号:',
        'words' is the text after the last ':' and 'position' is the
        converted polygon; the last matching entry wins.
    """
    result = self.item.copy()
    for polygon, content in self.ocr_results[page_num].values():
        if '车架号:' not in content:
            continue
        result['words'] = content.split(':')[-1]
        result['position'] = self.poly_to_rectangle(polygon)
    return result
189 | |||
def get_loan_principal(self, page_num='0'):
    """Extract the loan principal amounts from the parsed PDF text of a page.

    Only blocks whose 'type' is 0 are inspected. Four items are filled:

    * upper: a span whose fuzzy ratio against the joined capital-numeral
      keywords exceeds 15; 'words' is the stripped text after the last ':'.
    * lower: a span containing '小写:¥'; 'words' is the stripped text after
      the last '¥'.
    * asp_1 / asp_2: spans holding '人民币:小写:[...]' located above / below
      the span whose text equals '附加产品融资贷款本金总金额'.

    Args:
        page_num (str): Key of the page inside ``self.pdf_info``.

    Returns:
        tuple: ``(upper, lower, asp_1, asp_2)``, each a copy of ``self.item``
        that is left untouched when the corresponding text is not found.
    """
    chinese_keywords = ['壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖', '拾',
                        '佰', '仟', '万', '亿', '元', '角', '分', '零', '整']
    # The reference string for the fuzzy match is constant; join it once.
    keyword_string = ''.join(chinese_keywords)

    upper = self.item.copy()
    lower = self.item.copy()
    asp_1 = self.item.copy()
    asp_2 = self.item.copy()

    spans = [
        (span['bbox'], span['text'])
        for block in self.pdf_info[page_num]['blocks']
        if block['type'] == 0
        for line in block['lines']
        for span in line['spans']
    ]

    anchor_bbox = None
    for bbox, text in spans:
        if fuzz.ratio(keyword_string, text) > 15:
            # The trimmed text is deliberately carried into the checks below.
            text = text.split(':')[-1].strip()
            upper['position'] = bbox
            upper['words'] = text
        if '小写:¥' in text:
            lower['position'] = bbox
            lower['words'] = text.split('¥')[-1].strip()
        if '附加产品融资贷款本金总金额' == text:
            anchor_bbox = bbox

    if anchor_bbox:
        anchor_y = np.mean(anchor_bbox[1::2])
        for bbox, text in spans:
            if '人民币:小写:' not in text:
                continue
            found = re.search(r'人民币:小写:\[(.*)\]', text)
            if found is None:
                # Marker present but no bracketed amount: skip the span
                # instead of failing on an empty match list.
                continue
            center_y = np.mean(bbox[1::2])
            if center_y < anchor_y:
                asp_1['position'] = bbox
                asp_1['words'] = found.group(1)
            if center_y > anchor_y:
                asp_2['position'] = bbox
                asp_2['words'] = found.group(1)
    return upper, lower, asp_1, asp_2
230 | |||
def get_loan_term(self, page_num='0'):
    """Extract the loan term (in months) from the parsed PDF text of a page.

    The texts of all spans in type-0 blocks are concatenated and searched
    for '贷款期限<digits>个月'. When found, the digits become 'words' and the
    bbox of the last span containing '<digits>个月' becomes 'position'.

    Args:
        page_num (str): Key of the page inside ``self.pdf_info``.

    Returns:
        dict: A copy of ``self.item``, untouched when nothing matches.
    """
    loan_term = self.item.copy()
    spans = [
        (span['bbox'], span['text'])
        for block in self.pdf_info[page_num]['blocks']
        if block['type'] == 0
        for line in block['lines']
        for span in line['spans']
    ]

    found = re.search(r'贷款期限(\d+)个月', ''.join(text for _, text in spans))
    if not found:
        return loan_term

    months = found.group(1)
    needle = f'{months}个月'
    for bbox, text in spans:
        if needle in text:
            loan_term['position'] = bbox
            loan_term['words'] = months
    return loan_term
254 | |||
def get_standard_rate(self, page_num='0'):
    """Extract the contract's standard annual interest rate from a page.

    Each span of the page's type-0 blocks is matched against
    '本合同当期的标准利率为<rate>%/年'; the last matching span wins.

    Args:
        page_num (str): Key of the page inside ``self.pdf_info``.

    Returns:
        dict: A copy of ``self.item`` with 'words' set to the captured rate
        and 'position' to the span's bbox, or untouched when nothing matches.
    """
    standard_rate = self.item.copy()
    text_blocks = (b for b in self.pdf_info[page_num]['blocks'] if b['type'] == 0)
    for block in text_blocks:
        for line in block['lines']:
            for span in line['spans']:
                found = re.search(r'本合同当期的标准利率为(\S+)%/年', span['text'])
                if not found:
                    continue
                standard_rate['position'] = span['bbox']
                standard_rate['words'] = found.group(1)
    return standard_rate
268 | |||
def mergelist(self, text_list):
    """Merge entries that are continuations of a '所购...' item.

    An entry containing '所购' is joined with the entry that follows it when
    that follower contains at least one Chinese character. Merging repeats
    until no such pair is left; on each pass the last qualifying pair is the
    one merged.

    Args:
        text_list (list[str]): Texts in reading order.

    Returns:
        list[str]: ``text_list`` itself when nothing needs merging, otherwise
        a new list with the merged entries.
    """
    non_chinese = re.compile("[^\u4e00-\u9fa5]")  # anything that is not a Chinese character
    while True:
        merge_index = -1
        # The last entry has no follower, so it can never start a merge;
        # leaving it out also avoids indexing past the end of the list.
        for index, entry in enumerate(text_list[:-1]):
            if '所购' in entry and non_chinese.sub('', text_list[index + 1]):
                merge_index = index
        if merge_index == -1:
            return text_list
        merged = text_list[merge_index] + text_list[merge_index + 1]
        text_list = text_list[:merge_index] + [merged] + text_list[merge_index + 2:]
281 | |||
def get_asp_details(self, page_num):
    """Rebuild the add-on product (ASP) financing table from OCR boxes.

    The three column headers ('项目1', '用途总金额2', '贷款本金3') and the
    total label are located by exact text. Starting from the first header,
    up to ten rows are walked downwards: each step shifts the current
    first-column box down by 1.4x its height and takes the OCR box with the
    best IoU. A box without '所购' is treated as the second line of the
    previous row's first cell. For every other row the two remaining cells
    are looked up at the header columns' x positions.

    NOTE(review): indentation was lost in the text this was recovered from.
    Publishing the table into 'words' is taken to belong to the branch where
    the total label was found — confirm against the original file.

    Args:
        page_num (str): Key of the page inside ``self.ocr_results``.

    Returns:
        dict: A copy of ``self.item``; 'words' holds the table (a list of
        rows) when the total label is present, otherwise it is untouched.
    """
    details_item = self.item.copy()
    table = [['附加产品融资贷款本金总金额及贷款利率明细'], ['项目1', '用途总金额2', '贷款本金3']]
    page_ocr = self.ocr_results[page_num]

    bbox_xm = None
    bbox_ytzje = None
    bbox_dkbj = None
    bbox_total = None
    for key in page_ocr:
        bbox, text = page_ocr[key]
        if text == '项目1':
            bbox_xm = bbox
        if text == '用途总金额2':
            bbox_ytzje = bbox
        if text == '贷款本金3':
            bbox_dkbj = bbox
        if text in ['附加产品融资贷款本', '附加产品融资贷款本金', '附加产品融资贷']:
            bbox_total = bbox

    def _cell(column_bbox, row_bbox):
        """Return ``(bbox, text)`` at the column's x / row's y, or None."""
        anchor = [column_bbox[0], row_bbox[1], column_bbox[2], row_bbox[3],
                  column_bbox[4], row_bbox[5], column_bbox[6], row_bbox[7]]
        iou, key = self.get_top_iou(poly=anchor, ocr_result=page_ocr)
        if iou < 0:
            # get_top_iou signals "nothing scored" with (-1, -1).
            return None
        return page_ocr[key]

    if bbox_xm:
        for _ in range(10):
            row_height = abs(bbox_xm[1] - bbox_xm[-1])
            anchor = np.array(bbox_xm).reshape((-1, 2))
            anchor[:, 1] += int(row_height * 1.4)
            iou, key = self.get_top_iou(poly=anchor, ocr_result=page_ocr)
            if not iou > 0:
                break
            bbox, xm_text = page_ocr[key]
            bbox_xm = bbox
            # A first-column box without '所购' is the wrapped second line
            # of the previous row's item name.
            if '所购' not in xm_text:
                table[-1][0] += xm_text
                continue
            if not (bbox_ytzje and bbox_dkbj):
                # Without both column headers the row cannot be completed.
                break
            amount_cell = _cell(bbox_ytzje, bbox)
            if amount_cell is None:
                break
            bbox, ytzje_text = amount_cell
            principal_cell = _cell(bbox_dkbj, bbox)
            if principal_cell is None:
                break
            _, dkbj_text = principal_cell
            # OCR sometimes returns the first two cells as one box; split
            # them at the last space so an item name may itself hold spaces.
            if xm_text == ytzje_text and ' ' in xm_text:
                xm_text, ytzje_text = xm_text.rsplit(' ', 1)
            table.append([xm_text, ytzje_text, dkbj_text])

    if bbox_total:
        total_text = ''
        if bbox_dkbj:
            total_cell = _cell(bbox_dkbj, bbox_total)
            if total_cell is not None:
                _, total_text = total_cell
        table.append(['附加产品融资贷款本金总金额:', '', total_text])
        details_item['words'] = table

    return details_item
344 | |||
def get_signature(self):
    """Find the borrower's signing-date text on page '0'.

    Returns:
        dict: A copy of ``self.item``; for the last span (in type-0 blocks)
        whose text contains '签署日期', 'words' is the whole span text and
        'position' its bbox. Untouched when no span matches.
    """
    signature = self.item.copy()
    text_blocks = (b for b in self.pdf_info['0']['blocks'] if b['type'] == 0)
    for block in text_blocks:
        for line in block['lines']:
            for span in line['spans']:
                if '签署日期' not in span['text']:
                    continue
                signature['words'] = span['text']
                signature['position'] = span['bbox']
    return signature
359 | |||
def get_somebody(self, top, bottom):
    """Extract a party's name and ID number from page '1' between two markers.

    The vertical band is delimited by ``bbox[3]`` of the last span containing
    ``top`` and of the last span containing ``bottom`` (both default to 0
    when absent). Inside that band, the last span containing '姓名/名称'
    supplies the name and the last span containing
    '自然人身份证件号码/法人执照号码' supplies the ID; in both cases 'words' is
    the text after the final ':'.

    Args:
        top (str): Text marking the upper edge of the band.
        bottom (str): Text marking the lower edge of the band.

    Returns:
        tuple: ``(name_item, id_item)``, each a copy of ``self.item``.
    """
    name_item = self.item.copy()
    id_item = self.item.copy()
    spans = [
        (span['bbox'], span['text'])
        for block in self.pdf_info['1']['blocks']
        if block['type'] == 0
        for line in block['lines']
        for span in line['spans']
    ]

    # First pass: fix the upper and lower boundaries of the band.
    upper_y = 0
    lower_y = 0
    for bbox, text in spans:
        if top in text:
            upper_y = bbox[3]
        if bottom in text:
            lower_y = bbox[3]

    # Second pass: pick up the labelled fields lying strictly inside it.
    for bbox, text in spans:
        if not upper_y < bbox[3] < lower_y:
            continue
        if '姓名/名称' in text:
            name_item['position'] = bbox
            name_item['words'] = text.split(':')[-1]
        if '自然人身份证件号码/法人执照号码' in text:
            id_item['position'] = bbox
            id_item['words'] = text.split(':')[-1]
    return name_item, id_item
394 | |||
def get_seller(self):
    """Find the dealer / vehicle seller value on page '1'.

    The label is the last span whose text is exactly '经销商' or
    '车辆销售方'. The value is the last span whose centre lies to the right
    of the label, left of the page's half width, and vertically within the
    label's top and bottom edges.

    Returns:
        dict: A copy of ``self.item`` with 'position' and 'words' of the
        value span, or untouched when the label or value is not found.
    """
    seller = self.item.copy()
    page = self.pdf_info['1']
    spans = [
        (span['bbox'], span['text'])
        for block in page['blocks']
        if block['type'] == 0
        for line in block['lines']
        for span in line['spans']
    ]

    # Locate the label first.
    anchor_bbox = None
    for bbox, text in spans:
        if text in ['经销商', '车辆销售方']:
            anchor_bbox = bbox
    if not anchor_bbox:
        return seller

    # Then match the value on the label's row.
    half_width = page['width'] * 0.5
    for bbox, text in spans:
        right_of_label = anchor_bbox[2] < np.mean(bbox[::2]) < half_width
        if right_of_label and anchor_bbox[1] < np.mean(bbox[1::2]) < anchor_bbox[3]:
            seller['position'] = bbox
            seller['words'] = text
    return seller
421 | |||
def get_cldls(self):
    """Find the vehicle agent value on page '2'.

    The label is searched line by line; the search stops after the first
    line holding a span whose stripped text equals '车辆代理商' (the last
    such span of that line is used). The value is the first span whose
    centre lies to the right of the label, left of the page's half width,
    and vertically within the label's top and bottom edges.

    Returns:
        dict: A copy of ``self.item`` with 'position' and 'words' of the
        value span, or untouched when the label or value is not found.
    """
    seller = self.item.copy()
    page = self.pdf_info['2']

    # Locate the label; the generator keeps the scan lazy so later blocks
    # are not touched once the label has been found.
    anchor_bbox = None
    text_lines = (
        line
        for block in page['blocks']
        if block['type'] == 0
        for line in block['lines']
    )
    for line in text_lines:
        hits = [span['bbox'] for span in line['spans'] if span['text'].strip() == '车辆代理商']
        if hits:
            anchor_bbox = hits[-1]
            break
    if not anchor_bbox:
        return seller

    # Match the value on the label's row; the first hit is returned.
    half_width = page['width'] * 0.5
    for block in page['blocks']:
        if block['type'] != 0:
            continue
        for line in block['lines']:
            for span in line['spans']:
                bbox, text = span['bbox'], span['text']
                right_of_label = anchor_bbox[2] < np.mean(bbox[::2]) < half_width
                if right_of_label and anchor_bbox[1] < np.mean(bbox[1::2]) < anchor_bbox[3]:
                    seller['position'] = bbox
                    seller['words'] = text
                    return seller
    return seller
454 | |||
def get_borrower_collection_account(self):
    """Extract the borrower's collection account details from page '1'.

    All span texts of the page's type-0 blocks are concatenated. Only when
    the result mentions '借款人收款账户' are spaces removed and the account
    number, account name and bank captured by regex. Each captured value is
    then located again in the individual spans; the last span containing it
    provides 'position'. A value that appears in no single span leaves its
    item untouched.

    Returns:
        tuple: ``(account, account_name, account_bank)``, each a copy of
        ``self.item``.
    """
    account = self.item.copy()
    account_name = self.item.copy()
    account_bank = self.item.copy()
    spans = [
        (span['bbox'], span['text'])
        for block in self.pdf_info['1']['blocks']
        if block['type'] == 0
        for line in block['lines']
        for span in line['spans']
    ]
    all_text = ''.join(text for _, text in spans)

    # Only the explicit account layout is handled; other layouts yield
    # untouched items.
    if '借款人收款账户' not in all_text:
        return account, account_name, account_bank

    # NOTE(review): both replace() calls look identical as rendered here; the
    # second may originally target a different whitespace character — confirm.
    all_text = all_text.replace(' ', '').replace(' ', '')
    fields = (
        (account, r'账号:(.*?)户名'),
        (account_name, r'户名:(.*?)开户行'),
        (account_bank, r'开户行:(.*?)借款人'),
    )
    for item, pattern in fields:
        found = re.findall(pattern, all_text)
        if not found:
            continue
        words = found[0]
        for bbox, text in spans:
            if f'{words}' in text:
                item['position'] = bbox
                item['words'] = words
    return account, account_name, account_bank
507 | |||
def get_payback_account(self):
    """Extract the repayment account details from page '1'.

    All span texts of the page's type-0 blocks are concatenated. Only when
    the result contains '(13) 还款账户' is the text after its last occurrence
    kept, stripped of spaces and searched for account number, account name
    and bank. Number and name are located again by plain containment in a
    span; the bank is located by looking for '开户行:<bank>;' in the
    space-stripped span text. The last matching span provides 'position'.

    Returns:
        tuple: ``(account, account_name, account_bank)``, each a copy of
        ``self.item``.
    """
    account = self.item.copy()
    account_name = self.item.copy()
    account_bank = self.item.copy()
    spans = [
        (span['bbox'], span['text'])
        for block in self.pdf_info['1']['blocks']
        if block['type'] == 0
        for line in block['lines']
        for span in line['spans']
    ]
    all_text = ''.join(text for _, text in spans)

    # Only the explicit account layout is handled; other layouts yield
    # untouched items.
    if '(13) 还款账户' not in all_text:
        return account, account_name, account_bank

    all_text = all_text.split('(13) 还款账户')[-1]
    # NOTE(review): both replace() calls look identical as rendered here; the
    # second may originally target a different whitespace character — confirm.
    all_text = all_text.replace(' ', '').replace(' ', '')
    fields = (
        (account, r'账号:(.*?)户名', lambda words, text: f'{words}' in text),
        (account_name, r'户名:(.*?)开户行', lambda words, text: f'{words}' in text),
        (account_bank, r'开户行:(.*?);',
         lambda words, text: f'开户行:{words};' in text.replace(' ', '')),
    )
    for item, pattern, is_hit in fields:
        found = re.findall(pattern, all_text)
        if not found:
            continue
        words = found[0]
        for bbox, text in spans:
            if is_hit(words, text):
                item['position'] = bbox
                item['words'] = words
    return account, account_name, account_bank
561 | |||
def get_repayment_schedule(self):
    """Rebuild the repayment schedule table from page '2'.

    Span texts are collected from the span whose text equals '序号' up to
    (not including) the span containing
    '以上表格中所列的序号并非还款期数'. The collected cells are grouped into
    rows of five columns. Grouping stops at the first row whose second cell
    equals the 1-based row index as a string.

    Returns:
        dict: A copy of ``self.item`` whose 'words' is the list of rows when
        at least one row was built; untouched otherwise.
    """
    repayment_schedule = self.item.copy()
    columns = 5

    cells = []
    inside_table = False
    for block in self.pdf_info['2']['blocks']:
        if block['type'] != 0:
            continue
        for line in block['lines']:
            for span in line['spans']:
                _bbox, text = span['bbox'], span['text']
                if text == '序号':
                    inside_table = True
                if '以上表格中所列的序号并非还款期数' in text:
                    inside_table = False
                if inside_table:
                    cells.append(text)

    rows = []
    for row_index in range(len(cells) // columns):
        start = row_index * columns
        row = cells[start:start + columns]
        if row[1] == str(row_index + 1):
            break
        rows.append(row)

    if rows:
        repayment_schedule['words'] = rows
    return repayment_schedule
596 | |||
def get_signature_role_1(self):
    """Report whether the borrower (mortgagor) signature area holds content.

    Every page's type-0 spans are walked in order. Collection starts at a
    span containing '借款人(抵押人)' and stops at the next span containing
    '日期' (which is not collected). More than four collected spans yield
    '有', otherwise '无'.

    Returns:
        dict: A copy of ``self.init_item`` with 'page_num' (key of the page
        of the last collected span), 'position' (``[xmin, ymin, xmax, ymax]``
        over all collected bboxes) and 'words'. 'page_num' and 'position'
        are None when nothing was collected.
    """
    signature_role_1 = self.init_item.copy()
    texts = []
    boxes = []
    page_num = None
    position = None
    collecting = False
    for page_key in self.pdf_info:
        for block in self.pdf_info[page_key]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '借款人(抵押人)' in text:
                        collecting = True
                    if '日期' in text:
                        collecting = False
                    if collecting:
                        page_num = page_key
                        texts.append(text)
                        boxes.append(bbox)

    words = '有' if len(texts) > 4 else '无'
    if boxes:
        # With nothing collected, min()/max() over an empty array would raise.
        points = np.array(boxes).reshape((-1, 2))
        position = [min(points[:, 0]), min(points[:, 1]), max(points[:, 0]), max(points[:, 1])]
    signature_role_1['page_num'] = page_num
    signature_role_1['position'] = position
    signature_role_1['words'] = words
    return signature_role_1
631 | |||
def get_signature_role_2(self):
    """Report whether the co-borrower (co-mortgagor) signature area holds content.

    Every page's type-0 spans are walked in order. Collection starts at a
    span containing '共同借款人(共同抵押人)' and stops at the next span
    containing '日期' (which is not collected). More than four collected
    spans yield '有', otherwise '无'.

    Returns:
        dict: A copy of ``self.init_item`` with 'page_num' (key of the page
        of the last collected span), 'position' (``[xmin, ymin, xmax, ymax]``
        over all collected bboxes) and 'words'. 'page_num' and 'position'
        are None when nothing was collected.
    """
    signature_role_2 = self.init_item.copy()
    texts = []
    boxes = []
    page_num = None
    position = None
    collecting = False
    for page_key in self.pdf_info:
        for block in self.pdf_info[page_key]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '共同借款人(共同抵押人)' in text:
                        collecting = True
                    if '日期' in text:
                        collecting = False
                    if collecting:
                        page_num = page_key
                        texts.append(text)
                        boxes.append(bbox)

    words = '有' if len(texts) > 4 else '无'
    if boxes:
        # With nothing collected, min()/max() over an empty array would raise.
        points = np.array(boxes).reshape((-1, 2))
        position = [min(points[:, 0]), min(points[:, 1]), max(points[:, 0]), max(points[:, 1])]
    signature_role_2['page_num'] = page_num
    signature_role_2['position'] = position
    signature_role_2['words'] = words
    return signature_role_2
666 | |||
def get_signature_role_3(self):
    """Report whether the guarantor-1 signature area holds content.

    Every page's type-0 spans are walked in order. Collection starts at a
    span containing '保证人1' on any page whose key is not 0 as an integer,
    and stops at the next span containing '日期' (which is not collected).
    More than four collected spans yield '有', otherwise '无'.

    Returns:
        dict: A copy of ``self.init_item`` with 'page_num' (key of the page
        of the last collected span), 'position' (``[xmin, ymin, xmax, ymax]``
        over all collected bboxes) and 'words'. 'page_num' and 'position'
        are None when nothing was collected.
    """
    signature_role_3 = self.init_item.copy()
    texts = []
    boxes = []
    page_num = None
    position = None
    collecting = False
    for page_key in self.pdf_info:
        for block in self.pdf_info[page_key]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '保证人1' in text and int(page_key) != 0:
                        collecting = True
                    if '日期' in text:
                        collecting = False
                    if collecting:
                        page_num = page_key
                        texts.append(text)
                        boxes.append(bbox)

    words = '有' if len(texts) > 4 else '无'
    if boxes:
        # With nothing collected, min()/max() over an empty array would raise.
        points = np.array(boxes).reshape((-1, 2))
        position = [min(points[:, 0]), min(points[:, 1]), max(points[:, 0]), max(points[:, 1])]
    signature_role_3['page_num'] = page_num
    signature_role_3['position'] = position
    signature_role_3['words'] = words
    return signature_role_3
701 | |||
def get_signature_role_4(self):
    """Report whether the guarantor-2 signature area holds content.

    Every page's type-0 spans are walked in order. Collection starts at a
    span containing '保证人2' on any page whose key is not 0 as an integer,
    and stops at the next span containing '日期' (which is not collected).
    More than four collected spans yield '有', otherwise '无'.

    Returns:
        dict: A copy of ``self.init_item`` with 'page_num' (key of the page
        of the last collected span), 'position' (``[xmin, ymin, xmax, ymax]``
        over all collected bboxes) and 'words'. 'page_num' and 'position'
        are None when nothing was collected.
    """
    signature_role_4 = self.init_item.copy()
    texts = []
    boxes = []
    page_num = None
    position = None
    collecting = False
    for page_key in self.pdf_info:
        for block in self.pdf_info[page_key]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '保证人2' in text and int(page_key) != 0:
                        collecting = True
                    if '日期' in text:
                        collecting = False
                    if collecting:
                        page_num = page_key
                        texts.append(text)
                        boxes.append(bbox)

    words = '有' if len(texts) > 4 else '无'
    if boxes:
        # With nothing collected, min()/max() over an empty array would raise.
        points = np.array(boxes).reshape((-1, 2))
        position = [min(points[:, 0]), min(points[:, 1]), max(points[:, 0]), max(points[:, 1])]
    signature_role_4['page_num'] = page_num
    signature_role_4['position'] = position
    signature_role_4['words'] = words
    return signature_role_4
736 | |||
def get_signature_role_5(self):
    """Report whether the witness signature area holds content.

    Every page's type-0 spans are walked in order. Collection starts at a
    span containing '见证人签字' on any page whose key is not 0 as an
    integer, and stops at the next span containing '年' (which is not
    collected). More than four collected spans yield '有', otherwise '无'.

    Returns:
        dict: A copy of ``self.init_item`` with 'page_num' (key of the page
        of the last collected span), 'position' (``[xmin, ymin, xmax, ymax]``
        over all collected bboxes) and 'words'. 'page_num' and 'position'
        are None when nothing was collected.
    """
    signature_role_5 = self.init_item.copy()
    texts = []
    boxes = []
    page_num = None
    position = None
    collecting = False
    for page_key in self.pdf_info:
        for block in self.pdf_info[page_key]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '见证人签字' in text and int(page_key) != 0:
                        collecting = True
                    if '年' in text:
                        collecting = False
                    if collecting:
                        page_num = page_key
                        texts.append(text)
                        boxes.append(bbox)

    # The leftover debug print of the collected texts has been dropped.
    words = '有' if len(texts) > 4 else '无'
    if boxes:
        # With nothing collected, min()/max() over an empty array would raise.
        points = np.array(boxes).reshape((-1, 2))
        position = [min(points[:, 0]), min(points[:, 1]), max(points[:, 0]), max(points[:, 1])]
    signature_role_5['page_num'] = page_num
    signature_role_5['position'] = position
    signature_role_5['words'] = words
    return signature_role_5
772 | |||
def get_last_page_signature(self, page_num, top, bottom):
    """Extract a signer's name and signing date between two markers on a page.

    The vertical band is delimited by ``bbox[1]`` of the last span containing
    ``top`` and of the last span containing ``bottom``. When both markers
    exist, the last span that contains '签署日期' and whose mean y lies
    strictly between the integer-truncated band edges provides the result:
    the name is the text before the first space, the date is the text after
    the last ':'. Both items share the span's bbox as 'position'.

    Args:
        page_num (str): Key of the page inside ``self.pdf_info``.
        top (str): Text marking the upper edge of the band.
        bottom (str): Text marking the lower edge of the band.

    Returns:
        tuple: ``(signature_name, signature_date)``, each a copy of
        ``self.item``, untouched when a marker or the signature is missing.
    """
    signature_name = self.item.copy()
    signature_date = self.item.copy()
    spans = [
        (span['bbox'], span['text'])
        for block in self.pdf_info[page_num]['blocks']
        if block['type'] == 0
        for line in block['lines']
        for span in line['spans']
    ]

    anchor_top = None
    anchor_bottom = None
    for bbox, text in spans:
        if top in text:
            anchor_top = bbox[1]
        if bottom in text:
            anchor_bottom = bbox[1]
    if anchor_top is None or anchor_bottom is None:
        return signature_name, signature_date

    for bbox, text in spans:
        if '签署日期' not in text:
            continue
        if not int(anchor_top) < np.mean(bbox[1::2]) < int(anchor_bottom):
            continue
        signature_name['words'] = text.split(' ')[0]
        signature_name['position'] = bbox
        signature_date['words'] = text.split(':')[-1]
        signature_date['position'] = bbox
    return signature_name, signature_date
804 | |||
805 | def get_info(self): | ||
806 | """ | ||
807 | block['type'] == 0 : 表示该元素为图片 | ||
808 | |||
809 | Returns: | ||
810 | dict: Description | ||
811 | """ | ||
812 | |||
813 | # 先判断是否为 ASP 产品 | ||
814 | # 只看第一页,判断是否有 '附加产品融资贷款本金总金额' 这一句话,若有则为 ASP 产品 | ||
815 | # print(self.pdf_info['0']['blocks']) | ||
816 | # for block in self.pdf_info['0']['blocks']: | ||
817 | # if block['type'] != 0: | ||
818 | # continue | ||
819 | # for line in block['lines']: | ||
820 | # for span in line['spans']: | ||
821 | # bbox, text = span['bbox'], span['text'] | ||
822 | # if '附加产品融资贷款本金总金额' == text: | ||
823 | # self.is_asp = True | ||
824 | for key in self.ocr_results['0']: | ||
825 | bbox, text = self.ocr_results['0'][key] | ||
826 | if '附加产品融资贷款本金总金额' in text: | ||
827 | self.is_asp = True | ||
828 | |||
829 | self.gen_init_result(self.is_asp) | ||
830 | |||
831 | if len(list(self.ocr_results.keys())) <= 8: # 8.5 版本客户提供的样本出现串页的情况,暂时无法识别 | ||
832 | # Page 1 | ||
833 | # 找合同编号 | ||
834 | contract_no = self.get_contract_no(page_num='0') | ||
835 | # print(contract_no) | ||
836 | self.init_result['page_1']['合同编号'] = contract_no | ||
837 | # 所购车辆价格 | ||
838 | vehicle_price = self.get_vehicle_price() | ||
839 | # print(vehicle_price) | ||
840 | self.init_result['page_1']['所购车辆价格'] = vehicle_price | ||
841 | # 车架号 | ||
842 | vin = self.get_vin() | ||
843 | # print(vin) | ||
844 | self.init_result['page_1']['车架号'] = vin | ||
845 | # 贷款本金金额(如果是 ASP产品)则'贷款本金金额'项目中包含'车辆贷款本金金额'和'附加产品融资贷款本金总金额'两个项目 | ||
846 | upper, lower, asp_1, asp_2 = self.get_loan_principal() | ||
847 | # print(upper, lower, asp_1, asp_2) | ||
848 | self.init_result['page_1']['贷款本金金额']['大写'] = upper | ||
849 | self.init_result['page_1']['贷款本金金额']['小写'] = lower | ||
850 | self.init_result['page_1']['贷款本金金额']['车辆贷款本金金额'] = asp_1 | ||
851 | self.init_result['page_1']['贷款本金金额']['附加产品融资贷款本金总金额'] = asp_2 | ||
852 | # 贷款期限 | ||
853 | loan_term = self.get_loan_term() | ||
854 | # print(loan_term) | ||
855 | self.init_result['page_1']['贷款期限'] = loan_term | ||
856 | # 附加产品融资贷款本金总金额明细(ASP-表格) | ||
857 | asp_details_table = self.get_asp_details(page_num='0') | ||
858 | # print(asp_details_table) | ||
859 | self.init_result['page_1']['附加产品融资贷款本金总金额明细'] = asp_details_table | ||
860 | # 借款人签字及时间 | ||
861 | signature = self.get_signature() | ||
862 | # print(signature) | ||
863 | self.init_result['page_1']['借款人签字及时间'] = signature | ||
864 | ####################################### | ||
865 | # Page 2 | ||
866 | # 找合同编号 | ||
867 | contract_no = self.get_contract_no(page_num='0') | ||
868 | # print(contract_no) | ||
869 | self.init_result['page_2']['合同编号'] = contract_no | ||
870 | # 找借款人及抵押人(地址字段原本有空格) | ||
871 | borrower_name, borrower_id = self.get_somebody(top='借款人及抵押人:', bottom='共同借款人:') | ||
872 | # 这是为了同时兼容 8.1 版本 | ||
873 | if borrower_name['words'] == None: | ||
874 | borrower_name, borrower_id = self.get_somebody(top='借款人及抵押人:', bottom='共同借款人及共同抵押人:') | ||
875 | # 这是为了兼容车贷分离版本 | ||
876 | if borrower_name['words'] == None: | ||
877 | borrower_name, borrower_id = self.get_somebody(top='借款人:', bottom='共同借款人及抵押人:') | ||
878 | # print(borrower_name, borrower_id) | ||
879 | self.init_result['page_2']['借款人及抵押人']['name'] = borrower_name | ||
880 | self.init_result['page_2']['借款人及抵押人']['id'] = borrower_id | ||
881 | # 找共同借款人及共同抵押人 | ||
882 | co_borrower_name, co_borrower_id = self.get_somebody(top='共同借款人:', bottom='保证人1:') | ||
883 | # print(co_borrower_name, co_borrower_id) | ||
884 | self.init_result['page_2']['共同借款人及共同抵押人']['name'] = co_borrower_name | ||
885 | self.init_result['page_2']['共同借款人及共同抵押人']['id'] = co_borrower_id | ||
886 | # 保证人1 | ||
887 | first_guarantor_name, first_guarantor_id = self.get_somebody(top='保证人1:', bottom='保证人2:') | ||
888 | self.init_result['page_2']['保证人1']['name'] = first_guarantor_name | ||
889 | self.init_result['page_2']['保证人1']['id'] = first_guarantor_id | ||
890 | # 保证人2 | ||
891 | second_guarantor_name, second_guarantor_id = self.get_somebody(top='保证人2:', bottom='第一章') | ||
892 | self.init_result['page_2']['保证人2']['name'] = second_guarantor_name | ||
893 | self.init_result['page_2']['保证人2']['id'] = second_guarantor_id | ||
894 | # 所购车辆价格 | ||
895 | vehicle_price = self.get_vehicle_price(page_num='1') | ||
896 | # print(vehicle_price) | ||
897 | self.init_result['page_2']['所购车辆价格'] = vehicle_price | ||
898 | # 车架号 | ||
899 | vin = self.get_vin(page_num='1') | ||
900 | # print(vin) | ||
901 | self.init_result['page_2']['车架号'] = vin | ||
902 | # 经销商 | ||
903 | seller = self.get_seller() | ||
904 | # print(seller) | ||
905 | self.init_result['page_2']['经销商'] = seller | ||
906 | # 贷款本金金额(如果是 ASP产品)则'贷款本金金额'项目中包含'车辆贷款本金金额'和'附加产品融资贷款本金总金额'两个项目 | ||
907 | upper, lower, asp_1, asp_2 = self.get_loan_principal(page_num='1') | ||
908 | # print(upper, lower, asp_1, asp_2) | ||
909 | self.init_result['page_2']['贷款本金金额']['大写'] = upper | ||
910 | self.init_result['page_2']['贷款本金金额']['小写'] = lower | ||
911 | self.init_result['page_2']['贷款本金金额']['车辆贷款本金金额'] = asp_1 | ||
912 | self.init_result['page_2']['贷款本金金额']['附加产品融资贷款本金总金额'] = asp_2 | ||
913 | # 贷款期限 | ||
914 | loan_term = self.get_loan_term(page_num='1') | ||
915 | # print(loan_term) | ||
916 | self.init_result['page_2']['贷款期限'] = loan_term | ||
917 | # 本合同当期的标准利率 | ||
918 | standard_rate = self.get_standard_rate(page_num='1') | ||
919 | # print(standard_rate) | ||
920 | self.init_result['page_2']['标准利率'] = standard_rate | ||
921 | # 202212 release 新增借款人收款账户 | ||
922 | account, account_name, account_bank = self.get_borrower_collection_account() | ||
923 | # print(account, account_name, account_bank) | ||
924 | self.init_result['page_2']['借款人收款账户']['账号'] = account | ||
925 | self.init_result['page_2']['借款人收款账户']['户名'] = account_name | ||
926 | self.init_result['page_2']['借款人收款账户']['开户行'] = account_bank | ||
927 | # 还款账户 | ||
928 | account, account_name, account_bank = self.get_payback_account() | ||
929 | # print(account, account_name, account_bank) | ||
930 | self.init_result['page_2']['还款账户']['账号'] = account | ||
931 | self.init_result['page_2']['还款账户']['户名'] = account_name | ||
932 | self.init_result['page_2']['还款账户']['开户行'] = account_bank | ||
933 | ####################################### | ||
934 | # Page 3 | ||
935 | # 找合同编号 | ||
936 | contract_no = self.get_contract_no(page_num='2') | ||
937 | self.init_result['page_3']['合同编号'] = contract_no | ||
938 | # 还款计划表(表格) | ||
939 | repayment_schedule_table = self.get_repayment_schedule() | ||
940 | # print(repayment_schedule_table) | ||
941 | self.init_result['page_3']['还款计划表'] = repayment_schedule_table | ||
942 | # 车辆代理商 | ||
943 | cldls = self.get_cldls() | ||
944 | self.init_result['page_3']['车辆代理商'] = cldls | ||
945 | ####################################### | ||
946 | # Page 4 | ||
947 | # 找合同编号 | ||
948 | contract_no = self.get_contract_no(page_num='3') | ||
949 | # print(contract_no) | ||
950 | self.init_result['page_4']['合同编号'] = contract_no | ||
951 | # 附加产品融资贷款本金总金额明细(ASP-表格) | ||
952 | asp_details_table = self.get_asp_details(page_num='3') | ||
953 | # print(asp_details_table) | ||
954 | self.init_result['page_4']['附加产品融资贷款本金总金额明细'] = asp_details_table | ||
955 | ####################################### | ||
956 | # Page 5 | ||
957 | # 找合同编号 | ||
958 | contract_no = self.get_contract_no(page_num='4') | ||
959 | # print(contract_no) | ||
960 | self.init_result['page_5']['合同编号'] = contract_no | ||
961 | ####################################### | ||
962 | # Page 6 | ||
963 | # 找合同编号 | ||
964 | contract_no = self.get_contract_no(page_num='5') | ||
965 | # print(contract_no) | ||
966 | self.init_result['page_6']['合同编号'] = contract_no | ||
967 | |||
968 | if self.is_asp: | ||
969 | # Page 7 | ||
970 | # 找合同编号 | ||
971 | contract_no = self.get_contract_no(page_num='6') | ||
972 | self.init_result['page_7']['合同编号'] = contract_no | ||
973 | # Page 8 | ||
974 | # 找合同编号 | ||
975 | contract_no = self.get_contract_no(page_num='7') | ||
976 | self.init_result['page_8']['合同编号'] = contract_no | ||
977 | signature_name, signature_date = self.get_last_page_signature(page_num='7', | ||
978 | top='合同编号', bottom='共同借款人') | ||
979 | if signature_name['words'] == None: | ||
980 | signature_name, signature_date = self.get_last_page_signature(page_num='7', | ||
981 | top='合同编号', bottom='共同借款人(抵押人)') | ||
982 | # print(signature_name, signature_date) | ||
983 | self.init_result['page_8']['主借人签字']['签字'] = signature_name | ||
984 | self.init_result['page_8']['主借人签字']['日期'] = signature_date | ||
985 | signature_name, signature_date = self.get_last_page_signature(page_num='7', | ||
986 | top='共同借款人', bottom='保证人1') | ||
987 | if signature_name['words'] == None: | ||
988 | signature_name, signature_date = self.get_last_page_signature(page_num='7', | ||
989 | top='共同借款人(抵押人)', bottom='保证人1') | ||
990 | # print(signature_name, signature_date) | ||
991 | self.init_result['page_8']['共借人签字']['签字'] = signature_name | ||
992 | self.init_result['page_8']['共借人签字']['日期'] = signature_date | ||
993 | signature_name, signature_date = self.get_last_page_signature(page_num='7', | ||
994 | top='保证人1', bottom='保证人2') | ||
995 | self.init_result['page_8']['保证人1签字']['签字'] = signature_name | ||
996 | self.init_result['page_8']['保证人1签字']['日期'] = signature_date | ||
997 | signature_name, signature_date = self.get_last_page_signature(page_num='7', | ||
998 | top='保证人2', bottom='在本人面前亲笔签署本合同') | ||
999 | self.init_result['page_8']['保证人2签字']['签字'] = signature_name | ||
1000 | self.init_result['page_8']['保证人2签字']['日期'] = signature_date | ||
1001 | signature_name, signature_date = self.get_last_page_signature(page_num='7', | ||
1002 | top='在本人面前亲笔签署本合同', bottom='以下无正文') | ||
1003 | # print(signature_name, signature_date) | ||
1004 | self.init_result['page_8']['见证人签字']['签字'] = signature_name | ||
1005 | self.init_result['page_8']['见证人签字']['日期'] = signature_date | ||
1006 | else: | ||
1007 | # Page 7 | ||
1008 | # 找合同编号 | ||
1009 | contract_no = self.get_contract_no(page_num='6') | ||
1010 | self.init_result['page_7']['合同编号'] = contract_no | ||
1011 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
1012 | top='合同编号', bottom='共同借款人') | ||
1013 | if signature_name['words'] == None: | ||
1014 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
1015 | top='合同编号', bottom='共同借款人(抵押人)') | ||
1016 | # print(signature_name, signature_date) | ||
1017 | self.init_result['page_7']['主借人签字']['签字'] = signature_name | ||
1018 | self.init_result['page_7']['主借人签字']['日期'] = signature_date | ||
1019 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
1020 | top='共同借款人', bottom='保证人1') | ||
1021 | if signature_name['words'] == None: | ||
1022 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
1023 | top='共同借款人(抵押人)', bottom='保证人1') | ||
1024 | # print(signature_name, signature_date) | ||
1025 | self.init_result['page_7']['共借人签字']['签字'] = signature_name | ||
1026 | self.init_result['page_7']['共借人签字']['日期'] = signature_date | ||
1027 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
1028 | top='保证人1', bottom='保证人2') | ||
1029 | self.init_result['page_7']['保证人1签字']['签字'] = signature_name | ||
1030 | self.init_result['page_7']['保证人1签字']['日期'] = signature_date | ||
1031 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
1032 | top='保证人2', bottom='在本人面前亲笔签署本合同') | ||
1033 | self.init_result['page_7']['保证人2签字']['签字'] = signature_name | ||
1034 | self.init_result['page_7']['保证人2签字']['日期'] = signature_date | ||
1035 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
1036 | top='在本人面前亲笔签署本合同', bottom='以下无正文') | ||
1037 | # print(signature_name, signature_date) | ||
1038 | self.init_result['page_7']['见证人签字']['签字'] = signature_name | ||
1039 | self.init_result['page_7']['见证人签字']['日期'] = signature_date | ||
1040 | |||
1041 | |||
1042 | # 重新定制输出 | ||
1043 | new_results = {"is_asp": self.is_asp, | ||
1044 | "page_info": self.init_result | ||
1045 | } | ||
1046 | return new_results | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
1 | import re | ||
2 | import numpy as np | ||
3 | from fuzzywuzzy import fuzz | ||
4 | from shapely.geometry import Polygon | ||
5 | |||
def caculate_iou(g, p):
    """Return the intersection-over-union (IoU) of two polygons.

    Args:
        g: Flat coordinate sequence ``[x0, y0, x1, y1, ...]`` (anything that
            ``np.array(...).reshape((-1, 2))`` accepts) describing polygon one.
        p: Same layout, describing polygon two.

    Returns:
        ``intersection_area / union_area``. ``0.0`` is returned when the union
        has no area (both polygons degenerate), instead of dividing by zero.
    """
    poly_g = Polygon(np.array(g).reshape((-1, 2)))
    poly_p = Polygon(np.array(p).reshape((-1, 2)))
    inter = poly_g.intersection(poly_p).area
    union = poly_g.area + poly_p.area - inter
    # Zero-area inputs (e.g. collapsed OCR boxes) would make union == 0.
    if union <= 0:
        return 0.0
    return inter / union
12 | |||
def get_table_info(bbox_1, bbox_2, ocr_result):
    """Return the OCR text found in a cell located by two reference boxes.

    The probe box takes its x coordinates (even indices) from ``bbox_2`` and
    its y coordinates (odd indices) from ``bbox_1``.

    Args:
        bbox_1: 8-value box supplying the y coordinates.
        bbox_2: 8-value box supplying the x coordinates.
        ocr_result: Iterable of entries where ``entry[0]`` is a box and
            ``entry[1]`` is its text.

    Returns:
        Text of the last entry whose box has IoU > 0 with the probe box, or
        ``''`` when nothing overlaps.
    """
    probe = [(bbox_2 if idx % 2 == 0 else bbox_1)[idx] for idx in range(8)]
    overlapping = [
        entry[1]
        for entry in ocr_result
        if caculate_iou(probe, entry[0]) > 0
    ]
    # Later matches override earlier ones.
    return overlapping[-1] if overlapping else ''
22 | |||
23 | class Finder: | ||
24 | |||
def __init__(self, pdf_info):
    """Store the parsed PDF and build the blank result templates.

    Args:
        pdf_info: Mapping of page key -> page dict. The other methods read
            ``pdf_info[page]['blocks']`` with ``type`` / ``lines`` / ``spans``
            entries.

    Attributes set:
        item: Blank field template (``words`` / ``page`` / ``position``).
        init_result, init_result_1, init_result_2: Field-name -> blank field
            for three output layouts. Every field gets its OWN copy of
            ``item`` so that writing into one field in place can never leak
            into the others (previously all fields shared one dict object).
    """
    self.pdf_info = pdf_info
    self.item = {"words": None,
                 "page": None,
                 "position": None,
                 }

    def blank_fields(names):
        # One independent blank item per field name, order preserved.
        return {name: self.item.copy() for name in names}

    # Formatted algorithm output
    self.init_result = blank_fields((
        "合同编号",
        "承租人-姓名",
        "承租人-证件号码",
        "承租人-法定代表人或授权代表",

        "共同承租人-姓名",
        "共同承租人-证件号码",
        "共同承租人-法定代表人或授权代表",

        "保证人1-姓名",
        "保证人1-证件号码",
        "保证人1-法定代表人或授权代表",

        "保证人2-姓名",
        "保证人2-证件号码",
        "保证人2-法定代表人或授权代表",
        "保证人3-姓名",
        "保证人3-证件号码",
        "保证人3-法定代表人或授权代表",
        "合同编号(正文)",
        "车辆识别代码",
        "车辆卖方(经销商)",
        "车辆代理商",
        "车辆原始销售价格(《机动车销售统一发票》所列金额)",
        "车辆附加产品明细表",
        "融资成本总额",
        "租期",
        "付款计划表",
        "收款银行账户-户名",
        "收款银行账户-银行账号",
        "收款银行账户-开户行",
        "银行账户-户名",
        "银行账户-银行账号",
        "银行账户-开户行",
        "签字页-承租人姓名",
        "签字页-承租人签章",

        "签字页-共同承租人姓名",
        "签字页-共同承租人签章",

        "签字页-保证人1姓名",
        "签字页-保证人1签章",

        "签字页-保证人2姓名",
        "签字页-保证人2签章",
        "签字页-保证人3姓名",
        "签字页-保证人3签章",
    ))

    # Formatted output for the vehicle disposal agreement (different fields)
    self.init_result_1 = blank_fields((
        "合同编号",
        "承租人-姓名",
        "承租人-证件号码",
        "销售经销商",
        "合同编号(正文)",
        "签字页-承租人姓名",
        "签字页-承租人证件号码",
        "签字页-承租人签章",
        "签字页-销售经销商",
        "签字页-销售经销商签章",
    ))

    # Formatted output for the vehicle lease mortgage contract
    self.init_result_2 = blank_fields((
        "合同编号",
        "合同编号(正文)",
        "抵押人姓名/名称",
        "抵押人证件号码",
        "抵押人配偶姓名/名称",
        "抵押人配偶证件号码",
        "车辆识别代码",
        "租金总额",
        "融资租赁期限",
        "签字页-抵押人姓名",
        "签字页-抵押人签章",
        "签字页-抵押人配偶姓名",
        "签字页-抵押人配偶签章",
    ))
109 | |||
def get_contract_no(self, page_num):
    """Read the contract number labelled ``合同编号:`` on one page.

    Args:
        page_num (str): Key of the page inside ``self.pdf_info``.

    Returns:
        dict: Copy of ``self.item``. When a span contains the label, ``words``
        is the text after the last ``:``, with ``page`` and ``position`` of
        that span (the last such span wins). When that value is the empty
        string, spans containing ``CH`` whose top edge lies above the bottom
        edge of the current position are taken instead. Unfilled copy when the
        label is absent.
    """
    found = self.item.copy()
    text_spans = [
        (span['bbox'], span['text'])
        for blk in self.pdf_info[page_num]['blocks'] if blk['type'] == 0
        for row in blk['lines']
        for span in row['spans']
    ]

    for box, content in text_spans:
        if '合同编号:' not in content:
            continue
        found['position'] = box
        found['page'] = page_num
        found['words'] = content.split(':')[-1]

    if found['words'] == '':
        # Label and number were split into separate spans: look for the
        # number itself. The reference position moves with every hit.
        for box, content in text_spans:
            if box[1] < found['position'][3] and 'CH' in content:
                found['position'] = box
                found['page'] = page_num
                found['words'] = content
    return found
144 | |||
def get_vehicle_price(self, page_num='0'):
    """Extract the vehicle price stated on a page.

    Args:
        page_num (str): Key of the page inside ``self.pdf_info``.

    Returns:
        dict: Copy of ``self.item``; for the last span containing
        ``所购车辆价格为人民币`` its ``words`` is the text after the final ``币``
        and ``position`` is the span box. ``page`` is not filled in.
    """
    price = self.item.copy()
    text_spans = (
        (span['bbox'], span['text'])
        for blk in self.pdf_info[page_num]['blocks'] if blk['type'] == 0
        for row in blk['lines']
        for span in row['spans']
    )
    for box, content in text_spans:
        if '所购车辆价格为人民币' in content:
            amount = content.split('币')[-1]
            price['position'] = box
            price['words'] = amount
    return price
158 | |||
def get_contract_no_one(self):
    """Find the contract number quoted in the body text.

    All text spans of a page are concatenated (spaces removed) before
    matching, so a number broken across spans/lines is still found. Pages are
    tried in ``self.pdf_info`` order and the first page with a match wins.

    Two patterns are tried per page, in this order:
      1. ``合同编号:[...]``  -> ``words`` is the whole matched text, label and
         brackets included.
      2. ``编号为...的``      -> ``words`` is the text between the two markers.
    In both cases whitespace and ``)`` characters are stripped from ``words``.

    Returns:
        dict | None: Copy of ``self.item`` with ``words`` and ``page`` filled
        and ``position`` set to ``None``; ``None`` when no page matches.
    """
    # NOTE: a third pattern (``编号为...)的``) used to follow. It had an
    # unbalanced ")" and raised re.error on every page that the first two
    # patterns missed; it could also never match where pattern 2 does not,
    # and pattern 2 already drops the trailing ")". It has been removed.
    bracketed = re.compile(r'(合同编号:\[(.*?)\])')
    inline = re.compile(r'编号为(.*?)的')

    for pno in self.pdf_info:
        page_text = ''.join(
            span['text']
            for block in self.pdf_info[pno]['blocks'] if block['type'] == 0
            for line in block['lines']
            for span in line['spans']
        ).replace(' ', '')

        match = bracketed.search(page_text) or inline.search(page_text)
        if match:
            contract_no = self.item.copy()
            contract_no['position'] = None
            contract_no['page'] = pno
            contract_no['words'] = re.sub(r'\s', '', match.group(1)).replace(')', '')
            return contract_no
    return None
198 | |||
def get_key_value(self, key, page_num=None):
    """Return the value that follows ``key`` in a text span.

    Args:
        key (str): Substring to look for inside each span's text.
        page_num (str | None): Page key to restrict the search to; ``None``
            scans every page of ``self.pdf_info`` in order.

    Returns:
        dict: Copy of ``self.item``. For the last matching span, ``words`` is
        the text after the final ``:`` with ``。`` and all whitespace removed,
        and ``page`` / ``position`` identify that span. Unfilled when ``key``
        is not found.
    """
    value = self.item.copy()
    whitespace = re.compile(r'\s')
    # A single code path for both "one page" and "all pages".
    pages = self.pdf_info if page_num is None else (page_num,)
    for pno in pages:
        for block in self.pdf_info[pno]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if key in text:
                        words = text.split(':')[-1].replace("。", "")
                        value['position'] = bbox
                        value['page'] = pno
                        value['words'] = whitespace.sub('', words)
    return value
231 | |||
def get_loan_principal(self, page_num='0'):
    """Extract the loan principal amounts from a page.

    Args:
        page_num (str): Key of the page inside ``self.pdf_info``.

    Returns:
        tuple: ``(upper, lower, asp_1, asp_2)``, each a copy of ``self.item``
        with ``words`` / ``position`` filled when found (``page`` is never
        filled):
          * ``upper``: span whose fuzzy ratio against the Chinese
            capital-amount characters exceeds 15 (text after the last ``:``).
          * ``lower``: span containing ``小写:¥`` (text after the last ``¥``).
          * ``asp_1`` / ``asp_2``: bracketed value of a ``人民币:小写:`` span
            whose vertical centre is above / below the span that equals
            ``附加产品融资贷款本金总金额``. Only searched when that span exists.
    """
    amount_chars = '壹贰叁肆伍陆柒捌玖拾佰仟万亿元角分零整'
    upper, lower, asp_1, asp_2 = (self.item.copy() for _ in range(4))

    text_spans = [
        (span['bbox'], span['text'])
        for blk in self.pdf_info[page_num]['blocks'] if blk['type'] == 0
        for row in blk['lines']
        for span in row['spans']
    ]

    anchor_bbox = None
    for box, content in text_spans:
        if fuzz.ratio(amount_chars, content) > 15:
            # The trimmed text is what the two checks below look at.
            content = content.split(':')[-1].strip()
            upper['position'] = box
            upper['words'] = content
        if '小写:¥' in content:
            figure = content.split('¥')[-1].strip()
            lower['position'] = box
            lower['words'] = figure
        if content == '附加产品融资贷款本金总金额':
            anchor_bbox = box

    if anchor_bbox:
        anchor_y = np.mean(anchor_bbox[1::2])
        for box, content in text_spans:
            centre_y = np.mean(box[1::2])
            if '人民币:小写:' not in content:
                continue
            if centre_y < anchor_y:
                target = asp_1
            elif centre_y > anchor_y:
                target = asp_2
            else:
                continue
            figure = re.findall(r'人民币:小写:\[(.*)\]', content)[0]
            target['position'] = box
            target['words'] = figure
    return upper, lower, asp_1, asp_2
272 | |||
def get_loan_term(self, page_num='0'):
    """Extract the loan term (number of months) from a page.

    The page text is concatenated and searched for ``贷款期限<digits>个月``;
    the span containing ``<digits>个月`` then supplies the position.

    Args:
        page_num (str): Key of the page inside ``self.pdf_info``.

    Returns:
        dict: Copy of ``self.item`` with ``words`` (the digits) and
        ``position`` of the last span containing ``<digits>个月``; unfilled
        when the phrase is absent. ``page`` is not filled in.
    """
    loan_term = self.item.copy()
    text_spans = [
        (span['bbox'], span['text'])
        for blk in self.pdf_info[page_num]['blocks'] if blk['type'] == 0
        for row in blk['lines']
        for span in row['spans']
    ]
    joined = ''
    for _, content in text_spans:
        joined += content

    hit = re.search(r'贷款期限(\d+)个月', joined)
    if not hit:
        return loan_term

    months = hit.group(1)
    needle = f'{months}个月'
    for box, content in text_spans:
        if needle in content:
            loan_term['position'] = box
            loan_term['words'] = months
    return loan_term
296 | |||
def get_asp_details(self, page_num):
    """Collect the add-on product detail table of a page as rows of text.

    Collection starts at the span that equals ``附加产品融资贷款本金总金额明细``
    (that span is included) and stops at the first span containing ``第二条``
    or ``征信管理``.

    Args:
        page_num (str): Key of the page inside ``self.pdf_info``.

    Returns:
        dict: Copy of ``self.item``. ``words`` is a list of rows: the first
        row holds only the title, each following row holds three consecutive
        cells; a trailing group of fewer than three cells is dropped.
        ``words`` stays unset when nothing was collected.
    """
    details = self.item.copy()

    cells = []
    collecting = False
    for blk in self.pdf_info[page_num]['blocks']:
        if blk['type'] != 0:
            continue
        for row in blk['lines']:
            for span in row['spans']:
                _box, content = span['bbox'], span['text']
                if content == '附加产品融资贷款本金总金额明细':
                    collecting = True
                if '第二条' in content or '征信管理' in content:
                    collecting = False
                if collecting:
                    cells.append(content)

    rows = []
    if cells:
        rows.append(cells[:1])
        # Full groups of three after the title: indices 1-3, 4-6, ...
        rows.extend(cells[start:start + 3]
                    for start in range(1, len(cells) - 2, 3))

    if rows:
        details['words'] = rows
    return details
330 | |||
def get_signature(self):
    """Find the signing-date span on page ``'0'``.

    Returns:
        dict: Copy of ``self.item`` whose ``words`` is the full text and
        ``position`` the box of the last span containing ``签署日期``;
        unfilled when there is none. ``page`` is not filled in.
    """
    signature = self.item.copy()
    text_spans = (
        (span['bbox'], span['text'])
        for blk in self.pdf_info['0']['blocks'] if blk['type'] == 0
        for row in blk['lines']
        for span in row['spans']
    )
    for box, content in text_spans:
        if '签署日期' not in content:
            continue
        signature['words'] = content
        signature['position'] = box
    return signature
345 | |||
def get_somebody(self, top, bottom):
    """Return the name and ID found between two marker texts on page ``'1'``.

    Args:
        top (str): Text marking the upper boundary.
        bottom (str): Text marking the lower boundary.

    Returns:
        tuple: ``(_name, _id)`` copies of ``self.item``. Among spans whose
        bottom edge lies strictly between the bottom edges of the (last) spans
        containing ``top`` and ``bottom``, the one containing ``姓名/名称``
        fills the name and the one containing
        ``自然人身份证件号码/法人执照号码`` fills the ID (text after the last
        ``:``). ``page`` is not filled in. A boundary that is not found
        defaults to 0.
    """
    person_name = self.item.copy()
    person_id = self.item.copy()
    text_spans = [
        (span['bbox'], span['text'])
        for blk in self.pdf_info['1']['blocks'] if blk['type'] == 0
        for row in blk['lines']
        for span in row['spans']
    ]

    # Pass 1: vertical limits of the section.
    upper_y = lower_y = 0
    for box, content in text_spans:
        if top in content:
            upper_y = box[3]
        if bottom in content:
            lower_y = box[3]

    # Pass 2: labelled values inside the section.
    labelled = (
        ('姓名/名称', person_name),
        ('自然人身份证件号码/法人执照号码', person_id),
    )
    for box, content in text_spans:
        if not upper_y < box[3] < lower_y:
            continue
        for label, target in labelled:
            if label in content:
                value = content.split(':')[-1]
                target['position'] = box
                target['words'] = value
    return person_name, person_id
379 | |||
def get_seller(self):
    """Find the dealer value written next to the ``经销商`` label on page ``'1'``.

    The label is the (last) span whose text equals ``经销商``. The value is
    the last span whose horizontal centre lies between the label's right edge
    and half of the page width, and whose vertical centre lies between the
    label's top and bottom edges.

    Returns:
        dict: Copy of ``self.item`` with ``words`` / ``position`` of the value
        span; unfilled when the label or value is missing. ``page`` is not
        filled in.
    """
    seller = self.item.copy()
    text_spans = [
        (span['bbox'], span['text'])
        for blk in self.pdf_info['1']['blocks'] if blk['type'] == 0
        for row in blk['lines']
        for span in row['spans']
    ]

    # Locate the key first.
    label_box = None
    for box, content in text_spans:
        if content == '经销商':
            label_box = box
    if not label_box:
        return seller

    # Then match the value relative to the key.
    page_middle = self.pdf_info['1']['width'] * 0.5
    for box, content in text_spans:
        in_column = label_box[2] < np.mean(box[::2]) < page_middle
        if in_column and label_box[1] < np.mean(box[1::2]) < label_box[3]:
            seller['position'] = box
            seller['words'] = content
    return seller
406 | |||
def get_payback_account(self):
    """Extract the repayment account details from page ``'1'``.

    Only the ticked ``☑账号`` layout is handled; any other layout yields three
    unfilled items.

    Returns:
        tuple: ``(account, account_name, account_bank)`` copies of
        ``self.item``. Values come from regex matches on the concatenated
        page text with spaces removed (``账号:…户名``, ``户名:…开户行``,
        ``开户行:…;``). Each ``words`` / ``position`` is only filled when a
        span containing the value is found; the last such span wins.
        ``page`` is not filled in.
    """
    account = self.item.copy()
    account_name = self.item.copy()
    account_bank = self.item.copy()

    text_spans = [
        (span['bbox'], span['text'])
        for blk in self.pdf_info['1']['blocks'] if blk['type'] == 0
        for row in blk['lines']
        for span in row['spans']
    ]
    page_text = ''
    for _, content in text_spans:
        page_text += content

    # Decide which account layout is used; only the explicit one is output.
    if '☑账号' not in page_text:
        return account, account_name, account_bank
    compact = page_text.replace(' ', '')

    def fill(target, value, is_hit):
        # Record `value` at every span accepted by `is_hit`.
        for box, content in text_spans:
            if is_hit(content):
                target['position'] = box
                target['words'] = value

    numbers = re.findall(r'账号:(.*)户名', compact)
    if numbers:
        number = numbers[0]
        fill(account, number, lambda content: f'{number}' in content)

    holders = re.findall(r'户名:(.*)开户行', compact)
    if holders:
        holder = holders[0]
        fill(account_name, holder, lambda content: f'{holder}' in content)

    banks = re.findall(r'开户行:(.*);', compact)
    if banks:
        bank = banks[0]
        fill(account_bank, bank,
             lambda content: f'开户行:{bank};' in content.replace(' ', ''))

    return account, account_name, account_bank
459 | |||
def get_repayment_schedule(self):
    """Extract the payment schedule as ``[index, rent]`` rows.

    Scanning covers every page in ``self.pdf_info`` order. Collection starts
    after the span whose stripped text is ``61.`` and stops at a span
    containing ``以上表格中所列序号``. While collecting, a span is kept only if it

      * contains no CJK character,
      * does not contain exactly one run of digits (those are the ``1.`` ..
        ``61.`` row labels), and
      * starts strictly between the left edge of the ``61.`` span and the
        right edge of the last span containing ``剩余融资``.

    Kept cells are grouped four per row; only the third cell of each group
    (the rent column) is output, next to a running 1-based index.

    Returns:
        dict: Copy of ``self.item``; ``words`` is
        ``[['序号', '租金'], ['1', rent_1], ...]`` and ``page`` is the page
        on which ``61.`` was (last) seen, or ``None``.
    """
    repayment_schedule = self.item.copy()

    # Compiled once instead of per span; raw strings avoid the invalid
    # "\d" escape sequence of the former non-raw literals.
    has_chinese = re.compile(r'[\u4e00-\u9fff]')
    digit_run = re.compile(r'\d+')

    cells = []
    in_table = False
    page = None
    left = 0
    right = 0
    for pno in self.pdf_info:
        for block in self.pdf_info[pno]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '剩余融资' in text:
                        right = bbox[2]
                    if '以上表格中所列序号' in text:
                        in_table = False
                    if in_table:
                        # Filter out Chinese text
                        if has_chinese.search(text):
                            continue
                        # Filter out the "1." - "61." row labels
                        if len(digit_run.findall(text)) == 1:
                            continue
                        if not left < bbox[0] < right:
                            continue
                        cells.append(text)

                    if text.strip() == "61.":
                        page = pno
                        in_table = True
                        left = bbox[0]

    # Four cells per row; keep only the index and the rent column.
    repayment_schedule_table = [['序号', '租金']]
    for row_no, start in enumerate(range(0, len(cells) - 3, 4), start=1):
        repayment_schedule_table.append([str(row_no), cells[start + 2]])

    repayment_schedule['words'] = repayment_schedule_table
    repayment_schedule['page'] = page
    return repayment_schedule
512 | |||
def get_signature_role_1(self):
    """Find the last signing-date span in the whole document.

    Returns:
        dict: Copy of ``self.item`` with ``position`` / ``page`` / ``words``
        of the last span (in page order) containing ``签署日期``; unfilled
        when there is none.
    """
    signature_role_1 = self.item.copy()
    text_spans = (
        (pno, span['bbox'], span['text'])
        for pno in self.pdf_info
        for blk in self.pdf_info[pno]['blocks'] if blk['type'] == 0
        for row in blk['lines']
        for span in row['spans']
    )
    for pno, box, content in text_spans:
        if '签署日期' not in content:
            continue
        signature_role_1['position'] = box
        signature_role_1['page'] = pno
        signature_role_1['words'] = content
    return signature_role_1
527 | |||
def get_signature_role_2(self):
    """Report whether the co-borrower signature area contains content.

    Spans are collected from the one containing ``共同借款人(共同抵押人)``
    (inclusive) until a span containing ``日期`` (exclusive), across all pages.

    Returns:
        dict: Blank item with
          * ``words``: ``'有'`` when more than four spans were collected,
            otherwise ``'无'``;
          * ``position``: ``[min_x, min_y, max_x, max_y]`` over all collected
            boxes, or ``None`` when nothing was collected;
          * ``page_num`` (and ``page``): page key of the last collected span.
    """
    # `init_item` is not assigned by the __init__ visible in this file; fall
    # back to the blank `item` template instead of raising AttributeError.
    signature_role_2 = getattr(self, 'init_item', self.item).copy()
    # Locate the signature region first
    texts = []
    boxes = []
    page_num = None
    region = False
    for pno in self.pdf_info:
        for block in self.pdf_info[pno]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '共同借款人(共同抵押人)' in text:
                        region = True
                    if '日期' in text:
                        region = False
                    if region:
                        page_num = pno
                        texts.append(text)
                        boxes.append(bbox)

    words = '有' if len(texts) > 4 else '无'

    # min()/max() over an empty array raised ValueError when the region was
    # never found; report "no position" instead.
    position = None
    if boxes:
        corners = np.array(boxes).reshape((-1, 2))
        position = [corners[:, 0].min(), corners[:, 1].min(),
                    corners[:, 0].max(), corners[:, 1].max()]

    signature_role_2['page_num'] = page_num
    # Also fill the template's own key so the result matches sibling methods.
    signature_role_2['page'] = page_num
    signature_role_2['position'] = position
    signature_role_2['words'] = words
    return signature_role_2
562 | |||
def get_signature_role_3(self):
    """Report whether the guarantor-1 signature area contains content.

    Spans are collected from the one containing ``保证人1`` (inclusive; the
    marker is ignored on the page whose key converts to 0) until a span
    containing ``日期`` (exclusive), across all pages.

    Returns:
        dict: Blank item with
          * ``words``: ``'有'`` when more than four spans were collected,
            otherwise ``'无'``;
          * ``position``: ``[min_x, min_y, max_x, max_y]`` over all collected
            boxes, or ``None`` when nothing was collected;
          * ``page_num`` (and ``page``): page key of the last collected span.
    """
    # `init_item` is not assigned by the __init__ visible in this file; fall
    # back to the blank `item` template instead of raising AttributeError.
    signature_role_3 = getattr(self, 'init_item', self.item).copy()
    # Locate the signature region first
    texts = []
    boxes = []
    page_num = None
    region = False
    for pno in self.pdf_info:
        for block in self.pdf_info[pno]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '保证人1' in text and int(pno) != 0:
                        region = True
                    if '日期' in text:
                        region = False
                    if region:
                        page_num = pno
                        texts.append(text)
                        boxes.append(bbox)

    words = '有' if len(texts) > 4 else '无'

    # min()/max() over an empty array raised ValueError when the region was
    # never found; report "no position" instead.
    position = None
    if boxes:
        corners = np.array(boxes).reshape((-1, 2))
        position = [corners[:, 0].min(), corners[:, 1].min(),
                    corners[:, 0].max(), corners[:, 1].max()]

    signature_role_3['page_num'] = page_num
    # Also fill the template's own key so the result matches sibling methods.
    signature_role_3['page'] = page_num
    signature_role_3['position'] = position
    signature_role_3['words'] = words
    return signature_role_3
597 | |||
def get_signature_role_4(self):
    """Report whether the guarantor-2 signature area contains content.

    Spans are collected from the one containing ``保证人2`` (inclusive; the
    marker is ignored on the page whose key converts to 0) until a span
    containing ``日期`` (exclusive), across all pages.

    Returns:
        dict: Blank item with
          * ``words``: ``'有'`` when more than four spans were collected,
            otherwise ``'无'``;
          * ``position``: ``[min_x, min_y, max_x, max_y]`` over all collected
            boxes, or ``None`` when nothing was collected;
          * ``page_num`` (and ``page``): page key of the last collected span.
    """
    # `init_item` is not assigned by the __init__ visible in this file; fall
    # back to the blank `item` template instead of raising AttributeError.
    signature_role_4 = getattr(self, 'init_item', self.item).copy()
    # Locate the signature region first
    texts = []
    boxes = []
    page_num = None
    region = False
    for pno in self.pdf_info:
        for block in self.pdf_info[pno]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '保证人2' in text and int(pno) != 0:
                        region = True
                    if '日期' in text:
                        region = False
                    if region:
                        page_num = pno
                        texts.append(text)
                        boxes.append(bbox)

    words = '有' if len(texts) > 4 else '无'

    # min()/max() over an empty array raised ValueError when the region was
    # never found; report "no position" instead.
    position = None
    if boxes:
        corners = np.array(boxes).reshape((-1, 2))
        position = [corners[:, 0].min(), corners[:, 1].min(),
                    corners[:, 0].max(), corners[:, 1].max()]

    signature_role_4['page_num'] = page_num
    # Also fill the template's own key so the result matches sibling methods.
    signature_role_4['page'] = page_num
    signature_role_4['position'] = position
    signature_role_4['words'] = words
    return signature_role_4
632 | |||
def get_signature_role_5(self):
    """Report whether the witness signature area contains content.

    Spans are collected from the one containing ``见证人签字`` (inclusive; the
    marker is ignored on the page whose key converts to 0) until a span
    containing ``年`` (exclusive), across all pages.

    Returns:
        dict: Blank item with
          * ``words``: ``'有'`` when more than four spans were collected,
            otherwise ``'无'``;
          * ``position``: ``[min_x, min_y, max_x, max_y]`` over all collected
            boxes, or ``None`` when nothing was collected;
          * ``page_num`` (and ``page``): page key of the last collected span.
    """
    # `init_item` is not assigned by the __init__ visible in this file; fall
    # back to the blank `item` template instead of raising AttributeError.
    signature_role_5 = getattr(self, 'init_item', self.item).copy()
    # Locate the signature region first
    texts = []
    boxes = []
    page_num = None
    region = False
    for pno in self.pdf_info:
        for block in self.pdf_info[pno]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '见证人签字' in text and int(pno) != 0:
                        region = True
                    if '年' in text:
                        region = False
                    if region:
                        page_num = pno
                        texts.append(text)
                        boxes.append(bbox)

    words = '有' if len(texts) > 4 else '无'

    # min()/max() over an empty array raised ValueError when the region was
    # never found; report "no position" instead.
    position = None
    if boxes:
        corners = np.array(boxes).reshape((-1, 2))
        position = [corners[:, 0].min(), corners[:, 1].min(),
                    corners[:, 0].max(), corners[:, 1].max()]

    signature_role_5['page_num'] = page_num
    # Also fill the template's own key so the result matches sibling methods.
    signature_role_5['page'] = page_num
    signature_role_5['position'] = position
    signature_role_5['words'] = words
    return signature_role_5
668 | |||
def get_last_page_signature(self, page_num, top, bottom):
    """Extract a signer name and signing date between two marker texts.

    Args:
        page_num (str): Key of the page inside ``self.pdf_info``.
        top (str): Text marking the upper boundary of the signature area.
        bottom (str): Text marking the lower boundary.

    Returns:
        tuple: ``(signature_name, signature_date)`` copies of ``self.item``.
        For the last span containing ``签署日期`` whose vertical centre lies
        strictly between the (integer-truncated) top edges of the two marker
        spans, the name is the text before the first space and the date is
        the text after the last ``:``; both items get that span's box as
        ``position``. Both items stay unfilled when either marker or the
        signing span is missing. ``page`` is not filled in.
    """
    signature_name = self.item.copy()
    signature_date = self.item.copy()
    text_spans = [
        (span['bbox'], span['text'])
        for block in self.pdf_info[page_num]['blocks'] if block['type'] == 0
        for line in block['lines']
        for span in line['spans']
    ]

    anchor_top = None
    anchor_bottom = None
    for bbox, text in text_spans:
        if top in text:
            anchor_top = bbox[1]
        if bottom in text:
            anchor_bottom = bbox[1]
    if anchor_top is None or anchor_bottom is None:
        return signature_name, signature_date

    for bbox, text in text_spans:
        if '签署日期' in text and int(anchor_top) < np.mean(bbox[1::2]) < int(anchor_bottom):
            signature_name['words'] = text.split(' ')[0]
            signature_name['position'] = bbox
            signature_date['words'] = text.split(':')[-1]
            # Previously the name's position was assigned a second time here,
            # leaving the date without a position.
            signature_date['position'] = bbox
    return signature_name, signature_date
699 | |||
def get_electronic_signature(self, top, bottom, t_pno=None):
    """Find the signing-date span located between two marker texts.

    Args:
        top (str): Text marking the upper boundary (its span's top edge).
        bottom (str): Text marking the lower boundary (its span's bottom
            edge); only accepted once ``top`` has been seen and when it lies
            below the current top boundary.
        t_pno (str | None): Restrict the search to this page key; ``None``
            searches every page.

    Returns:
        dict: Copy of ``self.item`` with ``words`` / ``page`` / ``position``
        of the last span containing ``签署日期`` whose vertical centre lies
        strictly between the two (integer-truncated) boundaries; unfilled
        when a boundary or the span is missing.
    """
    signature = self.item.copy()
    text_spans = [
        (pno, span['bbox'], span['text'])
        for pno in self.pdf_info if t_pno is None or pno == t_pno
        for blk in self.pdf_info[pno]['blocks'] if blk['type'] == 0
        for row in blk['lines']
        for span in row['spans']
    ]

    y_start = None
    y_end = None
    for _, box, content in text_spans:
        if top in content:
            y_start = box[1]
        elif bottom in content and y_start is not None and box[3] > y_start:
            y_end = box[3]
    if y_start is None or y_end is None:
        return signature

    for pno, box, content in text_spans:
        if '签署日期' not in content:
            continue
        if int(y_start) < np.mean(box[1::2]) < int(y_end):
            signature['words'] = content
            signature['page'] = pno
            signature['position'] = box
    return signature
738 | |||
def get_role_info(self, role_key, page_num='0'):
    """Read name, ID number and representative of one role from a page.

    The top-left corner of the last span starting with '保证人3' is used as
    an anchor that splits the page into four quadrants; each known role's
    '证件号码:' and '法定代表人或授权代表:' spans are accepted only when their
    bbox centre falls strictly inside that role's quadrant.

    Args:
        role_key: Role label, used as a regex matched at the start of a span
            (e.g. '承租人:', '保证人1:', '保证人2:', '保证人3:').
        page_num: Key of the page in ``self.pdf_info`` to scan.

    Returns:
        tuple: ``(name, id_num, representative)``, each a copy of
        ``self.item`` filled from the last matching span. All three stay
        untouched when no anchor span exists on the page.
    """
    name = self.item.copy()
    id_num = self.item.copy()
    representative = self.item.copy()

    # Flatten the page once: (bbox, text) of every span in type-0 blocks.
    spans = [
        (span['bbox'], span['text'])
        for block in self.pdf_info[page_num]['blocks']
        if block['type'] == 0
        for line in block['lines']
        for span in line['spans']
    ]

    anchor = None
    for box, content in spans:
        if re.match('保证人3', content) is not None:
            anchor = [box[0], box[1]]
    if anchor is None:
        return name, id_num, representative

    # role_key -> (centre must be left of anchor?, centre must be above anchor?)
    quadrant = {
        '承租人:': (True, True),
        '保证人1:': (True, False),
        '保证人2:': (False, True),
        '保证人3:': (False, False),
    }.get(role_key)

    def store(target, box, content):
        # Keep only the part after the last ':' as the value.
        target['words'] = content.split(':')[-1]
        target['page'] = page_num
        target['position'] = box

    def in_quadrant(box):
        want_left, want_above = quadrant
        centre_x = np.mean(box[::2])
        if not (centre_x < anchor[0] if want_left else centre_x > anchor[0]):
            return False
        centre_y = np.mean(box[1::2])
        return bool(centre_y < anchor[1] if want_above else centre_y > anchor[1])

    for box, content in spans:
        if re.match(role_key, content) is not None:
            store(name, box, content)
        if quadrant is None:
            # Unknown role: only the name lookup applies.
            continue
        for label, target in (('证件号码:', id_num), ('法定代表人或授权代表:', representative)):
            if re.match(label, content) is not None and in_quadrant(box):
                store(target, box, content)
    return name, id_num, representative
822 | |||
def get_table_add_product(self):
    """Extract the vehicle add-on product table.

    The table is searched on the last page whose text contains the caption
    '车辆附加产品(明细见下表)'. Rows are collected as
    ``[item, purchase price, financed amount]`` under the header row
    ``['项目', '购买价格', '实际融资金额']`` and closed by a '总计' row.

    Returns:
        dict: A copy of ``self.item``. When the table is found, 'words'
        holds the list of rows, 'page' the page key and 'position' is None.
        When the caption page or any of the header cells ('项目', '购买价格',
        '实际融资金额', '总计') is missing, the copy is returned untouched
        instead of raising.
    """
    table_add_product = self.item.copy()

    add_product_page_num = None
    for pno in self.pdf_info:
        for block in self.pdf_info[f'{pno}']['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    if '车辆附加产品(明细见下表)' in span['text']:
                        add_product_page_num = pno

    # No page carries the table caption: nothing to parse (previously this
    # fell through to a KeyError on self.pdf_info['None']).
    if add_product_page_num is None:
        return table_add_product

    # Collect every text span of the page as [8-value corner polygon, text].
    ocr_results = []
    for block in self.pdf_info[f'{add_product_page_num}']['blocks']:
        if block['type'] != 0:
            continue
        for line in block['lines']:
            for span in line['spans']:
                xmin, ymin, xmax, ymax = span['bbox']
                polygon = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]
                ocr_results.append([polygon, span['text']])

    lines = [['项目', '购买价格', '实际融资金额']]

    # Index of the last span whose text is exactly each header/total label.
    header_index = {'项目': None, '购买价格': None, '实际融资金额': None, '总计': None}
    for index, (_, text) in enumerate(ocr_results):
        if text in header_index:
            header_index[text] = index

    # A missing header cell used to crash with ocr_results[None]; treat the
    # table as not found instead.
    if None in header_index.values():
        return table_add_product

    key_xm = header_index['项目']
    key_gmjg = header_index['购买价格']
    key_sjrzje = header_index['实际融资金额']
    key_total = header_index['总计']

    # Start from the '项目' header box shifted right by two row heights and
    # down by one row height (presumably onto the first data row).
    bbox, _ = ocr_results[key_xm]
    rh = abs(bbox[1] - bbox[-1])
    anchor = np.array(bbox).reshape((-1, 2))
    anchor[:, 0] += 2 * rh
    anchor[:, 1] += rh

    for _ in range(5):
        for span in ocr_results:
            iou = caculate_iou(anchor, span[0])
            if iou > 0.01 and span[1].strip() != '所购':
                x = get_table_info(span[0], ocr_results[key_gmjg][0], ocr_results)
                y = get_table_info(span[0], ocr_results[key_sjrzje][0], ocr_results)
                lines.append([span[1].replace('\u3000', ' '), x, y])
                # Move the anchor one row height below the row just consumed.
                anchor = np.array(span[0]).reshape((-1, 2))
                anchor[:, 1] += rh

    total = get_table_info(ocr_results[key_total][0], ocr_results[key_sjrzje][0], ocr_results)
    lines.append(['总计', '', total])

    # Item names can be split across two spans (e.g. '所购 BMW悦然焕' + '新服务'),
    # so a recognisable fragment is replaced by the full product name.
    full_names = (
        ('BMW悦然', '所购 BMW悦然焕新服务'),
        ('BMW5年10', '所购 BMW5年10万公里长悦保养套餐'),
        ('事故维修补', '所购 事故维修补偿方案'),
        ('MINI4年6万公里长悦', '所购 MINI4年6万公里长悦保养套餐'),
    )
    filtered_lines = []
    for line in lines:
        if line[0][:2] not in ['所购', '项目', '总计']:
            continue
        for fragment, full_name in full_names:
            if fragment in line[0]:
                line[0] = full_name
        filtered_lines.append(line)

    table_add_product['words'] = filtered_lines
    table_add_product['page'] = add_product_page_num
    table_add_product['position'] = None
    return table_add_product
917 | |||
def get_contract_no_dy(self):
    """Find the mortgage contract number printed beside '抵押合同编号'.

    The bbox of the last span containing '抵押合同编号' defines a vertical
    band; the last span on any page that contains 'CH-' and whose vertical
    centre lies strictly inside that band is taken as the contract number.

    Returns:
        dict: A copy of ``self.item`` with 'position', 'page' and 'words'
        filled on success, otherwise untouched.
    """
    def walk():
        # Yield (page key, bbox, text) for every span of every type-0 block.
        for page_key in self.pdf_info:
            for block in self.pdf_info[page_key]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        yield page_key, span['bbox'], span['text']

    contract_no = self.item.copy()

    label_box = None
    for _, box, content in walk():
        if '抵押合同编号' in content:
            label_box = box
    if label_box is None:
        return contract_no

    for page_key, box, content in walk():
        on_label_row = label_box[1] < np.mean(box[1::2]) < label_box[3]
        if on_label_row and 'CH-' in content:
            contract_no['position'] = box
            contract_no['page'] = page_key
            contract_no['words'] = content
    return contract_no
946 | |||
def get_dyr_name_id(self):
    """Read the mortgagor's name and ID number below the '抵押人' label.

    The last span whose text is exactly '抵押人' defines the search band:
    from the label's top edge down to three label heights below its bottom
    edge. Within that band (on any page) the last span containing '姓名'
    gives the name and the last span containing '证件号码' gives the ID.

    Returns:
        tuple: ``(name, _id)``, each a copy of ``self.item``; the value is
        the text after the last ':' of the matching span.
    """
    def walk():
        # Yield (page key, bbox, text) for every span of every type-0 block.
        for page_key in self.pdf_info:
            for block in self.pdf_info[page_key]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        yield page_key, span['bbox'], span['text']

    name = self.item.copy()
    _id = self.item.copy()

    label_box = None
    for _, box, content in walk():
        if content == '抵押人':
            label_box = box
    if label_box is None:
        return name, _id

    row_height = abs(label_box[1] - label_box[3])
    band_top = label_box[1]
    band_bottom = label_box[3] + row_height * 3

    for page_key, box, content in walk():
        if not (band_top < np.mean(box[1::2]) < band_bottom):
            continue
        if '姓名' in content:
            value = content.split(':')[-1]
            name['position'] = box
            name['page'] = page_key
            name['words'] = value
        if '证件号码' in content:
            value = content.split(':')[-1]
            _id['position'] = box
            _id['page'] = page_key
            _id['words'] = value
    return name, _id
982 | |||
def get_dyrpo_name_id(self):
    """Read the mortgagor's spouse name and ID number.

    The last span whose text is exactly '抵押人配偶(如适' defines the search
    band: from the label's top edge down to three label heights below its
    bottom edge. Within that band (on any page) the last span containing
    '姓名' gives the name and the last span containing '证件号码' gives the
    ID number.

    Returns:
        tuple: ``(name, _id)``, each a copy of ``self.item``; the value is
        the text after the last ':' of the matching span. Only the ID value
        is stripped of surrounding whitespace.
    """
    def walk():
        # Yield (page key, bbox, text) for every span of every type-0 block.
        for page_key in self.pdf_info:
            for block in self.pdf_info[page_key]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        yield page_key, span['bbox'], span['text']

    name = self.item.copy()
    _id = self.item.copy()

    label_box = None
    for _, box, content in walk():
        if content == '抵押人配偶(如适':
            label_box = box
    if label_box is None:
        return name, _id

    row_height = abs(label_box[1] - label_box[3])
    band_top = label_box[1]
    band_bottom = label_box[3] + row_height * 3

    for page_key, box, content in walk():
        if not (band_top < np.mean(box[1::2]) < band_bottom):
            continue
        if '姓名' in content:
            value = content.split(':')[-1]
            name['position'] = box
            name['page'] = page_key
            name['words'] = value
        if '证件号码' in content:
            value = content.split(':')[-1]
            _id['position'] = box
            _id['page'] = page_key
            _id['words'] = value.strip()
    return name, _id
1018 | |||
def get_key_value_position(self, key):
    """Find the value printed to the right of a label, by position only.

    The last span whose text equals ``key`` is the label. A span (on any
    page) qualifies as the value when its vertical centre lies strictly
    between the label's top and bottom edges, its left edge is to the right
    of the label's left edge, and the gap between the label's right edge and
    the span's left edge is under ten label heights. The last qualifying
    span wins.

    Args:
        key: Exact text of the label span.

    Returns:
        dict: A copy of ``self.item`` with 'position', 'page' and 'words'
        filled on success, otherwise untouched.
    """
    def walk():
        # Yield (page key, bbox, text) for every span of every type-0 block.
        for page_key in self.pdf_info:
            for block in self.pdf_info[page_key]['blocks']:
                if block['type'] != 0:
                    continue
                for line in block['lines']:
                    for span in line['spans']:
                        yield page_key, span['bbox'], span['text']

    value = self.item.copy()

    label_box = None
    for _, box, content in walk():
        if content == key:
            label_box = box
    if label_box is None:
        return value

    row_height = abs(label_box[1] - label_box[3])
    for page_key, box, content in walk():
        same_row = label_box[1] < np.mean(box[1::2]) < label_box[3]
        if not same_row:
            continue
        starts_after_label = label_box[0] < box[0]
        close_enough = abs(label_box[2] - box[0]) < row_height * 10
        if starts_after_label and close_enough:
            value['position'] = box
            value['page'] = page_key
            value['words'] = content
    return value
1048 | |||
def get_role_info_3_3(self, role_key, page_num='0'):
    """Read name, ID number and representative of one role (3_3 layout).

    The top-left corner of the last span starting with '保证人2' is used as
    an anchor that splits the page into four quadrants; each known role's
    '证件号码:' and '法定代表人或授权代表:' spans are accepted only when their
    bbox centre falls strictly inside that role's quadrant.

    Args:
        role_key: Role label, used as a regex matched at the start of a span
            (e.g. '承租人一:', '共同承租人:', '保证人1:', '保证人2:').
        page_num: Key of the page in ``self.pdf_info`` to scan.

    Returns:
        tuple: ``(name, id_num, representative)``, each a copy of
        ``self.item`` filled from the last matching span. All three stay
        untouched when no anchor span exists on the page.
    """
    name = self.item.copy()
    id_num = self.item.copy()
    representative = self.item.copy()

    # Flatten the page once: (bbox, text) of every span in type-0 blocks.
    spans = [
        (span['bbox'], span['text'])
        for block in self.pdf_info[page_num]['blocks']
        if block['type'] == 0
        for line in block['lines']
        for span in line['spans']
    ]

    anchor = None
    for box, content in spans:
        if re.match('保证人2', content) is not None:
            anchor = [box[0], box[1]]
    if anchor is None:
        return name, id_num, representative

    # role_key -> (centre must be left of anchor?, centre must be above anchor?)
    quadrant = {
        '承租人一:': (True, True),
        '共同承租人:': (True, False),
        '保证人1:': (False, True),
        '保证人2:': (False, False),
    }.get(role_key)

    def store(target, box, content):
        # Keep only the part after the last ':' as the value.
        target['words'] = content.split(':')[-1]
        target['page'] = page_num
        target['position'] = box

    def in_quadrant(box):
        want_left, want_above = quadrant
        centre_x = np.mean(box[::2])
        if not (centre_x < anchor[0] if want_left else centre_x > anchor[0]):
            return False
        centre_y = np.mean(box[1::2])
        return bool(centre_y < anchor[1] if want_above else centre_y > anchor[1])

    for box, content in spans:
        if re.match(role_key, content) is not None:
            store(name, box, content)
        if quadrant is None:
            # Unknown role: only the name lookup applies.
            continue
        for label, target in (('证件号码:', id_num), ('法定代表人或授权代表:', representative)):
            if re.match(label, content) is not None and in_quadrant(box):
                store(target, box, content)
    return name, id_num, representative
1132 | |||
def get_value_by_findall(self, prefix, suffix, page_num):
    """Extract the text found between two literal markers on one page.

    All span texts of the page's type-0 blocks are concatenated, and the
    shortest text between ``prefix`` and ``suffix`` is taken from the first
    occurrence. The position reported is the bbox of the last single span
    that contains the extracted text.

    ``prefix`` and ``suffix`` are matched literally: they are escaped before
    being placed in the regex, so markers containing regex metacharacters
    (such as the parentheses in '(请授权代表签字并请盖章)') no longer create
    extra capture groups, which made ``re.findall`` return tuples and the
    later substring test raise ``TypeError``.

    Args:
        prefix: Literal text immediately before the value.
        suffix: Literal text immediately after the value.
        page_num: Key of the page in ``self.pdf_info`` to scan.

    Returns:
        dict: A copy of ``self.item``; 'position', 'page' and 'words' are
        filled only when the extracted text lies within a single span.
    """
    value = self.item.copy()
    pno = page_num

    # (bbox, text) of every span in the page's type-0 blocks, in page order.
    spans = [
        (span['bbox'], span['text'])
        for block in self.pdf_info[pno]['blocks']
        if block['type'] == 0
        for line in block['lines']
        for span in line['spans']
    ]
    all_text = ''.join(text for _, text in spans)

    pattern = f"{re.escape(prefix)}(.*?){re.escape(suffix)}"
    words_list = re.findall(pattern, all_text)
    if not words_list:
        return value

    target = words_list[0]
    for bbox, text in spans:
        if target in text:
            value['position'] = bbox
            value['page'] = pno
            value['words'] = target
    return value
1157 | |||
1158 | def get_info(self): | ||
1159 | """ | ||
1160 | Fill self.init_result with the contract fields found in self.pdf_info; only blocks with block['type'] == 0 are scanned here, and their 'lines'/'spans' supply the text. | ||
1161 | |||
1162 | Returns: | ||
1163 | dict: self.init_result | ||
1164 | """ | ||
1165 | if len(self.pdf_info) > 0: | ||
1166 | # Take the contract number from the first page (page key '0') | ||
1167 | contract_no = self.get_contract_no(page_num='0') | ||
1168 | self.init_result['合同编号'] = contract_no | ||
1169 | |||
1170 | # Rough check for the 'car/loan separated' (车贷分离) contract version: page '0' mentions a co-lessee label | ||
1171 | is_cdfl = False | ||
1172 | for block in self.pdf_info['0']['blocks']: | ||
1173 | if block['type'] != 0: | ||
1174 | continue | ||
1175 | for line in block['lines']: | ||
1176 | for span in line['spans']: | ||
1177 | bbox, text = span['bbox'], span['text'] | ||
1178 | if '共同承租人:' in text: | ||
1179 | is_cdfl = True | ||
1180 | |||
1181 | if is_cdfl == False: | ||
1182 | # Read the name and ID number of the four roles from the first page | ||
1183 | name, id_num, representative = self.get_role_info(role_key='承租人:', page_num='0') | ||
1184 | |||
1185 | if name["words"] == None: | ||
1186 | name, id_num, representative = self.get_role_info_3_3(role_key='承租人一:', page_num='0') | ||
1187 | self.init_result['承租人-姓名'] = name | ||
1188 | self.init_result['承租人-证件号码'] = id_num | ||
1189 | self.init_result['承租人-法定代表人或授权代表'] = representative | ||
1190 | |||
1191 | name, id_num, representative = self.get_role_info(role_key='保证人1:', page_num='0') | ||
1192 | self.init_result['保证人1-姓名'] = name | ||
1193 | self.init_result['保证人1-证件号码'] = id_num | ||
1194 | self.init_result['保证人1-法定代表人或授权代表'] = representative | ||
1195 | # Fallback for the 3_3 layout when the name was not found | ||
1196 | if name["words"] == None: | ||
1197 | name, id_num, representative = self.get_role_info_3_3(role_key='共同承租人:', page_num='0') | ||
1198 | self.init_result['共同承租人-姓名'] = name | ||
1199 | self.init_result['共同承租人-证件号码'] = id_num | ||
1200 | self.init_result['共同承租人-法定代表人或授权代表'] = representative | ||
1201 | |||
1202 | name, id_num, representative = self.get_role_info(role_key='保证人2:', page_num='0') | ||
1203 | self.init_result['保证人2-姓名'] = name | ||
1204 | self.init_result['保证人2-证件号码'] = id_num | ||
1205 | self.init_result['保证人2-法定代表人或授权代表'] = representative | ||
1206 | # Fallback for the 3_3 layout when the name was not found | ||
1207 | if name["words"] == None: | ||
1208 | name, id_num, representative = self.get_role_info_3_3(role_key='保证人1:', page_num='0') | ||
1209 | self.init_result['保证人2-姓名'] = name | ||
1210 | self.init_result['保证人2-证件号码'] = id_num | ||
1211 | self.init_result['保证人2-法定代表人或授权代表'] = representative | ||
1212 | |||
1213 | name, id_num, representative = self.get_role_info(role_key='保证人3:', page_num='0') | ||
1214 | self.init_result['保证人3-姓名'] = name | ||
1215 | self.init_result['保证人3-证件号码'] = id_num | ||
1216 | self.init_result['保证人3-法定代表人或授权代表'] = representative | ||
1217 | if name["words"] == None: | ||
1218 | name, id_num, representative = self.get_role_info_3_3(role_key='保证人2:', page_num='0') | ||
1219 | self.init_result['保证人3-姓名'] = name | ||
1220 | self.init_result['保证人3-证件号码'] = id_num | ||
1221 | self.init_result['保证人3-法定代表人或授权代表'] = representative | ||
1222 | else: | ||
1223 | name, id_num, representative = self.get_role_info_3_3(role_key='承租人一:', page_num='0') | ||
1224 | self.init_result['承租人-姓名'] = name | ||
1225 | self.init_result['承租人-证件号码'] = id_num | ||
1226 | self.init_result['承租人-法定代表人或授权代表'] = representative | ||
1227 | |||
1228 | name, id_num, representative = self.get_role_info_3_3(role_key='共同承租人:', page_num='0') | ||
1229 | self.init_result['共同承租人-姓名'] = name | ||
1230 | self.init_result['共同承租人-证件号码'] = id_num | ||
1231 | self.init_result['共同承租人-法定代表人或授权代表'] = representative | ||
1232 | |||
1233 | name, id_num, representative = self.get_role_info_3_3(role_key='保证人1:', page_num='0') | ||
1234 | self.init_result['保证人1-姓名'] = name | ||
1235 | self.init_result['保证人1-证件号码'] = id_num | ||
1236 | self.init_result['保证人1-法定代表人或授权代表'] = representative | ||
1237 | |||
1238 | name, id_num, representative = self.get_role_info_3_3(role_key='保证人2:', page_num='0') | ||
1239 | self.init_result['保证人2-姓名'] = name | ||
1240 | self.init_result['保证人2-证件号码'] = id_num | ||
1241 | self.init_result['保证人2-法定代表人或授权代表'] = representative | ||
1242 | |||
1243 | # Find the contract number in the body text (Part 2: main lease terms and payment plan) across all pages; it may wrap across lines, so no position is output for now | ||
1244 | contract_no = self.get_contract_no_one() | ||
1245 | self.init_result['合同编号(正文)'] = contract_no | ||
1246 | # Find the vehicle identification number (VIN) | ||
1247 | vin = self.get_key_value(key='车辆识别代码:') | ||
1248 | self.init_result['车辆识别代码'] = vin | ||
1249 | # Find the dealer (vehicle seller); falls back to the shorter '车辆卖方:' label | ||
1250 | seller = self.get_key_value(key='车辆卖方(经销商):') | ||
1251 | if seller['words'] == None: | ||
1252 | seller = self.get_key_value(key='车辆卖方:') | ||
1253 | self.init_result['车辆卖方(经销商)'] = seller | ||
1254 | # Find the vehicle agent (page '4') | ||
1255 | cldls = self.get_key_value(key='车辆代理商', page_num='4') | ||
1256 | self.init_result['车辆代理商'] = cldls | ||
1257 | # Find the original vehicle sale price | ||
1258 | vehicle_price = self.get_key_value(key='车辆原始销售价格(《机动车销售统一发票》所列金额):') | ||
1259 | self.init_result['车辆原始销售价格(《机动车销售统一发票》所列金额)'] = vehicle_price | ||
1260 | # Find the vehicle add-on product detail table | ||
1261 | table_add_product = self.get_table_add_product() | ||
1262 | self.init_result['车辆附加产品明细表'] = table_add_product | ||
1263 | # Find the total financing cost | ||
1264 | financing_cost = self.get_key_value(key='融资成本总额:') | ||
1265 | self.init_result['融资成本总额'] = financing_cost | ||
1266 | # Find the lease term | ||
1267 | lease_term = self.get_key_value(key='租期:') | ||
1268 | self.init_result['租期'] = lease_term | ||
1269 | # Find the repayment schedule table | ||
1270 | repayment_schedule = self.get_repayment_schedule() | ||
1271 | self.init_result['付款计划表'] = repayment_schedule | ||
1272 | # Find the lessee's receiving account: holder name, account number, bank (page '4') | ||
1273 | name = self.get_key_value(key='户名:', page_num='4') | ||
1274 | self.init_result['收款银行账户-户名'] = name | ||
1275 | account = self.get_key_value(key='银行账号:', page_num='4') | ||
1276 | self.init_result['收款银行账户-银行账号'] = account | ||
1277 | bank = self.get_key_value(key='开户银行:', page_num='4') | ||
1278 | self.init_result['收款银行账户-开户行'] = bank | ||
1279 | # Find the lessee's debit account: holder name, account number, bank (page '5') | ||
1280 | name = self.get_key_value(key='户名:', page_num='5') | ||
1281 | self.init_result['银行账户-户名'] = name | ||
1282 | account = self.get_key_value(key='银行账号:', page_num='5') | ||
1283 | self.init_result['银行账户-银行账号'] = account | ||
1284 | bank = self.get_key_value(key='开户银行:', page_num='5') | ||
1285 | self.init_result['银行账户-开户行'] = bank | ||
1286 | |||
1287 | # Collect the fields on the signature page | ||
1288 | # Lessee name and signature | ||
1289 | if is_cdfl == False: | ||
1290 | name = self.get_key_value(key='承租人姓名:') | ||
1291 | electronic_signature = self.get_electronic_signature(top='承租人姓名:', bottom='保证人1姓名:', t_pno='5') | ||
1292 | |||
1293 | if name["words"] == None: | ||
1294 | name = self.get_key_value(key='承租人一姓名:') | ||
1295 | electronic_signature = self.get_electronic_signature(top='承租人一姓名:', bottom='共同承租人名称:', t_pno='5') | ||
1296 | |||
1297 | self.init_result['签字页-承租人姓名'] = name | ||
1298 | self.init_result['签字页-承租人签章'] = electronic_signature | ||
1299 | # Guarantor 1 name and signature | ||
1300 | name = self.get_key_value(key='保证人1姓名:') | ||
1301 | electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:', t_pno='5') | ||
1302 | self.init_result['签字页-保证人1姓名'] = name | ||
1303 | self.init_result['签字页-保证人1签章'] = electronic_signature | ||
1304 | # NOTE: this check compares name["words"] against "" rather than None | ||
1305 | if name["words"] == "": | ||
1306 | name = self.get_key_value(key='共同承租人名称:') | ||
1307 | electronic_signature = self.get_electronic_signature(top='共同承租人名称:', bottom='保证人1姓名:', t_pno='5') | ||
1308 | self.init_result['签字页-共同承租人姓名'] = name | ||
1309 | self.init_result['签字页-共同承租人签章'] = electronic_signature | ||
1310 | # Guarantor 2 name and signature | ||
1311 | name = self.get_key_value(key='保证人2姓名:') | ||
1312 | electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='保证人3姓名:', t_pno='5') | ||
1313 | self.init_result['签字页-保证人2姓名'] = name | ||
1314 | self.init_result['签字页-保证人2签章'] = electronic_signature | ||
1315 | # This condition handles the 3_3 layout | ||
1316 | if name["words"] == "": | ||
1317 | name = self.get_key_value(key='保证人1姓名:') | ||
1318 | electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:', t_pno='5') | ||
1319 | self.init_result['签字页-保证人1姓名'] = name | ||
1320 | self.init_result['签字页-保证人1签章'] = electronic_signature | ||
1321 | # Guarantor 3 name and signature | ||
1322 | name = self.get_key_value(key='保证人3姓名:') | ||
1323 | electronic_signature = self.get_electronic_signature(top='保证人3姓名:', bottom='日期:', t_pno='5') | ||
1324 | self.init_result['签字页-保证人3姓名'] = name | ||
1325 | self.init_result['签字页-保证人3签章'] = electronic_signature | ||
1326 | # This condition handles the 3_3 layout | ||
1327 | if name["words"] == None: | ||
1328 | name = self.get_key_value(key='保证人2姓名:') | ||
1329 | electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='日期:', t_pno='5') | ||
1330 | self.init_result['签字页-保证人2姓名'] = name | ||
1331 | self.init_result['签字页-保证人2签章'] = electronic_signature | ||
1332 | else: | ||
1333 | name = self.get_key_value(key='承租人一姓名:') | ||
1334 | electronic_signature = self.get_electronic_signature(top='承租人一姓名:', bottom='共同承租人名称:', t_pno='5') | ||
1335 | self.init_result['签字页-承租人姓名'] = name | ||
1336 | self.init_result['签字页-承租人签章'] = electronic_signature | ||
1337 | |||
1338 | name = self.get_key_value(key='共同承租人名称:') | ||
1339 | electronic_signature = self.get_electronic_signature(top='共同承租人名称:', bottom='保证人1姓名:', t_pno='5') | ||
1340 | self.init_result['签字页-共同承租人姓名'] = name | ||
1341 | self.init_result['签字页-共同承租人签章'] = electronic_signature | ||
1342 | |||
1343 | name = self.get_key_value(key='保证人1姓名:') | ||
1344 | electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:', t_pno='5') | ||
1345 | self.init_result['签字页-保证人1姓名'] = name | ||
1346 | self.init_result['签字页-保证人1签章'] = electronic_signature | ||
1347 | |||
1348 | name = self.get_key_value(key='保证人2姓名:') | ||
1349 | electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='保证人3姓名:', t_pno='5') | ||
1350 | self.init_result['签字页-保证人2姓名'] = name | ||
1351 | self.init_result['签字页-保证人2签章'] = electronic_signature | ||
1352 | |||
1353 | return self.init_result | ||
1354 | |||
def get_info_1(self):
    """Fill ``self.init_result_1`` with the fields of this contract type.

    Nothing is looked up when ``self.pdf_info`` is empty. The dealer name is
    first read with ``get_key_value``; when that yields an empty string it is
    re-read with ``get_value_by_findall`` between literal markers.

    Returns:
        dict: ``self.init_result_1``.
    """
    if not len(self.pdf_info) > 0:
        return self.init_result_1

    result = self.init_result_1

    # First page: contract number, lessee name and ID number.
    result['合同编号'] = self.get_contract_no(page_num='0')
    result['承租人-姓名'] = self.get_key_value(key='承租人:', page_num='0')
    result['承租人-证件号码'] = self.get_key_value(key='证件号码:', page_num='0')

    # Selling dealer on the first page.
    dealer = self.get_key_value(key='销售经销商:', page_num='0')
    if dealer['words'] == "":
        dealer = self.get_value_by_findall('销售经销商:', '地址:', page_num='0')
    result['销售经销商'] = dealer

    # Contract number in the body text.
    result['合同编号(正文)'] = self.get_contract_no_one()

    # Signature page: lessee name, ID number and signature.
    result['签字页-承租人姓名'] = self.get_key_value(key='姓名/名称:')
    result['签字页-承租人证件号码'] = self.get_key_value(key='自然人身份证件号码/法人执照号码:')
    result['签字页-承租人签章'] = self.get_signature_role_1()

    # Signature page: selling dealer, e.g.
    # '销售经销商:深圳市宝创汽车贸易有限公司南山分公司(请授权代表签字并请盖章)'.
    dealer = self.get_key_value(key='销售经销商:')
    if dealer['words'] == "":
        dealer = self.get_value_by_findall('销售经销商:', '(请授权代表签字并请盖章)', page_num='3')
    result['签字页-销售经销商'] = dealer

    # The dealer signature is not extracted.
    return self.init_result_1
1391 | |||
def get_info_2(self):
    """Fill ``self.init_result_2`` with the mortgage contract fields.

    Nothing is looked up when ``self.pdf_info`` is empty.

    Returns:
        dict: ``self.init_result_2``.
    """
    if not len(self.pdf_info) > 0:
        return self.init_result_2

    result = self.init_result_2

    # Contract number beside its label, then the one in the body text.
    result['合同编号'] = self.get_contract_no_dy()
    result['合同编号(正文)'] = self.get_contract_no_one()

    # Mortgagor name and ID number.
    mortgagor_name, mortgagor_id = self.get_dyr_name_id()
    result['抵押人姓名/名称'] = mortgagor_name
    result['抵押人证件号码'] = mortgagor_id

    # Mortgagor's spouse name and ID number.
    spouse_name, spouse_id = self.get_dyrpo_name_id()
    result['抵押人配偶姓名/名称'] = spouse_name
    result['抵押人配偶证件号码'] = spouse_id

    # Vehicle identification number, total rent and lease period.
    result['车辆识别代码'] = self.get_key_value(key='车辆识别代码:')
    result['租金总额'] = self.get_key_value_position(key='租金总额')
    result['融资租赁期限'] = self.get_key_value_position(key='融资租赁期限')

    # Signature page (page key '1'): mortgagor name and signature.
    signer = self.get_key_value(key='抵押人姓名:')
    seal = self.get_electronic_signature(top='抵押权人盖章', bottom='抵押人配偶姓名:', t_pno='1')
    result['签字页-抵押人姓名'] = signer
    result['签字页-抵押人签章'] = seal

    # Signature page: spouse name and signature.
    signer = self.get_key_value(key='抵押人配偶姓名:')
    seal = self.get_electronic_signature(top='抵押人配偶姓名:', bottom='日期', t_pno='1')
    result['签字页-抵押人配偶姓名'] = signer
    result['签字页-抵押人配偶签章'] = seal

    return self.init_result_2
... | \ No newline at end of file | ... | \ No newline at end of file |
... | @@ -6,9 +6,10 @@ | ... | @@ -6,9 +6,10 @@ |
6 | # @Description : | 6 | # @Description : |
7 | 7 | ||
8 | from .get_char import Finder | 8 | from .get_char import Finder |
9 | from .get_char_fsm import Finder as FSMFinder | ||
9 | 10 | ||
10 | 11 | ||
11 | def predict(pdf_info, file_cls): | 12 | def predict(pdf_info, file_cls, is_fsm=False): |
12 | """Summary | 13 | """Summary |
13 | 14 | ||
14 | Args: | 15 | Args: |
... | @@ -58,6 +59,10 @@ def predict(pdf_info, file_cls): | ... | @@ -58,6 +59,10 @@ def predict(pdf_info, file_cls): |
58 | pdf_info = dict() | 59 | pdf_info = dict() |
59 | for pno, page_info in enumerate(pdf_info_1): | 60 | for pno, page_info in enumerate(pdf_info_1): |
60 | pdf_info[str(pno)] = page_info | 61 | pdf_info[str(pno)] = page_info |
62 | |||
63 | if is_fsm: | ||
64 | f = FSMFinder(pdf_info) | ||
65 | else: | ||
61 | f = Finder(pdf_info) | 66 | f = Finder(pdf_info) |
62 | if file_cls == 0: | 67 | if file_cls == 0: |
63 | results = f.get_info() | 68 | results = f.get_info() | ... | ... |
src/common/fsm_econtract/const.py
0 → 100644
# Field extraction config, keyed by page key ("0" is the only page here).
# Per page:
#   "keys":  field name -> list of (label, regex alternatives, "top1", options)
#   "value": field name -> (kind "text"/"img", direction, options, default)
# NOTE(review): the meaning of "top1", 'offset_tuple' and 'rigorous' is
# defined by the consumer of this table, which is not shown here - confirm
# there before changing any value.
WEP_FIELD = {
    "0": {
        "keys": {
            "客户姓名": [
                ("客户姓名", (r"^姓名.?$", r"^企业名称.?$"), "top1", {}),
            ],
            "证件类型": [
                ("证件类型", (r"^证件类型.?$",), "top1", {}),
            ],
            "证件号码": [
                ("证件号码", (r"^证件号码.?$", r"^统一社会信用代码.?$"), "top1", {}),
            ],
            "合同价格(小写)": [
                ("人民币", (r"^人民币¥.?$",), "top1", {}),
            ],
            "客户签名": [
                ("客户签名/盖章", (r"^客户签名/盖章.*$",), "top1", {}),
            ],
            "签单日期": [
                ("签单日期", (r"^签单日期.*签单日期.?$",), "top1", {}),
            ],
        },
        "value": {
            "客户姓名": ("text", "right", {"offset_tuple": (-1.1, 1, 0.3, 0)}, ""),
            "证件类型": ("text", "right", {"offset_tuple": (-1, 1, 0, 0)}, ""),
            "证件号码": ("text", "right", {"offset_tuple": (-1, 2, 0.3, 0)}, ""),
            "合同价格(小写)": ("text", "right", {"offset_tuple": (-1, 1, 0.3, 0)}, ""),
            "客户签名": (
                "img",
                "under",
                {"offset_tuple": (0, 0, 0, 4), "rigorous": True},
                "无",
            ),
            "签单日期": (
                "img",
                "right",
                {"offset_tuple": (0, 0, 1.1, 0), "rigorous": True},
                "无",
            ),
        },
    },
}
22 | |||
# Field extraction config, keyed by page key: text fields are listed under
# page "0", signature and signing date under page "1".
# Per page:
#   "keys":  field name -> list of (label, regex alternatives, "top1", options)
#   "value": field name -> (kind "text"/"img", direction, options, default)
# NOTE(review): the meaning of "top1", 'offset_tuple' and 'rigorous' is
# defined by the consumer of this table, which is not shown here - confirm
# there before changing any value.
MSI_FIELD = {
    "0": {
        "keys": {
            "客户姓名": [
                ("客户姓名", (r"^客户姓名.?$", r"^企业名称.?$"), "top1", {}),
            ],
            "证件类型": [
                ("证件类型", (r"^证件类型.?$",), "top1", {}),
            ],
            "证件号码": [
                ("证件号码", (r"^证件号码.?$", r"^统一社会信用代码.?$"), "top1", {}),
            ],
            "合同价格(小写)": [
                ("人民币", (r"^人民币¥.?$",), "top1", {}),
            ],
        },
        "value": {
            "客户姓名": ("text", "right", {"offset_tuple": (-1.2, 1, 0.3, 0)}, ""),
            "证件类型": ("text", "right", {"offset_tuple": (-1, 1, 0, 0)}, ""),
            "证件号码": ("text", "right", {"offset_tuple": (-1, 2, 0.3, 0)}, ""),
            "合同价格(小写)": ("text", "right", {"offset_tuple": (-1, 1, 0.3, 0)}, ""),
        },
    },
    "1": {
        "keys": {
            "客户签名": [
                ("客户签名/盖章", (r"^客户签名/盖章.*$",), "top1", {}),
            ],
            "签单日期": [
                ("签单日期", (r"^签单日期.*签单日期.?$",), "top1", {}),
            ],
        },
        "value": {
            "客户签名": (
                "img",
                "under",
                {"offset_tuple": (0, 0, 0, 4), "rigorous": True},
                "无",
            ),
            "签单日期": (
                "img",
                "right",
                {"offset_tuple": (0, 0, 1.1, 0), "rigorous": True},
                "无",
            ),
        },
    },
}
49 | |||
50 | SC_FIELD = { | ||
51 | "0": { | ||
52 | 'keys': { | ||
53 | '姓名': [('姓名', (r'^姓名.?$', r'^企业名称.?$'), 'top1', {})], | ||
54 | '证件类型': [('证件类型', (r'^证件类型.?$', ), 'top1', {})], | ||
55 | '证件号码': [('证件号码', (r'^证件号码.?$', r'^统一社会信用代码.?$'), 'top1', {})], | ||
56 | '总价': [('总价', (r'^总价.?$', ), 'top1', {})], | ||
57 | }, | ||
58 | 'value': { | ||
59 | '姓名': ('text', 'right', {'offset_tuple': (-2, 8, 0.5, 0)}, ''), | ||
60 | '证件类型': ('text', 'right', {'offset_tuple': (-2, 6, 0.5, 0)}, ''), | ||
61 | '证件号码': ('text', 'right', {'offset_tuple': (-2, 6, 0.5, 0)}, ''), | ||
62 | '总价': ('text', 'right', {'offset_tuple': (-2, 12, 0.5, 0)}, ''), | ||
63 | }, | ||
64 | }, | ||
65 | "-1": { | ||
66 | 'keys': { | ||
67 | '客户签名': [('客户签名/盖章', (r'^客户签名/盖章.*$', r'^客户签名/盖章.*$'), 'top1', {})], | ||
68 | '签单日期': [('签单日期', (r'^签单日期.*签单日期.?$', ), 'top1', {})], | ||
69 | }, | ||
70 | 'value': { | ||
71 | '客户签名': ('img', 'under', {'offset_tuple': (1.5, 1, 0, 4), 'rigorous': True}, '无'), | ||
72 | '签单日期': ('img', 'right', {'offset_tuple': (0, 0, 1.1, 0), 'rigorous': True}, '无'), | ||
73 | }, | ||
74 | } | ||
75 | } |
src/common/fsm_econtract/fsm_contract_ocr.py
0 → 100644
1 | from .retriever import Retriever | ||
2 | from .const import WEP_FIELD, MSI_FIELD, SC_FIELD | ||
3 | from .tools import pdf_info_rebuild | ||
4 | |||
# One retriever per FSM contract template; ``predict`` indexes this list with
# ``file_type`` (0 -> WEP_FIELD, 1 -> MSI_FIELD, 2 -> SC_FIELD).
retriever_list = [
    Retriever(field_template)
    for field_template in (WEP_FIELD, MSI_FIELD, SC_FIELD)
]


def predict(pdf_info, file_type=0):
    """Extract the templated fields from one FSM contract.

    Args:
        pdf_info: mapping of page-number string to page info, in the shape
            ``pdf_info_rebuild`` expects.
        file_type: index into ``retriever_list`` selecting the template.

    Returns:
        Whatever ``Retriever.get_target_fields`` returns for the rebuilt
        text and image lists.
    """
    # Resolve the retriever first so a bad ``file_type`` fails before any
    # work is done on ``pdf_info``.
    chosen = retriever_list[file_type]
    text_by_page, img_by_page = pdf_info_rebuild(pdf_info)
    return chosen.get_target_fields(text_by_page, img_by_page)
11 | |||
12 |
src/common/fsm_econtract/hmh_ocr.py
0 → 100644
src/common/fsm_econtract/retriever.py
0 → 100644
1 | import re | ||
2 | |||
3 | |||
class HMHRetriever:
    """Regex-based extractor for a fixed set of fields on the first page.

    Only the text spans stored under page key ``'0'`` are inspected. Every
    field listed in ``search_fields_list`` is always present in the result;
    fields that were not found carry their default words and
    ``default_position``.
    """

    # Compiled once at class creation instead of on every span.
    _NAME_ID_COMPANY_RE = re.compile(r'姓名(.*)证件号码(.*)与(.*公司)')
    _APPLICATION_NO_RE = re.compile(r'合同编号.*(CH-B\d*-\d*).*')
    _NAME_DATE_RE = re.compile(r'(.*).*签署日期.*(\d{4}-\d{2}-\d{2})')

    def __init__(self):
        self.words_str = 'words'
        self.position_str = 'location'
        # Marker reported when the signature/date line is present.
        self.fix_hava_str = '有'
        self.default_position = [0, 0, 0, 0]
        # (field name, default words) in the order the result is filled.
        self.search_fields_list = [
            ('借款/承租人姓名', ''),
            ('证件号码', ''),
            ('渠道', ''),
            ('合同编号', ''),
            ('借款人签字/盖章', '无'),
        ]

    def _entry(self, words, position):
        """Build one result record."""
        return {self.words_str: words, self.position_str: position}

    def get_target_fields(self, pdf_text_list):
        """Extract the configured fields from page ``'0'``.

        Args:
            pdf_text_list: mapping of page-number string to a list of
                ``(bbox, text)`` pairs. It is only read, never modified.

        Returns:
            ``{"words_result": {field: {'words': ..., 'location': ...}}}``.
        """
        result = dict()
        found_name_id_company = found_application_no = found_name_date = False

        # ``get`` instead of ``pop``: the caller's mapping must keep page 0.
        for bbox, text in pdf_text_list.get(str(0), []):
            if not found_name_id_company:
                # The first match of ``search`` is what ``findall()[0]`` gave.
                match = self._NAME_ID_COMPANY_RE.search(text)
                if match:
                    name, id_number, company = match.groups()
                    result[self.search_fields_list[0][0]] = self._entry(
                        name.replace('\u3000', '').strip(), bbox)
                    # NOTE(review): the original chained two identical
                    # replace(')') calls; the second presumably targeted the
                    # full-width parenthesis (U+FF09) - confirm.
                    result[self.search_fields_list[1][0]] = self._entry(
                        id_number.replace('\u3000', '')
                        .replace(')', '')
                        .replace('\uff09', '')
                        .strip(),
                        bbox)
                    result[self.search_fields_list[2][0]] = self._entry(company, bbox)
                    found_name_id_company = True

            if not found_application_no:
                # Accepted only when the span holds exactly one match.
                application_numbers = self._APPLICATION_NO_RE.findall(text)
                if len(application_numbers) == 1:
                    result[self.search_fields_list[3][0]] = self._entry(
                        application_numbers[0], bbox)
                    found_application_no = True

            if not found_name_date:
                # Only the presence of the signing line is reported, not its text.
                if self._NAME_DATE_RE.search(text):
                    result[self.search_fields_list[4][0]] = self._entry(
                        self.fix_hava_str, bbox)
                    found_name_date = True

        for find_key, default_value in self.search_fields_list:
            if find_key not in result:
                # Copy so that records do not share one mutable list.
                result[find_key] = self._entry(default_value, list(self.default_position))

        return {"words_result": result}
74 | |||
class Retriever:
    """Template-driven field extractor working on positioned text/image boxes.

    ``target_fields`` maps a page key (zero-based page number as a string,
    or ``'-1'`` for the highest-numbered text page) to a dict with:

    * ``'keys'``: field -> list of ``(key_text, regex_tuple, direction,
      kwargs)``; spans matching any regex are anchor candidates and
      ``key_<direction>`` selects one.
    * ``'value'``: field -> ``(source, direction, kwargs, default_value)``;
      ``value_<direction>`` searches text spans when ``source == 'text'``
      and image boxes otherwise.
    """

    def __init__(self, target_fields):
        self.keys_str = 'keys'
        self.value_str = 'value'
        self.text_str = 'text'
        self.words_str = 'words'
        self.position_str = 'position'
        self.default_position = [-1, -1, -1, -1]
        self.target_fields = target_fields
        # Per value_type character substitutions applied to found values.
        self.replace_map = {
            'int': {
                '(': '0'
            }
        }

    @staticmethod
    def _is_inside(box, target_bbox, rigorous):
        """Tell whether ``box`` lies inside ``target_bbox``.

        With ``rigorous`` the whole box must be strictly inside; otherwise
        only its centre point has to be.
        """
        x0, y0, x1, y1 = box
        x_min, y_min, x_max, y_max = target_bbox
        if rigorous:
            return x_min < x0 and x1 < x_max and y_min < y0 and y1 < y_max
        cent_x = x0 + ((x1 - x0) / 2)
        cent_y = y0 + ((y1 - y0) / 2)
        return x_min < cent_x < x_max and y_min < cent_y < y_max

    def _apply_replace_map(self, value, value_type):
        """Translate ``value`` with the substitutions registered for ``value_type``."""
        if isinstance(value_type, str) and value_type in self.replace_map and isinstance(value, str):
            return value.translate(str.maketrans(self.replace_map.get(value_type, {})))
        return value

    @staticmethod
    def key_top1(coordinates_list, key_coordinates):
        """Keyword lookup direction "top": return the box with the smallest y0.

        ``key_coordinates`` is accepted for signature parity with the other
        ``key_*`` methods and is not used. The input list is left untouched.
        """
        # min() returns the first minimal element, which is what a stable
        # in-place sort followed by [0] produced, without reordering the input.
        return min(coordinates_list, key=lambda box: box[1])

    def key_right(self, coordinates_list, key_coordinates, offset_tuple, rigorous=False):
        """Keyword lookup direction "right": left-most candidate in the search box.

        The search box is derived from the previous keyword's coordinates
        (``key_coordinates``) and ``offset_tuple``. Falls back to the
        top-most candidate when there is no previous keyword or nothing
        lies inside the box.
        """
        if len(coordinates_list) == 1:
            return coordinates_list[0]

        # Without coordinates of the previous-level keyword, return the top-most box.
        if key_coordinates is None:
            return self.key_top1(coordinates_list, key_coordinates)

        target_bbox = self.get_target_bbox(key_coordinates, offset_tuple)

        x_min_find, find_key_coordinates = None, None
        for x0, y0, x1, y1 in coordinates_list:
            if not self._is_inside((x0, y0, x1, y1), target_bbox, rigorous):
                continue
            if x_min_find is None or x0 < x_min_find:
                x_min_find = x0
                find_key_coordinates = (x0, y0, x1, y1)

        if find_key_coordinates is None:
            return self.key_top1(coordinates_list, key_coordinates)
        return find_key_coordinates

    def value_right(self, search_list, key_coordinates, offset_tuple, value_type=None, rigorous=False):
        """Value lookup direction "right": left-most non-blank entry in the search box.

        Returns:
            ``(value, coordinates)``; both are ``None`` when nothing qualifies.
        """
        target_bbox = self.get_target_bbox(key_coordinates, offset_tuple)

        x_min_find, value, coordinates = None, None, None
        for (x0, y0, x1, y1), text in search_list:
            if not self._is_inside((x0, y0, x1, y1), target_bbox, rigorous):
                continue
            if (x_min_find is None or x0 < x_min_find) and len(text.strip()) > 0:
                x_min_find = x0
                value = text
                coordinates = (x0, y0, x1, y1)

        return self._apply_replace_map(value, value_type), coordinates

    def value_under(self, search_list, key_coordinates, offset_tuple, value_type=None, append=False, rigorous=False):
        """Value lookup direction "under": entries in the search box, top-to-bottom.

        Qualifying non-blank entries are ordered by ``(y0, x0)``. The first
        one supplies the coordinates; the value is that entry's text, or the
        concatenation of all entries when ``append`` is true.

        Returns:
            ``(value, coordinates)``; ``(None, None)`` when nothing qualifies.
        """
        target_bbox = self.get_target_bbox(key_coordinates, offset_tuple)

        find_list = [
            (x0, y0, x1, y1, text)
            for (x0, y0, x1, y1), text in search_list
            if self._is_inside((x0, y0, x1, y1), target_bbox, rigorous) and len(text.strip()) > 0
        ]
        if not find_list:
            return None, None

        find_list.sort(key=lambda item: (item[1], item[0]))
        coordinates = find_list[0][:-1]
        if append:
            value = ''.join([text for _, _, _, _, text in find_list])
        else:
            value = find_list[0][-1]

        return self._apply_replace_map(value, value_type), coordinates

    @staticmethod
    def get_target_bbox(key_coordinates, offset_tuple):
        """Grow the keyword box by multiples of its own width and height.

        ``offset_tuple`` is ``(offset_xmin, offset_xmax, offset_ymin,
        offset_ymax)``. Positive values push the corresponding edge
        outwards; e.g. ``offset_xmin == -1`` moves the left edge one
        keyword-width to the right.
        """
        offset_xmin, offset_xmax, offset_ymin, offset_ymax = offset_tuple

        width = key_coordinates[2] - key_coordinates[0]
        height = key_coordinates[-1] - key_coordinates[1]

        x_min = key_coordinates[0] - (width * offset_xmin)
        x_max = key_coordinates[2] + (width * offset_xmax)
        y_min = key_coordinates[1] - (height * offset_ymin)
        y_max = key_coordinates[-1] + (height * offset_ymax)
        return x_min, y_min, x_max, y_max

    def _collect_key_candidates(self, keys_config, page_text):
        """Map each key_text to the boxes of the spans matching its regexes."""
        key_text_info = dict()
        for key_text_list in keys_config.values():
            for key_text, key_re_tuple, _, _ in key_text_list:
                for (x0, y0, x1, y1), text in page_text:
                    # any(): a span is recorded once even when several of
                    # the alternative patterns match it.
                    if any(re.match(key_re, text) for key_re in key_re_tuple):
                        key_text_info.setdefault(key_text, list()).append((x0, y0, x1, y1))
        return key_text_info

    def _locate_keys(self, keys_config, key_text_info):
        """Resolve every field's keyword chain to a single anchor box (or None)."""
        key_coordinates_info = dict()
        for field, key_text_list in keys_config.items():
            last_key_coordinates = None
            for key_text, _, direction, kwargs in key_text_list:
                if key_text not in key_text_info:
                    # A missing link resets the chain.
                    last_key_coordinates = None
                    continue
                last_key_coordinates = getattr(self, 'key_{0}'.format(direction))(
                    key_text_info[key_text],
                    last_key_coordinates,
                    **kwargs)
            key_coordinates_info[field] = last_key_coordinates
        return key_coordinates_info

    def _default_entry(self, default_value):
        """Build the record reported for a field that was not found."""
        # Copy so that records never share one mutable position list.
        return {
            self.words_str: default_value,
            self.position_str: list(self.default_position),
        }

    def get_target_fields(self, pdf_text_list, pdf_img_list):
        """Extract every configured field from the rebuilt PDF content.

        Args:
            pdf_text_list: page-number string -> list of ``(bbox, text)``.
            pdf_img_list: page-number string -> list of ``(bbox, text)`` for
                image blocks.

        Returns:
            ``{page_key: {field: {'words': ..., 'position': [...]}}}`` where
            page_key is ``'page_<n+1>'`` for an explicit page ``n``.
        """
        pdf_result = dict()

        for pno_str, fields_dict in self.target_fields.items():
            is_last_pno = pno_str == '-1'
            if is_last_pno:
                # default=0: with no text pages at all every field simply
                # falls back to its default instead of max() raising.
                pno_str = str(max((int(key) for key in pdf_text_list.keys()), default=0))

            page_text = pdf_text_list.get(pno_str, [])
            page_img = pdf_img_list.get(pno_str, [])

            # Locate the keyword anchors.
            key_text_info = self._collect_key_candidates(fields_dict[self.keys_str], page_text)
            key_coordinates_info = self._locate_keys(fields_dict[self.keys_str], key_text_info)

            # Locate the field values relative to their anchors.
            page_result = dict()
            for field, (source, direction, kwargs, default_value) in fields_dict[self.value_str].items():
                value, coordinates = None, None
                key_coordinates = key_coordinates_info.get(field)
                if isinstance(key_coordinates, tuple):
                    value, coordinates = getattr(self, 'value_{0}'.format(direction))(
                        page_text if source == self.text_str else page_img,
                        key_coordinates,
                        **kwargs
                    )
                if isinstance(value, str):
                    page_result[field] = {
                        self.words_str: value,
                        self.position_str: list(coordinates),
                    }
                else:
                    page_result[field] = self._default_entry(default_value)

            # NOTE(review): the last-page sentinel is always reported under
            # the fixed key 'page_12' regardless of the real page count;
            # presumably downstream consumers expect that key - confirm.
            page_key = 'page_12' if is_last_pno else 'page_{0}'.format(int(pno_str) + 1)
            pdf_result[page_key] = page_result

        return pdf_result
src/common/fsm_econtract/tools.py
0 → 100644
def pdf_info_rebuild(pdf_info, fix_bbox=True):
    """Flatten per-page block data into text and image lookup tables.

    Args:
        pdf_info: page-number string -> page dict with a ``'blocks'`` list.
            Blocks of ``type`` 0 carry ``lines`` -> ``spans`` (each span has
            ``bbox``, ``text`` and ``size``); blocks of ``type`` 1 carry a
            ``bbox``. The input is not modified.
        fix_bbox: when true, a span whose bbox is lower than its font size
            gets its bottom edge extended by the font size.

    Returns:
        ``(pdf_text_info, pdf_img_info)``: page-number string -> list of
        ``[bbox, text]`` for text spans, and page-number string -> list of
        ``(bbox, '有')`` for image blocks. Pages without entries are absent.
    """
    pdf_text_info = dict()
    pdf_img_info = dict()
    for pno_str, page_info in pdf_info.items():
        for block in page_info['blocks']:
            if block['type'] == 0:
                # Text is sometimes duplicated; de-duplicate within one block.
                seen_text = set()
                for line in block['lines']:
                    for span in line['spans']:
                        text = span['text'].strip()
                        if len(text) == 0 or text in seen_text:
                            continue
                        seen_text.add(text)
                        # Work on a copy: the caller's span must not change,
                        # and a tuple bbox cannot be assigned to in place.
                        bbox = list(span['bbox'])
                        # The reported bbox height is unreliable.
                        if fix_bbox and bbox[-1] - bbox[1] < span['size']:
                            bbox[-1] = bbox[-1] + span['size']
                        pdf_text_info.setdefault(pno_str, list()).append([bbox, text])
            elif block['type'] == 1:
                pdf_img_info.setdefault(pno_str, list()).append((block['bbox'], '有'))

    return pdf_text_info, pdf_img_info
... | \ No newline at end of file | ... | \ No newline at end of file |
src/common/tools/mssql_script24.py
0 → 100644
import pyodbc

# Adds the three FSM contract OCR result columns to the HIL result tables.
hil_sql = """
    ALTER TABLE hil_ocr_result ADD fsm_wep_ocr nvarchar(max);
    ALTER TABLE hil_ocr_result ADD fsm_msi_ocr nvarchar(max);
    ALTER TABLE hil_ocr_result ADD fsm_sc_ocr nvarchar(max);
    ALTER TABLE hil_se_ocr_result ADD fsm_wep_ocr nvarchar(max);
    ALTER TABLE hil_se_ocr_result ADD fsm_msi_ocr nvarchar(max);
    ALTER TABLE hil_se_ocr_result ADD fsm_sc_ocr nvarchar(max);
"""

# Same columns for the AFC result tables.
afc_sql = """
    ALTER TABLE afc_ocr_result ADD fsm_wep_ocr nvarchar(max);
    ALTER TABLE afc_ocr_result ADD fsm_msi_ocr nvarchar(max);
    ALTER TABLE afc_ocr_result ADD fsm_sc_ocr nvarchar(max);
    ALTER TABLE afc_se_ocr_result ADD fsm_wep_ocr nvarchar(max);
    ALTER TABLE afc_se_ocr_result ADD fsm_msi_ocr nvarchar(max);
    ALTER TABLE afc_se_ocr_result ADD fsm_sc_ocr nvarchar(max);
"""


def _run_schema_change(connection_string, sql):
    """Run ``sql`` on a fresh autocommit connection and always release it.

    The cursor and the connection are closed even when ``execute`` raises,
    so a failed statement no longer leaks the handles.
    """
    cnxn = pyodbc.connect(connection_string, autocommit=True)
    try:
        cursor = cnxn.cursor()
        try:
            cursor.execute(sql)
        finally:
            cursor.close()
    finally:
        cnxn.close()


# NOTE(review): both connection strings carry only the driver name, so as
# written the two batches target the same default data source; presumably
# server/database/credentials are filled in before running - confirm.
_run_schema_change('DRIVER={ODBC Driver 17 for SQL Server};', hil_sql)
_run_schema_change('DRIVER={ODBC Driver 17 for SQL Server};', afc_sql)
... | @@ -8,13 +8,16 @@ from common.tools.comparison import cp | ... | @@ -8,13 +8,16 @@ from common.tools.comparison import cp |
8 | from common.mixins import LoggerMixin | 8 | from common.mixins import LoggerMixin |
9 | from rest_framework.permissions import IsAuthenticated | 9 | from rest_framework.permissions import IsAuthenticated |
10 | from apps.account.authentication import OAuth2AuthenticationWithUser | 10 | from apps.account.authentication import OAuth2AuthenticationWithUser |
11 | from apps.doc.models import NscInvoice | ||
12 | import json | ||
13 | from datetime import datetime | ||
11 | 14 | ||
12 | params = { | 15 | params = { |
13 | 'invoiceCode': fields.Str(required=True, validate=validate.Length(max=128)), | 16 | 'invoiceCode': fields.Str(required=True, validate=validate.Length(max=128)), |
14 | 'invoiceNumber': fields.Str(required=True, validate=validate.Length(max=64)), | 17 | 'invoiceNumber': fields.Str(required=True, validate=validate.Length(max=64)), |
15 | 'issueDate': CustomDate(required=True), | 18 | 'issueDate': CustomDate(required=True), |
16 | 'buyerName': fields.Str(required=True, validate=validate.Length(max=64)), | 19 | 'buyerName': fields.Str(required=True, validate=validate.Length(max=64)), |
17 | "buyerId": fields.Int(required=True), | 20 | "buyerId": fields.Str(required=True, validate=validate.Length(max=64)), |
18 | 'vin': fields.Str(required=True, validate=validate.Length(max=128)), | 21 | 'vin': fields.Str(required=True, validate=validate.Length(max=128)), |
19 | 'dealer': fields.Str(required=False, validate=validate.Length(max=64)), | 22 | 'dealer': fields.Str(required=False, validate=validate.Length(max=64)), |
20 | 'priceWithVat': CustomDecimal(required=True), | 23 | 'priceWithVat': CustomDecimal(required=True), |
... | @@ -29,7 +32,7 @@ input_args = { | ... | @@ -29,7 +32,7 @@ input_args = { |
29 | } | 32 | } |
30 | 33 | ||
31 | 34 | ||
32 | # poss 接口接收NSC 发票信息 | 35 | # pos 接口接收NSC 发票信息 |
33 | class NSCInvoiceView(GenericView): | 36 | class NSCInvoiceView(GenericView): |
34 | permission_classes = [IsAuthenticated] | 37 | permission_classes = [IsAuthenticated] |
35 | authentication_classes = [OAuth2AuthenticationWithUser] | 38 | authentication_classes = [OAuth2AuthenticationWithUser] |
... | @@ -50,6 +53,7 @@ class NSCInvoiceView(GenericView): | ... | @@ -50,6 +53,7 @@ class NSCInvoiceView(GenericView): |
50 | vat = content.get('vat', 0.0) | 53 | vat = content.get('vat', 0.0) |
51 | vat_rate = content.get('vatRate', 0.0) | 54 | vat_rate = content.get('vatRate', 0.0) |
52 | 55 | ||
56 | NscInvoice.objects.create(vin=vin, content=json.dumps(content), create_time=datetime.now()) | ||
53 | return response.ok() | 57 | return response.ok() |
54 | 58 | ||
55 | 59 | ||
... | @@ -90,11 +94,17 @@ class DeMortgageView(GenericView): | ... | @@ -90,11 +94,17 @@ class DeMortgageView(GenericView): |
90 | 'applicationName': application_name, | 94 | 'applicationName': application_name, |
91 | 'deMortgageDate': de_mortgage_date | 95 | 'deMortgageDate': de_mortgage_date |
92 | } | 96 | } |
93 | de_mortgage_info = {} | 97 | de_mortgage_info = {'customer_name':'','applicationName':'','deMortgageDate':''} |
94 | # 绿本必须分开ocr | 98 | # 绿本必须分开ocr |
95 | for file_obj in files: | 99 | for file_obj in files: |
96 | info = PosHandler.de_mortgage_ocr_process1(file_obj) | 100 | info = PosHandler.de_mortgage_ocr_process1(file_obj) |
97 | de_mortgage_info.update(info) | 101 | if info.get('customerName') is not '': |
102 | de_mortgage_info['customerName'] = info.get('customerName') | ||
103 | if info.get('applicationName') is not '': | ||
104 | de_mortgage_info['applicationName'] = info.get('applicationName') | ||
105 | if info.get('deMortgageDate') is not '': | ||
106 | de_mortgage_info['deMortgageDate'] = info.get('deMortgageDate') | ||
107 | #de_mortgage_info.update(info) | ||
98 | 108 | ||
99 | request_pass = True | 109 | request_pass = True |
100 | fields_result = [] | 110 | fields_result = [] | ... | ... |
-
Please register or sign in to post a comment