Merge branch 'feature/uat-tmp' into 'master'
Feature/uat tmp See merge request !18
Showing
19 changed files
with
3341 additions
and
48 deletions
| ... | @@ -10,8 +10,8 @@ PAGE_SIZE_DEFAULT = 10 | ... | @@ -10,8 +10,8 @@ PAGE_SIZE_DEFAULT = 10 |
| 10 | 10 | ||
| 11 | FIXED_APPLICATION_ID_PREFIX = 'CH-S' | 11 | FIXED_APPLICATION_ID_PREFIX = 'CH-S' |
| 12 | 12 | ||
| 13 | DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACTMANAGEMENT'] | 13 | DOC_SCHEME_LIST = ['ACCEPTANCE', 'SETTLEMENT', 'CONTRACTMANAGEMENT', 'INSURANCE'] |
| 14 | DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT'] | 14 | DATA_SOURCE_LIST = ['POS', 'EAPP', 'ECONTRACT', 'OVP'] |
| 15 | COMPARE_DOC_SCHEME_LIST = ['CA', 'SE'] | 15 | COMPARE_DOC_SCHEME_LIST = ['CA', 'SE'] |
| 16 | 16 | ||
| 17 | HIL_PREFIX = 'HIL' | 17 | HIL_PREFIX = 'HIL' |
| ... | @@ -1057,7 +1057,25 @@ HIL_CONTRACT_2_CLASSIFY = 44 | ... | @@ -1057,7 +1057,25 @@ HIL_CONTRACT_2_CLASSIFY = 44 |
| 1057 | HIL_CONTRACT_3_CN_NAME = '车辆处置协议' | 1057 | HIL_CONTRACT_3_CN_NAME = '车辆处置协议' |
| 1058 | HIL_CONTRACT_3_CLASSIFY = 45 | 1058 | HIL_CONTRACT_3_CLASSIFY = 45 |
| 1059 | 1059 | ||
| 1060 | CONTRACT_SET = {CONTRACT_QRS_CLASSIFY, CONTRACT_CLASSIFY, HIL_CONTRACT_1_CLASSIFY, HIL_CONTRACT_2_CLASSIFY, HIL_CONTRACT_3_CLASSIFY} | 1060 | FSM_CONTRACT_WEP_CN_NAME = '延长保修合同' |
| 1061 | FSM_CONTRACT_WEP_CLASSIFY = 51 | ||
| 1062 | |||
| 1063 | FSM_CONTRACT_MSI_CN_NAME = '长悦保养合同' | ||
| 1064 | FSM_CONTRACT_MSI_CLASSIFY = 52 | ||
| 1065 | |||
| 1066 | FSM_CONTRACT_SC_CN_NAME = '汽车销售合同' | ||
| 1067 | FSM_CONTRACT_SC_CLASSIFY = 53 | ||
| 1068 | |||
| 1069 | CONTRACT_SET = { | ||
| 1070 | CONTRACT_QRS_CLASSIFY, | ||
| 1071 | CONTRACT_CLASSIFY, | ||
| 1072 | HIL_CONTRACT_1_CLASSIFY, | ||
| 1073 | HIL_CONTRACT_2_CLASSIFY, | ||
| 1074 | HIL_CONTRACT_3_CLASSIFY, | ||
| 1075 | FSM_CONTRACT_WEP_CLASSIFY, | ||
| 1076 | FSM_CONTRACT_MSI_CLASSIFY, | ||
| 1077 | FSM_CONTRACT_SC_CLASSIFY, | ||
| 1078 | } | ||
| 1061 | 1079 | ||
| 1062 | CONTRACT_MAP = { | 1080 | CONTRACT_MAP = { |
| 1063 | HIL_CONTRACT_1_CLASSIFY: HIL_CONTRACT_1_CN_NAME, | 1081 | HIL_CONTRACT_1_CLASSIFY: HIL_CONTRACT_1_CN_NAME, |
| ... | @@ -1065,8 +1083,13 @@ CONTRACT_MAP = { | ... | @@ -1065,8 +1083,13 @@ CONTRACT_MAP = { |
| 1065 | HIL_CONTRACT_3_CLASSIFY: HIL_CONTRACT_3_CN_NAME, | 1083 | HIL_CONTRACT_3_CLASSIFY: HIL_CONTRACT_3_CN_NAME, |
| 1066 | CONTRACT_CLASSIFY: CONTRACT_CN_NAME, | 1084 | CONTRACT_CLASSIFY: CONTRACT_CN_NAME, |
| 1067 | CONTRACT_QRS_CLASSIFY: CONTRACT_QRS_CN_NAME, | 1085 | CONTRACT_QRS_CLASSIFY: CONTRACT_QRS_CN_NAME, |
| 1086 | FSM_CONTRACT_WEP_CLASSIFY: FSM_CONTRACT_WEP_CN_NAME, | ||
| 1087 | FSM_CONTRACT_MSI_CLASSIFY: FSM_CONTRACT_MSI_CN_NAME, | ||
| 1088 | FSM_CONTRACT_SC_CLASSIFY: FSM_CONTRACT_SC_CN_NAME, | ||
| 1068 | } | 1089 | } |
| 1069 | 1090 | ||
| 1091 | FSM_CONTRACT_CLASSIFY_SET = {FSM_CONTRACT_WEP_CLASSIFY, FSM_CONTRACT_MSI_CLASSIFY, FSM_CONTRACT_SC_CLASSIFY} | ||
| 1092 | |||
| 1070 | # 保单 | 1093 | # 保单 |
| 1071 | INSURANCE_CN_NAME = '保单' | 1094 | INSURANCE_CN_NAME = '保单' |
| 1072 | INSURANCE_CLASSIFY = 42 | 1095 | INSURANCE_CLASSIFY = 42 |
| ... | @@ -1215,6 +1238,11 @@ BS_FIELD = 'bss_ocr' | ... | @@ -1215,6 +1238,11 @@ BS_FIELD = 'bss_ocr' |
| 1215 | HIL_CONTRACT_1_FIELD = 'hil_contract_1_ocr' | 1238 | HIL_CONTRACT_1_FIELD = 'hil_contract_1_ocr' |
| 1216 | HIL_CONTRACT_2_FIELD = 'hil_contract_2_ocr' | 1239 | HIL_CONTRACT_2_FIELD = 'hil_contract_2_ocr' |
| 1217 | HIL_CONTRACT_3_FIELD = 'hil_contract_3_ocr' | 1240 | HIL_CONTRACT_3_FIELD = 'hil_contract_3_ocr' |
| 1241 | FSM_CONTRACT_WEP_FIELD = 'fsm_wep_ocr' | ||
| 1242 | FSM_CONTRACT_MSI_FIELD = 'fsm_msi_ocr' | ||
| 1243 | FSM_CONTRACT_SC_FIELD = 'fsm_sc_ocr' | ||
| 1244 | |||
| 1245 | |||
| 1218 | BS_CLASSIFY = 10089 | 1246 | BS_CLASSIFY = 10089 |
| 1219 | 1247 | ||
| 1220 | RESULT_MAPPING = { | 1248 | RESULT_MAPPING = { |
| ... | @@ -1239,6 +1267,9 @@ RESULT_MAPPING = { | ... | @@ -1239,6 +1267,9 @@ RESULT_MAPPING = { |
| 1239 | HIL_CONTRACT_1_CLASSIFY: HIL_CONTRACT_1_FIELD, | 1267 | HIL_CONTRACT_1_CLASSIFY: HIL_CONTRACT_1_FIELD, |
| 1240 | HIL_CONTRACT_2_CLASSIFY: HIL_CONTRACT_2_FIELD, | 1268 | HIL_CONTRACT_2_CLASSIFY: HIL_CONTRACT_2_FIELD, |
| 1241 | HIL_CONTRACT_3_CLASSIFY: HIL_CONTRACT_3_FIELD, | 1269 | HIL_CONTRACT_3_CLASSIFY: HIL_CONTRACT_3_FIELD, |
| 1270 | FSM_CONTRACT_WEP_CLASSIFY: FSM_CONTRACT_WEP_FIELD, | ||
| 1271 | FSM_CONTRACT_MSI_CLASSIFY: FSM_CONTRACT_MSI_FIELD, | ||
| 1272 | FSM_CONTRACT_SC_CLASSIFY: FSM_CONTRACT_SC_FIELD, | ||
| 1242 | } | 1273 | } |
| 1243 | 1274 | ||
| 1244 | CA_ADD_COMPARE_FIELDS = (IC_OCR_FIELD, BL_OCR_FIELD, BS_FIELD) | 1275 | CA_ADD_COMPARE_FIELDS = (IC_OCR_FIELD, BL_OCR_FIELD, BS_FIELD) |
| ... | @@ -1511,6 +1542,9 @@ SE_AFC_CON_MAP = { | ... | @@ -1511,6 +1542,9 @@ SE_AFC_CON_MAP = { |
| 1511 | '还款账号': (2, 2, '还款账户', '账号'), | 1542 | '还款账号': (2, 2, '还款账户', '账号'), |
| 1512 | '户名': (2, 2, '还款账户', '户名'), | 1543 | '户名': (2, 2, '还款账户', '户名'), |
| 1513 | '开户行': (2, 2, '还款账户', '开户行'), | 1544 | '开户行': (2, 2, '还款账户', '开户行'), |
| 1545 | '收款账号': (2, 2, '借款人收款账户', '账号'), | ||
| 1546 | '收款户名': (2, 2, '借款人收款账户', '户名'), | ||
| 1547 | '收款开户行': (2, 2, '借款人收款账户', '开户行'), | ||
| 1514 | 1548 | ||
| 1515 | '借款人签字及时间': (1, 1, '借款人签字及时间', None), | 1549 | '借款人签字及时间': (1, 1, '借款人签字及时间', None), |
| 1516 | 1550 | ||
| ... | @@ -1550,9 +1584,12 @@ SE_HIL_CON_1_MAP = { | ... | @@ -1550,9 +1584,12 @@ SE_HIL_CON_1_MAP = { |
| 1550 | '融资成本总额': (5, 4, 7, '融资成本总额', None), | 1584 | '融资成本总额': (5, 4, 7, '融资成本总额', None), |
| 1551 | '租期': (5, 4, 7, '租期', None), | 1585 | '租期': (5, 4, 7, '租期', None), |
| 1552 | '还款计划表': (5, 5, 7, '付款计划表', None), | 1586 | '还款计划表': (5, 5, 7, '付款计划表', None), |
| 1553 | '还款账号': (5, 5, 7, '银行账户-银行账号', None), | 1587 | '还款账号': (5, 6, 7, '银行账户-银行账号', None), |
| 1554 | '户名': (5, 5, 7, '银行账户-户名', None), | 1588 | '户名': (5, 6, 7, '银行账户-户名', None), |
| 1555 | '开户行': (5, 5, 7, '银行账户-开户行', None), | 1589 | '开户行': (5, 6, 7, '银行账户-开户行', None), |
| 1590 | '收款账号': (5, 5, 7, '收款银行账户-银行账号', None), | ||
| 1591 | '收款户名': (5, 5, 7, '收款银行账户-户名', None), | ||
| 1592 | '收款开户行': (5, 5, 7, '收款银行账户-开户行', None), | ||
| 1556 | 'ASP项目详情': (5, 4, 7, '车辆附加产品明细表', None), | 1593 | 'ASP项目详情': (5, 4, 7, '车辆附加产品明细表', None), |
| 1557 | '承租人法定代表人或授权代表': (1, 1, 7, '承租人-法定代表人或授权代表', None), | 1594 | '承租人法定代表人或授权代表': (1, 1, 7, '承租人-法定代表人或授权代表', None), |
| 1558 | '共同承租人法定代表人或授权代表': (1, 1, 7, '共同承租人-法定代表人或授权代表', None), | 1595 | '共同承租人法定代表人或授权代表': (1, 1, 7, '共同承租人-法定代表人或授权代表', None), |
| ... | @@ -1608,6 +1645,39 @@ SE_HIL_CON_MAP = { | ... | @@ -1608,6 +1645,39 @@ SE_HIL_CON_MAP = { |
| 1608 | HIL_CONTRACT_3_CLASSIFY: SE_HIL_CON_3_MAP, | 1645 | HIL_CONTRACT_3_CLASSIFY: SE_HIL_CON_3_MAP, |
| 1609 | } | 1646 | } |
| 1610 | 1647 | ||
| 1648 | SE_FSM_WEP_MAP = { | ||
| 1649 | '客户姓名': (1, '客户姓名'), | ||
| 1650 | '证件类型': (1, '证件类型'), | ||
| 1651 | '证件号码': (1, '证件号码'), | ||
| 1652 | '合同价格(小写)': (1, '合同价格(小写)'), | ||
| 1653 | '客户签名': (1, '客户签名'), | ||
| 1654 | '签单日期': (1, '签单日期'), | ||
| 1655 | } | ||
| 1656 | |||
| 1657 | SE_FSM_MSI_MAP = { | ||
| 1658 | '客户姓名': (1, '客户姓名'), | ||
| 1659 | '证件类型': (1, '证件类型'), | ||
| 1660 | '证件号码': (1, '证件号码'), | ||
| 1661 | '合同价格(小写)': (1, '合同价格(小写)'), | ||
| 1662 | '客户签名': (2, '客户签名'), | ||
| 1663 | '签单日期': (2, '签单日期'), | ||
| 1664 | } | ||
| 1665 | |||
| 1666 | SE_FSM_SC_MAP = { | ||
| 1667 | '姓名': (1, '姓名'), | ||
| 1668 | '证件类型': (1, '证件类型'), | ||
| 1669 | '证件号码': (1, '证件号码'), | ||
| 1670 | '总价': (1, '总价'), | ||
| 1671 | '客户签名': (12, '客户签名'), | ||
| 1672 | '签单日期': (12, '签单日期'), | ||
| 1673 | } | ||
| 1674 | |||
| 1675 | SE_FSM_CON_MAP = { | ||
| 1676 | FSM_CONTRACT_WEP_CLASSIFY: SE_FSM_WEP_MAP, | ||
| 1677 | FSM_CONTRACT_MSI_CLASSIFY: SE_FSM_MSI_MAP, | ||
| 1678 | FSM_CONTRACT_SC_CLASSIFY: SE_FSM_SC_MAP, | ||
| 1679 | } | ||
| 1680 | |||
| 1611 | SE_AFC_CON_QRS_FIELD = ['合同编号'] | 1681 | SE_AFC_CON_QRS_FIELD = ['合同编号'] |
| 1612 | SE_AFC_CON_FIELD = ['合同编号-每页', '所购车辆价格-小写-重要条款', '车架号-重要条款', '贷款本金金额-重要条款', '贷款期限-重要条款', | 1682 | SE_AFC_CON_FIELD = ['合同编号-每页', '所购车辆价格-小写-重要条款', '车架号-重要条款', '贷款本金金额-重要条款', '贷款期限-重要条款', |
| 1613 | '车辆贷款本金金额-重要条款', '附加产品融资贷款本金总额-重要条款', '所购车辆价格', '车架号', '经销商', | 1683 | '车辆贷款本金金额-重要条款', '附加产品融资贷款本金总额-重要条款', '所购车辆价格', '车架号', '经销商', |
| ... | @@ -2314,29 +2384,42 @@ APPLICANT_TYPE_MAP = { | ... | @@ -2314,29 +2384,42 @@ APPLICANT_TYPE_MAP = { |
| 2314 | 2384 | ||
| 2315 | APPLICANT_TYPE_ORDER = ['Borrower', 'Co-Borrower', 'Guarantor', 'Mortgager'] | 2385 | APPLICANT_TYPE_ORDER = ['Borrower', 'Co-Borrower', 'Guarantor', 'Mortgager'] |
| 2316 | 2386 | ||
| 2317 | FILE_NAME_PREFIX_MAP = { | 2387 | # FILE_NAME_PREFIX_MAP = { |
| 2318 | AFC_PREFIX: [ | 2388 | # AFC_PREFIX: [ |
| 2319 | ((CONTRACT_CLASSIFY, 0), '{0}_电子签署-汽车抵押贷款合同'), | 2389 | # ((CONTRACT_CLASSIFY, 0), '{0}_电子签署-汽车抵押贷款合同'), |
| 2320 | ((HMH_CLASSIFY, 0), '{0}_电子签署-抵押登记豁免函'), | 2390 | # ((HMH_CLASSIFY, 0), '{0}_电子签署-抵押登记豁免函'), |
| 2321 | ], | 2391 | # ], |
| 2322 | HIL_PREFIX: [ | 2392 | # HIL_PREFIX: [ |
| 2323 | ((HIL_CONTRACT_1_CLASSIFY, HIL_CONTRACT_3_CLASSIFY), '{0}_电子签署-售后回租合同'), | 2393 | # ((HIL_CONTRACT_1_CLASSIFY, HIL_CONTRACT_3_CLASSIFY), '{0}_电子签署-售后回租合同'), |
| 2324 | ((HIL_CONTRACT_2_CLASSIFY, 0), '{0}_电子签署-汽车租赁抵押合同'), | 2394 | # ((HIL_CONTRACT_2_CLASSIFY, 0), '{0}_电子签署-汽车租赁抵押合同'), |
| 2325 | ((HMH_CLASSIFY, 0), '{0}_电子签署-抵押登记豁免函'), | 2395 | # ((HMH_CLASSIFY, 0), '{0}_电子签署-抵押登记豁免函'), |
| 2326 | ] | 2396 | # ] |
| 2327 | } | 2397 | # } |
| 2328 | 2398 | ||
| 2329 | ECONTRACT_KEYWORDS_MAP = { | 2399 | ECONTRACT_KEYWORDS_MAP = { |
| 2330 | AFC_PREFIX: [ | 2400 | AFC_PREFIX: [ |
| 2331 | ('抵押贷款合同', CONTRACT_CLASSIFY), | 2401 | ('抵押贷款合同', CONTRACT_CLASSIFY), |
| 2332 | ('送达地址确认书', CONTRACT_QRS_CLASSIFY), | 2402 | ('送达地址确认书', CONTRACT_QRS_CLASSIFY), |
| 2333 | # ('电子签署-抵押登记豁免函', HMH_CLASSIFY, 0), | 2403 | ('抵押登记豁免函', HMH_CLASSIFY), |
| 2334 | ], | 2404 | ], |
| 2335 | HIL_PREFIX: [ | 2405 | HIL_PREFIX: [ |
| 2336 | ('售后回租合同', HIL_CONTRACT_1_CLASSIFY), | 2406 | ('售后回租合同', HIL_CONTRACT_1_CLASSIFY), |
| 2337 | ('租赁抵押合同', HIL_CONTRACT_2_CLASSIFY), | 2407 | ('租赁抵押合同', HIL_CONTRACT_2_CLASSIFY), |
| 2338 | ('车辆处置协议', HIL_CONTRACT_3_CLASSIFY), | 2408 | ('车辆处置协议', HIL_CONTRACT_3_CLASSIFY), |
| 2339 | # ('电子签署-抵押登记豁免函', HMH_CLASSIFY, 0), | 2409 | ('抵押登记豁免函', HMH_CLASSIFY), |
| 2410 | ] | ||
| 2411 | } | ||
| 2412 | |||
| 2413 | FSM_ECONTRACT_KEYWORDS_MAP = { | ||
| 2414 | AFC_PREFIX: [ | ||
| 2415 | ('延长保修条款与条件', FSM_CONTRACT_WEP_CLASSIFY), | ||
| 2416 | ('长悦保养套餐服务合约', FSM_CONTRACT_MSI_CLASSIFY), | ||
| 2417 | ('汽车销售合同', FSM_CONTRACT_SC_CLASSIFY), | ||
| 2418 | ], | ||
| 2419 | HIL_PREFIX: [ | ||
| 2420 | ('延长保修条款与条件', FSM_CONTRACT_WEP_CLASSIFY), | ||
| 2421 | ('长悦保养套餐服务合同', FSM_CONTRACT_MSI_CLASSIFY), | ||
| 2422 | ('汽车销售合同', FSM_CONTRACT_SC_CLASSIFY), | ||
| 2340 | ] | 2423 | ] |
| 2341 | } | 2424 | } |
| 2342 | 2425 | ||
| ... | @@ -2346,6 +2429,12 @@ HIL_CONTRACT_TYPE_MAP = { | ... | @@ -2346,6 +2429,12 @@ HIL_CONTRACT_TYPE_MAP = { |
| 2346 | str(HIL_CONTRACT_3_CLASSIFY): 1, | 2429 | str(HIL_CONTRACT_3_CLASSIFY): 1, |
| 2347 | } | 2430 | } |
| 2348 | 2431 | ||
| 2432 | FSM_CONTRACT_TYPE_MAP = { | ||
| 2433 | str(FSM_CONTRACT_WEP_CLASSIFY): 0, | ||
| 2434 | str(FSM_CONTRACT_MSI_CLASSIFY): 1, | ||
| 2435 | str(FSM_CONTRACT_SC_CLASSIFY): 2, | ||
| 2436 | } | ||
| 2437 | |||
| 2349 | RESULT_MAP = { | 2438 | RESULT_MAP = { |
| 2350 | 0: None, | 2439 | 0: None, |
| 2351 | 1: True, | 2440 | 1: True, |
| ... | @@ -2379,3 +2468,26 @@ MPOS_MAP = { | ... | @@ -2379,3 +2468,26 @@ MPOS_MAP = { |
| 2379 | } | 2468 | } |
| 2380 | 2469 | ||
| 2381 | FOLDER_WSC_CLASSIFY = 199 | 2470 | FOLDER_WSC_CLASSIFY = 199 |
| 2471 | |||
| 2472 | |||
| 2473 | FSM_BEFORE_ACTIVITED_STATUS = { | ||
| 2474 | "APSVD": "Saved", | ||
| 2475 | "APEAE": "E-app Editing", | ||
| 2476 | "APADA": "Awaiting Dealer Action", | ||
| 2477 | "APAPR": "Acceptance Processing", | ||
| 2478 | "APPSB": "Pre-submit Processed", | ||
| 2479 | "APSBT": "Submitted", | ||
| 2480 | "APAPP": "Approved", | ||
| 2481 | "APHOC": "Held Offer-Docs", | ||
| 2482 | "APHOD": "Held Offer-Data", | ||
| 2483 | "APINI": "Initiated", | ||
| 2484 | "APSEP": "Settlement Processing" | ||
| 2485 | } | ||
| 2486 | |||
| 2487 | FSM_ACTIVITED_STATUS = { | ||
| 2488 | "APADF": "Activated-Document Follow up", | ||
| 2489 | "APASC": "Activated-Awaiting Settlement Check", | ||
| 2490 | "APIPN": "Activated-Invoice Passed-Non PT", | ||
| 2491 | "APIPP": "Activated-Invoice Passed-PT Doc Required", | ||
| 2492 | "APARD": "Activated-Review done", | ||
| 2493 | } | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
| ... | @@ -20,6 +20,8 @@ from common.tools.file_tools import get_pwd_list_from_str, extract_zip_or_rar, g | ... | @@ -20,6 +20,8 @@ from common.tools.file_tools import get_pwd_list_from_str, extract_zip_or_rar, g |
| 20 | from common.tools.pdf_to_img import PDFHandler | 20 | from common.tools.pdf_to_img import PDFHandler |
| 21 | from common.electronic_afc_contract.afc_contract_ocr import predict as afc_predict | 21 | from common.electronic_afc_contract.afc_contract_ocr import predict as afc_predict |
| 22 | from common.electronic_hil_contract.hil_contract_ocr import predict as hil_predict | 22 | from common.electronic_hil_contract.hil_contract_ocr import predict as hil_predict |
| 23 | from common.fsm_econtract.fsm_contract_ocr import predict as fsm_predict | ||
| 24 | from common.fsm_econtract.hmh_ocr import predict as hmh_predict | ||
| 23 | from apps.doc import consts | 25 | from apps.doc import consts |
| 24 | # from apps.doc.ocr.edms import EDMS, rh | 26 | # from apps.doc.ocr.edms import EDMS, rh |
| 25 | from apps.doc.ocr.ecm import ECM, rh | 27 | from apps.doc.ocr.ecm import ECM, rh |
| ... | @@ -40,8 +42,10 @@ from apps.doc.models import ( | ... | @@ -40,8 +42,10 @@ from apps.doc.models import ( |
| 40 | DDARecords, | 42 | DDARecords, |
| 41 | IDBCRecords, | 43 | IDBCRecords, |
| 42 | Configs, | 44 | Configs, |
| 45 | AFCCmsStatusInfo, | ||
| 46 | HILCmsStatusInfo, | ||
| 43 | ) | 47 | ) |
| 44 | from celery_compare.tasks import compare | 48 | from celery_compare.tasks import compare, fsm_compare |
| 45 | 49 | ||
| 46 | 50 | ||
| 47 | class Command(BaseCommand, LoggerMixin): | 51 | class Command(BaseCommand, LoggerMixin): |
| ... | @@ -996,7 +1000,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -996,7 +1000,7 @@ class Command(BaseCommand, LoggerMixin): |
| 996 | res.setdefault(consts.ALL_POSITION_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get( | 1000 | res.setdefault(consts.ALL_POSITION_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get( |
| 997 | consts.ALL_POSITION_KEY, {}).get(key1, []) | 1001 | consts.ALL_POSITION_KEY, {}).get(key1, []) |
| 998 | license_summary[classify] = [res] | 1002 | license_summary[classify] = [res] |
| 999 | else: | 1003 | elif classify in consts.SE_HIL_CON_MAP: |
| 1000 | res = {} | 1004 | res = {} |
| 1001 | for key, (pno1, pno2, end_idx, key1, key2) in consts.SE_HIL_CON_MAP[classify].items(): | 1005 | for key, (pno1, pno2, end_idx, key1, key2) in consts.SE_HIL_CON_MAP[classify].items(): |
| 1002 | if pno1 is None: | 1006 | if pno1 is None: |
| ... | @@ -1020,7 +1024,14 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1020,7 +1024,14 @@ class Command(BaseCommand, LoggerMixin): |
| 1020 | res[key] = tmp_res | 1024 | res[key] = tmp_res |
| 1021 | res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(img_pno), {}).get( | 1025 | res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(img_pno), {}).get( |
| 1022 | consts.IMG_PATH_KEY, '') | 1026 | consts.IMG_PATH_KEY, '') |
| 1027 | license_summary[classify] = [res] | ||
| 1023 | 1028 | ||
| 1029 | elif classify in consts.SE_FSM_CON_MAP: | ||
| 1030 | res = {} | ||
| 1031 | for key, (pno1, key1) in consts.SE_FSM_CON_MAP[classify].items(): | ||
| 1032 | res[key] = page_info_dict.get(str(pno1), {}).get(key1) | ||
| 1033 | res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(pno1), {}).get( | ||
| 1034 | consts.IMG_PATH_KEY, '') | ||
| 1024 | license_summary[classify] = [res] | 1035 | license_summary[classify] = [res] |
| 1025 | 1036 | ||
| 1026 | def rebuild_bs_summary(self, bs_summary, unknown_summary): | 1037 | def rebuild_bs_summary(self, bs_summary, unknown_summary): |
| ... | @@ -1442,7 +1453,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1442,7 +1453,7 @@ class Command(BaseCommand, LoggerMixin): |
| 1442 | self.log_base, traceback.format_exc())) | 1453 | self.log_base, traceback.format_exc())) |
| 1443 | error_list.append(1) | 1454 | error_list.append(1) |
| 1444 | return | 1455 | return |
| 1445 | else: # e-contract | 1456 | else: # e-contract or e-fsm-contract or e-hmh |
| 1446 | try: | 1457 | try: |
| 1447 | # pdf下载 处理 图片存储 识别 | 1458 | # pdf下载 处理 图片存储 识别 |
| 1448 | for times in range(consts.RETRY_TIMES): | 1459 | for times in range(consts.RETRY_TIMES): |
| ... | @@ -1472,8 +1483,10 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1472,8 +1483,10 @@ class Command(BaseCommand, LoggerMixin): |
| 1472 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( | 1483 | self.online_log.error('{0} [process error (db save)] [error={1}]'.format( |
| 1473 | self.log_base, traceback.format_exc())) | 1484 | self.log_base, traceback.format_exc())) |
| 1474 | 1485 | ||
| 1486 | # AFC合同 | ||
| 1475 | if classify_1_str == str(consts.CONTRACT_CLASSIFY): | 1487 | if classify_1_str == str(consts.CONTRACT_CLASSIFY): |
| 1476 | ocr_result = afc_predict(pdf_handler.pdf_info) | 1488 | is_fsm = doc.data_source == consts.DATA_SOURCE_LIST[3] |
| 1489 | ocr_result = afc_predict(pdf_handler.pdf_info, is_fsm=is_fsm) | ||
| 1477 | page_res = {} | 1490 | page_res = {} |
| 1478 | for page_num, page_info in ocr_result.get('page_info', {}).items(): | 1491 | for page_num, page_info in ocr_result.get('page_info', {}).items(): |
| 1479 | if isinstance(page_num, str) and page_num.startswith('page_'): | 1492 | if isinstance(page_num, str) and page_num.startswith('page_'): |
| ... | @@ -1483,6 +1496,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1483,6 +1496,7 @@ class Command(BaseCommand, LoggerMixin): |
| 1483 | 'page_num': page_num, | 1496 | 'page_num': page_num, |
| 1484 | 'page_info': page_info | 1497 | 'page_info': page_info |
| 1485 | } | 1498 | } |
| 1499 | # 送达地址确认书 | ||
| 1486 | elif classify_1_str == str(consts.CONTRACT_QRS_CLASSIFY): | 1500 | elif classify_1_str == str(consts.CONTRACT_QRS_CLASSIFY): |
| 1487 | ocr_result = afc_predict(pdf_handler.pdf_info, is_qrs=True) | 1501 | ocr_result = afc_predict(pdf_handler.pdf_info, is_qrs=True) |
| 1488 | page_num = 'page_1' | 1502 | page_num = 'page_1' |
| ... | @@ -1493,9 +1507,11 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1493,9 +1507,11 @@ class Command(BaseCommand, LoggerMixin): |
| 1493 | 'page_info': ocr_result.pop(page_num, {}) | 1507 | 'page_info': ocr_result.pop(page_num, {}) |
| 1494 | } | 1508 | } |
| 1495 | } | 1509 | } |
| 1496 | else: | 1510 | # HIL合同 |
| 1511 | elif classify_1_str in consts.HIL_CONTRACT_TYPE_MAP: | ||
| 1512 | is_fsm = doc.data_source == consts.DATA_SOURCE_LIST[3] | ||
| 1497 | file_type_1 = consts.HIL_CONTRACT_TYPE_MAP.get(classify_1_str) | 1513 | file_type_1 = consts.HIL_CONTRACT_TYPE_MAP.get(classify_1_str) |
| 1498 | ocr_result_1 = hil_predict(pdf_handler.pdf_info, file_type_1) | 1514 | ocr_result_1 = hil_predict(pdf_handler.pdf_info, file_type_1, is_fsm=is_fsm) |
| 1499 | rebuild_res_1 = {} | 1515 | rebuild_res_1 = {} |
| 1500 | page_res = {} | 1516 | page_res = {} |
| 1501 | for field_name, field_info in ocr_result_1.items(): | 1517 | for field_name, field_info in ocr_result_1.items(): |
| ... | @@ -1508,9 +1524,36 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1508,9 +1524,36 @@ class Command(BaseCommand, LoggerMixin): |
| 1508 | 'page_num': page_num, | 1524 | 'page_num': page_num, |
| 1509 | 'page_info': page_info | 1525 | 'page_info': page_info |
| 1510 | } | 1526 | } |
| 1527 | # FSM合同 WEP MSI SC | ||
| 1528 | elif classify_1_str in consts.FSM_CONTRACT_TYPE_MAP: | ||
| 1529 | file_type = consts.FSM_CONTRACT_TYPE_MAP.get(classify_1_str) | ||
| 1530 | ocr_result = fsm_predict(pdf_handler.pdf_info, file_type) | ||
| 1531 | page_res = {} | ||
| 1532 | for page_num, page_info in ocr_result.items(): | ||
| 1533 | if isinstance(page_num, str) and page_num.startswith('page_'): | ||
| 1534 | page_res[page_num] = { | ||
| 1535 | 'classify': int(classify_1_str), | ||
| 1536 | 'page_num': page_num, | ||
| 1537 | 'page_info': page_info | ||
| 1538 | } | ||
| 1539 | # hmh | ||
| 1540 | # else: | ||
| 1541 | # pass | ||
| 1542 | |||
| 1511 | 1543 | ||
| 1512 | contract_res = {} | 1544 | contract_res = {} |
| 1513 | for img_path_tmp, page_key in pdf_handler.img_path_pno_list: | 1545 | for img_path_tmp, page_key in pdf_handler.img_path_pno_list: |
| 1546 | if classify_1_str == str(consts.HMH_CLASSIFY): | ||
| 1547 | img_contract_res = { | ||
| 1548 | 'code': 1, | ||
| 1549 | 'data': [ | ||
| 1550 | { | ||
| 1551 | 'classify': consts.HMH_CLASSIFY, | ||
| 1552 | 'data': hmh_predict(pdf_handler.pdf_info) | ||
| 1553 | } | ||
| 1554 | ] | ||
| 1555 | } | ||
| 1556 | else: | ||
| 1514 | if page_key in page_res: | 1557 | if page_key in page_res: |
| 1515 | img_contract_res = { | 1558 | img_contract_res = { |
| 1516 | 'code': 1, | 1559 | 'code': 1, |
| ... | @@ -1966,6 +2009,9 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1966,6 +2009,9 @@ class Command(BaseCommand, LoggerMixin): |
| 1966 | report_list[5] = BSCheckResult.CHECK_FAILED.value | 2009 | report_list[5] = BSCheckResult.CHECK_FAILED.value |
| 1967 | 2010 | ||
| 1968 | finally: | 2011 | finally: |
| 2012 | self.online_log.info('{0} [task={1}] [license_summary={2}] ' | ||
| 2013 | '[contract_result_compare={3}]'.format(self.log_base, task_str, | ||
| 2014 | license_summary, contract_result_compare)) | ||
| 1969 | self.rebuild_contract(license_summary, contract_result_compare) | 2015 | self.rebuild_contract(license_summary, contract_result_compare) |
| 1970 | 2016 | ||
| 1971 | bs_rebuild = self.rebuild_bs(merged_bs_summary) | 2017 | bs_rebuild = self.rebuild_bs(merged_bs_summary) |
| ... | @@ -2015,6 +2061,16 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -2015,6 +2061,16 @@ class Command(BaseCommand, LoggerMixin): |
| 2015 | self.log_base, task_str, res_obj.id)) | 2061 | self.log_base, task_str, res_obj.id)) |
| 2016 | # 触发比对 | 2062 | # 触发比对 |
| 2017 | try: | 2063 | try: |
| 2064 | # 是否fsm | ||
| 2065 | cms_status_class = HILCmsStatusInfo if business_type in consts.HIL_SET else AFCCmsStatusInfo | ||
| 2066 | cms_status_info = cms_status_class.objects.filter(application_id=doc.application_id).first() | ||
| 2067 | is_fsm = cms_status_info is not None and cms_status_info.is_fsm == 1 | ||
| 2068 | self.online_log.info('{0} [isfsm] [task={1}] [true or false={2}]'.format( | ||
| 2069 | self.log_base, task_str, is_fsm)) | ||
| 2070 | if is_fsm: | ||
| 2071 | fsm_compare.apply_async((doc.application_id, business_type, None, res_obj.id, is_ca, True), | ||
| 2072 | queue='queue_compare') | ||
| 2073 | else: | ||
| 2018 | # pass | 2074 | # pass |
| 2019 | compare.apply_async((doc.application_id, business_type, None, res_obj.id, | 2075 | compare.apply_async((doc.application_id, business_type, None, res_obj.id, |
| 2020 | is_ca, True), queue='queue_compare') | 2076 | is_ca, True), queue='queue_compare') | ... | ... |
| ... | @@ -329,6 +329,11 @@ class AFCOCRResult(models.Model): | ... | @@ -329,6 +329,11 @@ class AFCOCRResult(models.Model): |
| 329 | hil_contract_2_ocr = models.TextField(null=True, verbose_name="HIL合同2") | 329 | hil_contract_2_ocr = models.TextField(null=True, verbose_name="HIL合同2") |
| 330 | hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3") | 330 | hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3") |
| 331 | qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书") | 331 | qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书") |
| 332 | fsm_wep_ocr = models.TextField(null=True, verbose_name="延长保修合同") | ||
| 333 | fsm_msi_ocr = models.TextField(null=True, verbose_name="长悦保养合同") | ||
| 334 | fsm_sc_ocr = models.TextField(null=True, verbose_name="汽车销售合同") | ||
| 335 | fsm_activited = models.IntegerField(null=False, default=0, verbose_name="fsm激活状态 1:激活") | ||
| 336 | |||
| 332 | 337 | ||
| 333 | update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间') | 338 | update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间') |
| 334 | create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间') | 339 | create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间') |
| ... | @@ -366,6 +371,11 @@ class HILOCRResult(models.Model): | ... | @@ -366,6 +371,11 @@ class HILOCRResult(models.Model): |
| 366 | hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3") | 371 | hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3") |
| 367 | qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书") | 372 | qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书") |
| 368 | 373 | ||
| 374 | fsm_wep_ocr = models.TextField(null=True, verbose_name="延长保修合同") | ||
| 375 | fsm_msi_ocr = models.TextField(null=True, verbose_name="长悦保养合同") | ||
| 376 | fsm_sc_ocr = models.TextField(null=True, verbose_name="汽车销售合同") | ||
| 377 | fsm_activited = models.IntegerField(null=False, default=0, verbose_name="fsm激活状态 1:激活") | ||
| 378 | |||
| 369 | update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间') | 379 | update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间') |
| 370 | create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间') | 380 | create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间') |
| 371 | 381 | ||
| ... | @@ -401,6 +411,11 @@ class AFCSEOCRResult(models.Model): | ... | @@ -401,6 +411,11 @@ class AFCSEOCRResult(models.Model): |
| 401 | hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3") | 411 | hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3") |
| 402 | qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书") | 412 | qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书") |
| 403 | 413 | ||
| 414 | fsm_wep_ocr = models.TextField(null=True, verbose_name="延长保修合同") | ||
| 415 | fsm_msi_ocr = models.TextField(null=True, verbose_name="长悦保养合同") | ||
| 416 | fsm_sc_ocr = models.TextField(null=True, verbose_name="汽车销售合同") | ||
| 417 | fsm_activited = models.IntegerField(null=False, default=0, verbose_name="fsm激活状态 1:激活") | ||
| 418 | |||
| 404 | update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间') | 419 | update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间') |
| 405 | create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间') | 420 | create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间') |
| 406 | 421 | ||
| ... | @@ -436,6 +451,10 @@ class HILSEOCRResult(models.Model): | ... | @@ -436,6 +451,10 @@ class HILSEOCRResult(models.Model): |
| 436 | hil_contract_2_ocr = models.TextField(null=True, verbose_name="HIL合同2") | 451 | hil_contract_2_ocr = models.TextField(null=True, verbose_name="HIL合同2") |
| 437 | hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3") | 452 | hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3") |
| 438 | qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书") | 453 | qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书") |
| 454 | fsm_wep_ocr = models.TextField(null=True, verbose_name="延长保修合同") | ||
| 455 | fsm_msi_ocr = models.TextField(null=True, verbose_name="长悦保养合同") | ||
| 456 | fsm_sc_ocr = models.TextField(null=True, verbose_name="汽车销售合同") | ||
| 457 | fsm_activited = models.IntegerField(null=False, default=0, verbose_name="fsm激活状态 1:激活") | ||
| 439 | 458 | ||
| 440 | update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间') | 459 | update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间') |
| 441 | create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间') | 460 | create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间') |
| ... | @@ -1042,3 +1061,41 @@ class AFCCompareReportNew(models.Model): | ... | @@ -1042,3 +1061,41 @@ class AFCCompareReportNew(models.Model): |
| 1042 | managed = False | 1061 | managed = False |
| 1043 | db_table = 'afc_compare_report_new' | 1062 | db_table = 'afc_compare_report_new' |
| 1044 | situ_db_label = 'afc' | 1063 | situ_db_label = 'afc' |
| 1064 | |||
| 1065 | |||
| 1066 | class NscInvoice(models.Model): | ||
| 1067 | id = models.AutoField(primary_key=True, verbose_name="id") # 主键 | ||
| 1068 | vin = models.CharField(max_length=64, verbose_name="车架号") # 索引 | ||
| 1069 | content = models.TextField(null=True, verbose_name="nsc发票信息") | ||
| 1070 | create_time = models.DateTimeField(verbose_name='创建时间') | ||
| 1071 | |||
| 1072 | class Meta: | ||
| 1073 | managed = False | ||
| 1074 | db_table = 'nsc_invoice' | ||
| 1075 | |||
| 1076 | |||
| 1077 | class AFCCmsStatusInfo(models.Model): | ||
| 1078 | id = models.AutoField(primary_key=True, verbose_name="id") # 主键 | ||
| 1079 | application_id = models.CharField(max_length=64, verbose_name="订单id") # 索引 | ||
| 1080 | business_type = models.CharField(max_length=64, verbose_name="业务类型") | ||
| 1081 | is_fsm = models.SmallIntegerField(null=False, default=0, verbose_name="是否fsm流程 1:是") | ||
| 1082 | update_time = models.DateTimeField(verbose_name='更新时间') | ||
| 1083 | create_time = models.DateTimeField(verbose_name='创建时间') | ||
| 1084 | |||
| 1085 | class Meta: | ||
| 1086 | managed = False | ||
| 1087 | db_table = 'afc_cms_status_info' | ||
| 1088 | situ_db_label = 'afc' | ||
| 1089 | |||
| 1090 | |||
| 1091 | class HILCmsStatusInfo(models.Model): | ||
| 1092 | id = models.AutoField(primary_key=True, verbose_name="id") # 主键 | ||
| 1093 | application_id = models.CharField(max_length=64, verbose_name="订单id") # 索引 | ||
| 1094 | business_type = models.CharField(max_length=64, verbose_name="业务类型") | ||
| 1095 | is_fsm = models.SmallIntegerField(null=False, default=0, verbose_name="是否fsm流程 1:是") | ||
| 1096 | update_time = models.DateTimeField(verbose_name='更新时间') | ||
| 1097 | create_time = models.DateTimeField(verbose_name='创建时间') | ||
| 1098 | |||
| 1099 | class Meta: | ||
| 1100 | managed = False | ||
| 1101 | db_table = 'hil_cms_status_info' | ... | ... |
| ... | @@ -27,6 +27,7 @@ class RequestTeam(NamedEnum): | ... | @@ -27,6 +27,7 @@ class RequestTeam(NamedEnum): |
| 27 | SETTLEMENT = (1, 'SETTLEMENT') | 27 | SETTLEMENT = (1, 'SETTLEMENT') |
| 28 | CONTRACTMANAGEMENT = (2, 'CONTRACTMANAGEMENT') | 28 | CONTRACTMANAGEMENT = (2, 'CONTRACTMANAGEMENT') |
| 29 | CONTROLLING = (3, 'CONTROLLING') | 29 | CONTROLLING = (3, 'CONTROLLING') |
| 30 | INSURANCE = (4, 'INSURANCE') | ||
| 30 | 31 | ||
| 31 | 32 | ||
| 32 | class RequestTrigger(NamedEnum): | 33 | class RequestTrigger(NamedEnum): |
| ... | @@ -36,6 +37,7 @@ class RequestTrigger(NamedEnum): | ... | @@ -36,6 +37,7 @@ class RequestTrigger(NamedEnum): |
| 36 | DOCUPLOAD = (3, 'Document Upload') | 37 | DOCUPLOAD = (3, 'Document Upload') |
| 37 | SUBMITING = (4, 'Submiting') | 38 | SUBMITING = (4, 'Submiting') |
| 38 | UPLOADING = (5, 'Uploading') | 39 | UPLOADING = (5, 'Uploading') |
| 40 | OVP = (6, 'OVP') | ||
| 39 | 41 | ||
| 40 | 42 | ||
| 41 | class FailureReason(NamedEnum): | 43 | class FailureReason(NamedEnum): | ... | ... |
| ... | @@ -34,6 +34,7 @@ class ECM: | ... | @@ -34,6 +34,7 @@ class ECM: |
| 34 | 'ACCEPTANCE': ('acceptance', conf.ECM_FOLDER_CA, conf.ECM_FOLDER_CA_HIL), | 34 | 'ACCEPTANCE': ('acceptance', conf.ECM_FOLDER_CA, conf.ECM_FOLDER_CA_HIL), |
| 35 | 'SETTLEMENT': (self.settlement_type, conf.ECM_FOLDER_SE, conf.ECM_FOLDER_SE_HIL), | 35 | 'SETTLEMENT': (self.settlement_type, conf.ECM_FOLDER_SE, conf.ECM_FOLDER_SE_HIL), |
| 36 | 'CONTRACTMANAGEMENT': ('contract_management', conf.ECM_FOLDER_CA, conf.ECM_FOLDER_CA_HIL), | 36 | 'CONTRACTMANAGEMENT': ('contract_management', conf.ECM_FOLDER_CA, conf.ECM_FOLDER_CA_HIL), |
| 37 | 'INSURANCE': ('insurance', conf.ECM_FOLDER_SE, conf.ECM_FOLDER_SE_HIL), | ||
| 37 | } | 38 | } |
| 38 | self.doc_base_map = { | 39 | self.doc_base_map = { |
| 39 | 'AFC': 'SF5_CN', | 40 | 'AFC': 'SF5_CN', | ... | ... |
| ... | @@ -808,10 +808,12 @@ class BSWorkbook(Workbook): | ... | @@ -808,10 +808,12 @@ class BSWorkbook(Workbook): |
| 808 | if field_str is not None: | 808 | if field_str is not None: |
| 809 | count_list.append((field_str, count)) | 809 | count_list.append((field_str, count)) |
| 810 | 810 | ||
| 811 | def contract_rebuild(self, contract_result_dict): | 811 | def contract_rebuild(self, contract_result_dict, is_ca=False): |
| 812 | for classify, contract_result in contract_result_dict.items(): | 812 | for classify, contract_result in contract_result_dict.items(): |
| 813 | if len(contract_result) == 0: | 813 | if len(contract_result) == 0: |
| 814 | continue | 814 | continue |
| 815 | if is_ca and classify not in consts.FSM_CONTRACT_CLASSIFY_SET: | ||
| 816 | continue | ||
| 815 | ws = self.create_sheet(consts.CONTRACT_MAP.get(classify)) | 817 | ws = self.create_sheet(consts.CONTRACT_MAP.get(classify)) |
| 816 | for i in range(30): | 818 | for i in range(30): |
| 817 | if str(i) in contract_result: | 819 | if str(i) in contract_result: |
| ... | @@ -934,6 +936,7 @@ class BSWorkbook(Workbook): | ... | @@ -934,6 +936,7 @@ class BSWorkbook(Workbook): |
| 934 | else: | 936 | else: |
| 935 | self.bs_rebuild(bs_summary, res_count_tuple, metadata) | 937 | self.bs_rebuild(bs_summary, res_count_tuple, metadata) |
| 936 | self.license_rebuild(license_summary, document_scheme, count_list) | 938 | self.license_rebuild(license_summary, document_scheme, count_list) |
| 939 | self.contract_rebuild(contract_result, True) | ||
| 937 | self.move_res_sheet() | 940 | self.move_res_sheet() |
| 938 | self.remove_base_sheet() | 941 | self.remove_base_sheet() |
| 939 | return count_list, self.need_follow | 942 | return count_list, self.need_follow | ... | ... |
| ... | @@ -48,14 +48,23 @@ from .models import ( | ... | @@ -48,14 +48,23 @@ from .models import ( |
| 48 | MposReport, | 48 | MposReport, |
| 49 | GenericOCRReport, | 49 | GenericOCRReport, |
| 50 | InterfaceReport, | 50 | InterfaceReport, |
| 51 | HILOCRResult, | ||
| 52 | HILSEOCRResult, | ||
| 53 | AFCOCRResult, | ||
| 54 | AFCSEOCRResult, | ||
| 55 | HILCmsStatusInfo, | ||
| 56 | AFCCmsStatusInfo | ||
| 51 | ) | 57 | ) |
| 52 | from .named_enum import ErrorType, AutoResult, WholeResult, RPAResult, SystemName | 58 | from .named_enum import ErrorType, AutoResult, WholeResult, RPAResult, SystemName, RequestTeam |
| 53 | from .mixins import DocHandler, MPOSHandler, PreSEHandler | 59 | from .mixins import DocHandler, MPOSHandler, PreSEHandler |
| 54 | from . import consts | 60 | from . import consts |
| 55 | from apps.account.authentication import OAuth2AuthenticationWithUser | 61 | from apps.account.authentication import OAuth2AuthenticationWithUser |
| 56 | from celery_compare.tasks import compare | 62 | from celery_compare.tasks import compare, fsm_compare |
| 63 | from prese.compare import get_empty_result | ||
| 57 | 64 | ||
| 58 | import time | 65 | import time |
| 66 | |||
| 67 | |||
| 59 | class CustomDate(fields.Date): | 68 | class CustomDate(fields.Date): |
| 60 | 69 | ||
| 61 | def _deserialize(self, value, attr, data, **kwargs): | 70 | def _deserialize(self, value, attr, data, **kwargs): |
| ... | @@ -248,6 +257,7 @@ se_compare_content = { | ... | @@ -248,6 +257,7 @@ se_compare_content = { |
| 248 | 'fsmSpecialCar': fields.Boolean(required=False), | 257 | 'fsmSpecialCar': fields.Boolean(required=False), |
| 249 | 'fsmBestPrice': fields.Boolean(required=False), | 258 | 'fsmBestPrice': fields.Boolean(required=False), |
| 250 | 'isAutoSettlement': fields.Boolean(required=False), | 259 | 'isAutoSettlement': fields.Boolean(required=False), |
| 260 | 'fsmLandingDealer': fields.Str(required=False, validate=validate.Length(max=1024)), | ||
| 251 | 261 | ||
| 252 | 'individualCusInfo': fields.List(fields.Nested(se_individual_args), | 262 | 'individualCusInfo': fields.List(fields.Nested(se_individual_args), |
| 253 | required=True, validate=validate.Length(min=1, max=4)), | 263 | required=True, validate=validate.Length(min=1, max=4)), |
| ... | @@ -551,6 +561,7 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -551,6 +561,7 @@ class UploadDocView(GenericView, DocHandler): |
| 551 | # authentication_classes = [] | 561 | # authentication_classes = [] |
| 552 | permission_classes = [IsAuthenticated] | 562 | permission_classes = [IsAuthenticated] |
| 553 | authentication_classes = [OAuth2AuthenticationWithUser] | 563 | authentication_classes = [OAuth2AuthenticationWithUser] |
| 564 | |||
| 554 | # required_scopes = ['write'] | 565 | # required_scopes = ['write'] |
| 555 | 566 | ||
| 556 | # 上传(接收)文件接口 | 567 | # 上传(接收)文件接口 |
| ... | @@ -563,6 +574,8 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -563,6 +574,8 @@ class UploadDocView(GenericView, DocHandler): |
| 563 | document = args.get('document') | 574 | document = args.get('document') |
| 564 | business_type = document.get('businessType') | 575 | business_type = document.get('businessType') |
| 565 | application_id = application_data.get('applicationId') | 576 | application_id = application_data.get('applicationId') |
| 577 | # 包含FSM 激活状态 | ||
| 578 | application_status = application_data.get('applicationStatus', '') | ||
| 566 | document_scheme = document.get('documentScheme') | 579 | document_scheme = document.get('documentScheme') |
| 567 | data_source = document.get('dataSource') | 580 | data_source = document.get('dataSource') |
| 568 | document_name = document.get('documentName', '') | 581 | document_name = document.get('documentName', '') |
| ... | @@ -571,6 +584,34 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -571,6 +584,34 @@ class UploadDocView(GenericView, DocHandler): |
| 571 | data_source = self.fix_data_source(data_source) | 584 | data_source = self.fix_data_source(data_source) |
| 572 | document_scheme = self.fix_scheme(document_scheme) | 585 | document_scheme = self.fix_scheme(document_scheme) |
| 573 | 586 | ||
| 587 | # fsm激活状态, 更新ocr_result 表fsm状态 | ||
| 588 | self.running_log.info('[doc upload applicationId-{0}] [applicationStatus-{1}, activated-{2}]' | ||
| 589 | .format(application_id, application_status, | ||
| 590 | True if consts.FSM_ACTIVITED_STATUS.get(application_status) else False)) | ||
| 591 | if consts.FSM_ACTIVITED_STATUS.get(application_status): | ||
| 592 | result_class = None | ||
| 593 | if business_type == consts.HIL_PREFIX: | ||
| 594 | if document_scheme == RequestTeam.ACCEPTANCE.name: | ||
| 595 | result_class = HILOCRResult | ||
| 596 | elif document_scheme == RequestTeam.SETTLEMENT.name or document_scheme == RequestTeam.INSURANCE.name: | ||
| 597 | result_class = HILSEOCRResult | ||
| 598 | elif business_type == consts.AFC_PREFIX: | ||
| 599 | if document_scheme == RequestTeam.ACCEPTANCE.name: | ||
| 600 | result_class = AFCOCRResult | ||
| 601 | elif document_scheme == RequestTeam.SETTLEMENT.name or document_scheme == RequestTeam.INSURANCE.name: | ||
| 602 | result_class = AFCSEOCRResult | ||
| 603 | |||
| 604 | ocr_result_obj = result_class.objects.filter(application_id=application_id).first() | ||
| 605 | if ocr_result_obj: | ||
| 606 | ocr_result_obj.fsm_activited = 1 | ||
| 607 | ocr_result_obj.save() | ||
| 608 | else: | ||
| 609 | ocr_result_obj = result_class() | ||
| 610 | ocr_result_obj.application_id = application_id | ||
| 611 | ocr_result_obj.fsm_activited = 1 | ||
| 612 | ocr_result_obj.save() | ||
| 613 | |||
| 614 | self.running_log.info('[doc upload applicationId-{0}] [ocr result saved]'.format(application_id)) | ||
| 574 | if data_source == consts.DATA_SOURCE_LIST[1]: | 615 | if data_source == consts.DATA_SOURCE_LIST[1]: |
| 575 | if document_name.endswith('-证书.pdf') or document_name.endswith('-证书'): | 616 | if document_name.endswith('-证书.pdf') or document_name.endswith('-证书'): |
| 576 | self.running_log.info('[doc upload success] [eapp license skip] [args={0}]'.format(args)) | 617 | self.running_log.info('[doc upload success] [eapp license skip] [args={0}]'.format(args)) |
| ... | @@ -602,13 +643,22 @@ class UploadDocView(GenericView, DocHandler): | ... | @@ -602,13 +643,22 @@ class UploadDocView(GenericView, DocHandler): |
| 602 | is_zip = False | 643 | is_zip = False |
| 603 | 644 | ||
| 604 | classify_1 = 0 | 645 | classify_1 = 0 |
| 605 | # 电子合同 | 646 | # 电子合同 Econtract or OVP(FSM) |
| 606 | if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]: | 647 | if data_source == consts.DATA_SOURCE_LIST[2] or data_source == consts.DATA_SOURCE_LIST[3]: |
| 648 | if document_scheme == consts.DOC_SCHEME_LIST[1]: | ||
| 607 | for keyword, classify_1_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix): | 649 | for keyword, classify_1_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix): |
| 608 | if keyword in document_name: | 650 | if keyword in document_name: |
| 609 | classify_1 = classify_1_tmp | 651 | classify_1 = classify_1_tmp |
| 610 | break | 652 | break |
| 611 | elif document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \ | 653 | # FSM合同:WEP/MSI/SC |
| 654 | elif data_source == consts.DATA_SOURCE_LIST[0] and document_scheme == consts.DOC_SCHEME_LIST[0]: | ||
| 655 | for keyword, classify_1_tmp in consts.FSM_ECONTRACT_KEYWORDS_MAP.get(prefix): | ||
| 656 | if keyword in document_name: | ||
| 657 | classify_1 = classify_1_tmp | ||
| 658 | break | ||
| 659 | |||
| 660 | |||
| 661 | if document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \ | ||
| 612 | or document_name.endswith('.RAR'): | 662 | or document_name.endswith('.RAR'): |
| 613 | is_zip = True | 663 | is_zip = True |
| 614 | 664 | ||
| ... | @@ -809,6 +859,9 @@ class CompareView(GenericView): | ... | @@ -809,6 +859,9 @@ class CompareView(GenericView): |
| 809 | ''' | 859 | ''' |
| 810 | 860 | ||
| 811 | 861 | ||
| 862 | pre_fsm_url = conf.PRE_FSM_URL | ||
| 863 | |||
| 864 | |||
| 812 | class SECompareView(GenericView, PreSEHandler): | 865 | class SECompareView(GenericView, PreSEHandler): |
| 813 | permission_classes = [IsAuthenticated] | 866 | permission_classes = [IsAuthenticated] |
| 814 | authentication_classes = [OAuth2AuthenticationWithUser] | 867 | authentication_classes = [OAuth2AuthenticationWithUser] |
| ... | @@ -829,7 +882,52 @@ class SECompareView(GenericView, PreSEHandler): | ... | @@ -829,7 +882,52 @@ class SECompareView(GenericView, PreSEHandler): |
| 829 | fsm_flag = content.get('fsmFlag', False) | 882 | fsm_flag = content.get('fsmFlag', False) |
| 830 | fsm_special_car = content.get('fsmSpecialCar', False) | 883 | fsm_special_car = content.get('fsmSpecialCar', False) |
| 831 | fsm_best_price = content.get('fsmBestPrice', False) | 884 | fsm_best_price = content.get('fsmBestPrice', False) |
| 885 | fsm_landing_dealer = content.get('fsmLandingDealer') | ||
| 832 | 886 | ||
| 887 | if fsm_special_car: | ||
| 888 | compare_result = { | ||
| 889 | "is_pass": False, | ||
| 890 | "particulars": [{ | ||
| 891 | "object_name": "", | ||
| 892 | "fields": [{ | ||
| 893 | "input": "", | ||
| 894 | "ocr": "", | ||
| 895 | "field_is_pass": False, | ||
| 896 | "comments": "此申请为FSM 特殊申请,暂不支持预放款流程" | ||
| 897 | }] | ||
| 898 | }] | ||
| 899 | } | ||
| 900 | elif fsm_best_price: | ||
| 901 | compare_result = { | ||
| 902 | "is_pass": False, | ||
| 903 | "particulars": [{ | ||
| 904 | "object_name": "", | ||
| 905 | "fields": [{ | ||
| 906 | "input": "", | ||
| 907 | "ocr": "", | ||
| 908 | "field_is_pass": False, | ||
| 909 | "comments": "此申请为FSM 特殊申请,暂不支持预放款流程" | ||
| 910 | }] | ||
| 911 | }] | ||
| 912 | } | ||
| 913 | elif fsm_flag and (not fsm_special_car or not fsm_best_price): | ||
| 914 | # 调用Java pre fsm接口 | ||
| 915 | try: | ||
| 916 | self.running_log.info("{0} request java pre fsm api, url:{1}, body:{2}".format(log_base, pre_fsm_url, json.dumps(content))) | ||
| 917 | headers = { | ||
| 918 | 'Content-Type': 'application/json' | ||
| 919 | } | ||
| 920 | resp = requests.post(pre_fsm_url, headers=headers, json=content) | ||
| 921 | self.running_log.info("{0} response from java pre fsm api, resp:{1}".format(log_base, resp.text)) | ||
| 922 | result = json.loads(resp.text) | ||
| 923 | compare_result = result.get("result") | ||
| 924 | if not compare_result: | ||
| 925 | compare_result = get_empty_result() | ||
| 926 | except Exception as e: | ||
| 927 | self.running_log.error("{0} pre fsm request to java error, url:{1}, param:{2}, errorMsg:{3}".format( | ||
| 928 | log_base, pre_fsm_url, json.dumps(content), traceback.format_exc())) | ||
| 929 | compare_result = get_empty_result() | ||
| 930 | elif not fsm_flag: | ||
| 833 | # 存库, 用于银行卡比对 | 931 | # 存库, 用于银行卡比对 |
| 834 | try: | 932 | try: |
| 835 | bank_class = HILbankVerification if business_type in consts.HIL_SET else AFCbankVerification | 933 | bank_class = HILbankVerification if business_type in consts.HIL_SET else AFCbankVerification |
| ... | @@ -853,7 +951,8 @@ class SECompareView(GenericView, PreSEHandler): | ... | @@ -853,7 +951,8 @@ class SECompareView(GenericView, PreSEHandler): |
| 853 | # preSettlement比对 | 951 | # preSettlement比对 |
| 854 | compare_result = self.pre_compare_entrance(content) | 952 | compare_result = self.pre_compare_entrance(content) |
| 855 | self.running_log.info('{0} [prese completed] [applicationEntity={1}] [application_id={2}] [uniq_seq={3}] ' | 953 | self.running_log.info('{0} [prese completed] [applicationEntity={1}] [application_id={2}] [uniq_seq={3}] ' |
| 856 | '[result={4}]'.format(log_base, business_type, application_id, uniq_seq, compare_result)) | 954 | '[result={4}]'.format(log_base, business_type, application_id, uniq_seq, |
| 955 | compare_result)) | ||
| 857 | 956 | ||
| 858 | try: | 957 | try: |
| 859 | end_time = time.time() | 958 | end_time = time.time() |
| ... | @@ -956,10 +1055,10 @@ class DocView(GenericView, DocHandler): | ... | @@ -956,10 +1055,10 @@ class DocView(GenericView, DocHandler): |
| 956 | application_id_query = Q(application_id__contains=application_id) if application_id is not None else Q() | 1055 | application_id_query = Q(application_id__contains=application_id) if application_id is not None else Q() |
| 957 | data_source_query = Q(data_source=data_source) if data_source is not None else Q() | 1056 | data_source_query = Q(data_source=data_source) if data_source is not None else Q() |
| 958 | upload_finish_time_query = Q(upload_finish_time__gte=upload_time_start, | 1057 | upload_finish_time_query = Q(upload_finish_time__gte=upload_time_start, |
| 959 | upload_finish_time__lt=upload_time_end + datetime.timedelta(days=1))\ | 1058 | upload_finish_time__lt=upload_time_end + datetime.timedelta(days=1)) \ |
| 960 | if upload_time_start is not None and upload_time_end is not None else Q() | 1059 | if upload_time_start is not None and upload_time_end is not None else Q() |
| 961 | create_time_query = Q(create_time__gte=create_time_start, | 1060 | create_time_query = Q(create_time__gte=create_time_start, |
| 962 | create_time__lt=create_time_end + datetime.timedelta(days=1))\ | 1061 | create_time__lt=create_time_end + datetime.timedelta(days=1)) \ |
| 963 | if create_time_start is not None and create_time_end is not None else Q() | 1062 | if create_time_start is not None and create_time_end is not None else Q() |
| 964 | query = application_id_query & status_query & data_source_query & upload_finish_time_query & create_time_query | 1063 | query = application_id_query & status_query & data_source_query & upload_finish_time_query & create_time_query |
| 965 | val_tuple = ('id', 'application_id', 'upload_finish_time', 'create_time', 'document_scheme', 'data_source', | 1064 | val_tuple = ('id', 'application_id', 'upload_finish_time', 'create_time', 'document_scheme', 'data_source', |
| ... | @@ -971,10 +1070,11 @@ class DocView(GenericView, DocHandler): | ... | @@ -971,10 +1070,11 @@ class DocView(GenericView, DocHandler): |
| 971 | if start_index >= total > 0: | 1070 | if start_index >= total > 0: |
| 972 | raise self.invalid_params('页数不存在') | 1071 | raise self.invalid_params('页数不存在') |
| 973 | 1072 | ||
| 974 | doc_queryset = doc_class.objects.filter(query).values(*val_tuple).order_by('-create_time')[start_index: end_index] | 1073 | doc_queryset = doc_class.objects.filter(query).values(*val_tuple).order_by('-create_time')[ |
| 1074 | start_index: end_index] | ||
| 975 | # doc_list = self.get_doc_list(doc_queryset, prefix) | 1075 | # doc_list = self.get_doc_list(doc_queryset, prefix) |
| 976 | for doc_dict in doc_queryset: | 1076 | for doc_dict in doc_queryset: |
| 977 | tmp_scheme = consts.COMPARE_DOC_SCHEME_LIST[0] if doc_dict['document_scheme'] == consts.DOC_SCHEME_LIST[0]\ | 1077 | tmp_scheme = consts.COMPARE_DOC_SCHEME_LIST[0] if doc_dict['document_scheme'] == consts.DOC_SCHEME_LIST[0] \ |
| 978 | else consts.COMPARE_DOC_SCHEME_LIST[1] | 1078 | else consts.COMPARE_DOC_SCHEME_LIST[1] |
| 979 | application_link = '{0}/showList/showList?entity={1}&scheme={2}&case_id={3}'.format( | 1079 | application_link = '{0}/showList/showList?entity={1}&scheme={2}&case_id={3}'.format( |
| 980 | conf.BASE_URL, prefix, tmp_scheme, doc_dict['application_id']) | 1080 | conf.BASE_URL, prefix, tmp_scheme, doc_dict['application_id']) |
| ... | @@ -1021,7 +1121,6 @@ class DocView(GenericView, DocHandler): | ... | @@ -1021,7 +1121,6 @@ class DocView(GenericView, DocHandler): |
| 1021 | # os.remove(tmp_save_path) | 1121 | # os.remove(tmp_save_path) |
| 1022 | # raise self.invalid_params(msg='invalid params: PDF file XSS') | 1122 | # raise self.invalid_params(msg='invalid params: PDF file XSS') |
| 1023 | 1123 | ||
| 1024 | |||
| 1025 | file.close() | 1124 | file.close() |
| 1026 | # 1. 上传信息记录 | 1125 | # 1. 上传信息记录 |
| 1027 | application_id = '{0}{1}'.format(consts.FIXED_APPLICATION_ID_PREFIX, metadata_version_id) | 1126 | application_id = '{0}{1}'.format(consts.FIXED_APPLICATION_ID_PREFIX, metadata_version_id) |
| ... | @@ -1104,7 +1203,8 @@ class CompareResultView(GenericView): | ... | @@ -1104,7 +1203,8 @@ class CompareResultView(GenericView): |
| 1104 | latest_compared_time = '' | 1203 | latest_compared_time = '' |
| 1105 | else: | 1204 | else: |
| 1106 | whole_result = consts.RESULT_Y if result_obj.ocr_auto_result_pass else consts.RESULT_N | 1205 | whole_result = consts.RESULT_Y if result_obj.ocr_auto_result_pass else consts.RESULT_N |
| 1107 | latest_compared_time = '' if result_obj.ocr_latest_comparison_time is None else result_obj.ocr_latest_comparison_time.strftime('%Y-%m-%d %H:%M') | 1206 | latest_compared_time = '' if result_obj.ocr_latest_comparison_time is None else result_obj.ocr_latest_comparison_time.strftime( |
| 1207 | '%Y-%m-%d %H:%M') | ||
| 1108 | 1208 | ||
| 1109 | source = consts.INFO_SOURCE[1] | 1209 | source = consts.INFO_SOURCE[1] |
| 1110 | version = comments = '' | 1210 | version = comments = '' |
| ... | @@ -1120,7 +1220,8 @@ class CompareResultView(GenericView): | ... | @@ -1120,7 +1220,8 @@ class CompareResultView(GenericView): |
| 1120 | 'source': source, | 1220 | 'source': source, |
| 1121 | 'version': version, | 1221 | 'version': version, |
| 1122 | 'comments': comments, | 1222 | 'comments': comments, |
| 1123 | 'result': [] if result_obj is None or not result_obj.ocr_auto_result else json.loads(result_obj.ocr_auto_result) | 1223 | 'result': [] if result_obj is None or not result_obj.ocr_auto_result else json.loads( |
| 1224 | result_obj.ocr_auto_result) | ||
| 1124 | } | 1225 | } |
| 1125 | 1226 | ||
| 1126 | return response.ok(data=compare_result) | 1227 | return response.ok(data=compare_result) |
| ... | @@ -1155,7 +1256,8 @@ class CompareResultView(GenericView): | ... | @@ -1155,7 +1256,8 @@ class CompareResultView(GenericView): |
| 1155 | 'id': 0 if result_obj is None else result_obj.id, | 1256 | 'id': 0 if result_obj is None else result_obj.id, |
| 1156 | 'application_id': case_id, | 1257 | 'application_id': case_id, |
| 1157 | 'entity': entity, | 1258 | 'entity': entity, |
| 1158 | 'scheme': consts.DOC_SCHEME_LIST[0] if scheme == consts.COMPARE_DOC_SCHEME_LIST[0] else consts.DOC_SCHEME_LIST[1], | 1259 | 'scheme': consts.DOC_SCHEME_LIST[0] if scheme == consts.COMPARE_DOC_SCHEME_LIST[0] else |
| 1260 | consts.DOC_SCHEME_LIST[1], | ||
| 1159 | 'whole_result': whole_result, | 1261 | 'whole_result': whole_result, |
| 1160 | 'latest_compared_time': '' if result_obj is None else result_obj.update_time.strftime('%Y-%m-%d %H:%M'), | 1262 | 'latest_compared_time': '' if result_obj is None else result_obj.update_time.strftime('%Y-%m-%d %H:%M'), |
| 1161 | 'source': source, | 1263 | 'source': source, |
| ... | @@ -1328,7 +1430,8 @@ class SECMSView(GenericView): | ... | @@ -1328,7 +1430,8 @@ class SECMSView(GenericView): |
| 1328 | 1430 | ||
| 1329 | args = request.data | 1431 | args = request.data |
| 1330 | cms_info = args.get('content', {}) | 1432 | cms_info = args.get('content', {}) |
| 1331 | business_type = consts.AFC_PREFIX if cms_info.get('financeCompany', '').startswith('宝马') else consts.HIL_PREFIX | 1433 | business_type = consts.AFC_PREFIX if cms_info.get('financeCompany', '').startswith( |
| 1434 | '宝马') else consts.HIL_PREFIX | ||
| 1332 | src_application_id = cms_info.get('settlemnetVerification', {}).get('applicationNo', '') | 1435 | src_application_id = cms_info.get('settlemnetVerification', {}).get('applicationNo', '') |
| 1333 | application_id = src_application_id[:src_application_id.rfind('-')] | 1436 | application_id = src_application_id[:src_application_id.rfind('-')] |
| 1334 | 1437 | ||
| ... | @@ -1363,6 +1466,32 @@ class SECMSView(GenericView): | ... | @@ -1363,6 +1466,32 @@ class SECMSView(GenericView): |
| 1363 | content=content_str, | 1466 | content=content_str, |
| 1364 | ) | 1467 | ) |
| 1365 | 1468 | ||
| 1469 | # 检查是否fsm流程(SE) | ||
| 1470 | fsm_contract = cms_info.get('FSMContract', False) | ||
| 1471 | fsm_best_price = cms_info.get('FSMBestPrice', False) | ||
| 1472 | if fsm_contract: | ||
| 1473 | # 记录fsm 流程的cms 提交 | ||
| 1474 | try: | ||
| 1475 | cms_status_class = HILCmsStatusInfo if business_type in consts.HIL_SET else AFCCmsStatusInfo | ||
| 1476 | cms_status_info = cms_status_class.objects.filter(application_id=application_id).first() | ||
| 1477 | if cms_status_info: | ||
| 1478 | cms_status_info.is_fsm = 1 | ||
| 1479 | cms_status_info.update_time = datetime.datetime.now() | ||
| 1480 | cms_status_info.save() | ||
| 1481 | else: | ||
| 1482 | cms_status_info = cms_status_class() | ||
| 1483 | cms_status_info.application_id = application_id | ||
| 1484 | cms_status_info.business_type = business_type | ||
| 1485 | cms_status_info.is_fsm = 1 | ||
| 1486 | cms_status_info.update_time = datetime.datetime.now() | ||
| 1487 | cms_status_info.create_time = datetime.datetime.now() | ||
| 1488 | cms_status_info.save() | ||
| 1489 | except Exception as e: | ||
| 1490 | self.exception_log.exception( | ||
| 1491 | '[cms view] [cms_status_info db save failed] [error={0}]'.format(traceback.format_exc())) | ||
| 1492 | fsm_compare.apply_async((application_id, business_type, None, None, False, True), | ||
| 1493 | queue='queue_compare') | ||
| 1494 | else: | ||
| 1366 | # 触发比对 | 1495 | # 触发比对 |
| 1367 | compare.apply_async((application_id, business_type, None, None, False, True), | 1496 | compare.apply_async((application_id, business_type, None, None, False, True), |
| 1368 | queue='queue_compare') | 1497 | queue='queue_compare') |
| ... | @@ -1458,7 +1587,7 @@ class AutoSettlementView(GenericView): | ... | @@ -1458,7 +1587,7 @@ class AutoSettlementView(GenericView): |
| 1458 | whole_result_query = Q(ocr_whole_result_pass=whole_result) if not isinstance(whole_result, str) else Q() | 1587 | whole_result_query = Q(ocr_whole_result_pass=whole_result) if not isinstance(whole_result, str) else Q() |
| 1459 | rpa_result_query = Q(rpa_result=rpa_result) if not isinstance(rpa_result, str) else Q() | 1588 | rpa_result_query = Q(rpa_result=rpa_result) if not isinstance(rpa_result, str) else Q() |
| 1460 | time1_query = Q(rpa_get_case_from_ocr_time__gte=get_case_from_ocr_time_start, | 1589 | time1_query = Q(rpa_get_case_from_ocr_time__gte=get_case_from_ocr_time_start, |
| 1461 | rpa_get_case_from_ocr_time__lt=get_case_from_ocr_time_end + datetime.timedelta(days=1))\ | 1590 | rpa_get_case_from_ocr_time__lt=get_case_from_ocr_time_end + datetime.timedelta(days=1)) \ |
| 1462 | if get_case_from_ocr_time_start is not None and get_case_from_ocr_time_end is not None else Q() | 1591 | if get_case_from_ocr_time_start is not None and get_case_from_ocr_time_end is not None else Q() |
| 1463 | time2_query = Q(rpa_activated_time__gte=activated_time_start, | 1592 | time2_query = Q(rpa_activated_time__gte=activated_time_start, |
| 1464 | rpa_activated_time__lt=activated_time_end + datetime.timedelta(days=1)) \ | 1593 | rpa_activated_time__lt=activated_time_end + datetime.timedelta(days=1)) \ | ... | ... |
| ... | @@ -7,6 +7,9 @@ import traceback | ... | @@ -7,6 +7,9 @@ import traceback |
| 7 | import numpy as np | 7 | import numpy as np |
| 8 | from datetime import datetime, timedelta | 8 | from datetime import datetime, timedelta |
| 9 | from collections import OrderedDict | 9 | from collections import OrderedDict |
| 10 | |||
| 11 | import requests | ||
| 12 | |||
| 10 | from . import app | 13 | from . import app |
| 11 | from settings import conf | 14 | from settings import conf |
| 12 | from apps.doc.models import ( | 15 | from apps.doc.models import ( |
| ... | @@ -3266,6 +3269,33 @@ def se_compare(application_id, application_entity, ocr_res_id, last_obj, ocr_res | ... | @@ -3266,6 +3269,33 @@ def se_compare(application_id, application_entity, ocr_res_id, last_obj, ocr_res |
| 3266 | 3269 | ||
| 3267 | 3270 | ||
@app.task
def fsm_compare(application_id, application_entity, uniq_seq, ocr_res_id, is_ca=True, is_cms=False):
    """Ask the Java FSM service (``conf.FSM_URL``) to run the FSM comparison.

    The call is best-effort: the response is only logged, and any failure is
    logged instead of being raised, so the task never propagates an error.

    Args:
        application_id: Application identifier, sent as ``applicationId``.
        application_entity: Business type, sent as ``businessType``.
        uniq_seq: Only written to the log; it is not part of the request body.
        ocr_res_id: Sent as ``ocrResId``.
        is_ca: Sent as ``isCa``.
        is_cms: Sent as ``isCms``.
    """
    compare_log.info('{0} [receive fsm task] [entity={1}] [id={2}] [uniq_seq={3}] [ocr_res_id={4}] [is_ca={5}] '
                     '[is_cms={6}]'.format(log_base, application_entity, application_id, uniq_seq, ocr_res_id,
                                           is_ca, is_cms))
    # Call the Java FSM comparison API over HTTP.
    # FSM is an SE flow; CA can be ignored for now.
    url = conf.FSM_URL
    body = {
        'applicationId': application_id,
        'businessType': application_entity,
        'ocrResId': ocr_res_id,
        'isCa': is_ca,
        'isCms': is_cms
    }
    headers = {
        'Content-Type': 'application/json'
    }
    # (connect, read) timeout in seconds. Without one, requests waits forever
    # and a stalled Java service would pin this worker indefinitely.
    # NOTE(review): the read timeout is deliberately generous - tune to the service SLA.
    request_timeout = (10, 300)
    try:
        compare_log.info("request java fsm api, url:{0}, body:{1}".format(url, json.dumps(body)))
        resp = requests.post(url, headers=headers, json=body, timeout=request_timeout)
        compare_log.info("response from fsm api, resp:{0}".format(resp.text))
        if not resp.ok:
            # A 4xx/5xx reply is still a failed comparison request; make it visible.
            compare_log.error("fsm api returned http status {0}, url:{1}, param:{2}".format(
                resp.status_code, url, json.dumps(body)))
    except Exception:
        # Best-effort task: log the full traceback, never re-raise.
        compare_log.error("fsm full request to java error, url:{0}, param:{1}, errorMsg:{2}".format(
            url, json.dumps(body), traceback.format_exc()))
| 3296 | |||
| 3297 | |||
| 3298 | @app.task | ||
| 3269 | def compare(application_id, application_entity, uniq_seq, ocr_res_id, is_ca=True, is_cms=False): | 3299 | def compare(application_id, application_entity, uniq_seq, ocr_res_id, is_ca=True, is_cms=False): |
| 3270 | # POS: application_id, application_entity, uniq_seq, None | 3300 | # POS: application_id, application_entity, uniq_seq, None |
| 3271 | # OCR: application_id, business_type(application_entity), None, ocr_res_id | 3301 | # OCR: application_id, business_type(application_entity), None, ocr_res_id | ... | ... |
| ... | @@ -6,6 +6,7 @@ | ... | @@ -6,6 +6,7 @@ |
| 6 | # @Description : | 6 | # @Description : |
| 7 | 7 | ||
| 8 | from .get_char import Finder | 8 | from .get_char import Finder |
| 9 | from .get_char_fsm import Finder as FSMFinder | ||
| 9 | import numpy as np | 10 | import numpy as np |
| 10 | 11 | ||
| 11 | 12 | ||
| ... | @@ -23,7 +24,7 @@ def extract_info(ocr_results): | ... | @@ -23,7 +24,7 @@ def extract_info(ocr_results): |
| 23 | return {'page_1': {'合同编号': contract_no}} | 24 | return {'page_1': {'合同编号': contract_no}} |
| 24 | 25 | ||
| 25 | 26 | ||
| 26 | def predict(pdf_info, is_qrs=False): | 27 | def predict(pdf_info, is_qrs=False, is_fsm=False): |
| 27 | ocr_results = {} | 28 | ocr_results = {} |
| 28 | for pno in pdf_info: | 29 | for pno in pdf_info: |
| 29 | ocr_results[pno] = {} | 30 | ocr_results[pno] = {} |
| ... | @@ -50,6 +51,9 @@ def predict(pdf_info, is_qrs=False): | ... | @@ -50,6 +51,9 @@ def predict(pdf_info, is_qrs=False): |
| 50 | results = extract_info(ocr_results) | 51 | results = extract_info(ocr_results) |
| 51 | else: | 52 | else: |
| 52 | # 输入是整个 PDF 中的信息 | 53 | # 输入是整个 PDF 中的信息 |
| 54 | if is_fsm: | ||
| 55 | f = FSMFinder(pdf_info, ocr_results=ocr_results) | ||
| 56 | else: | ||
| 53 | f = Finder(pdf_info, ocr_results=ocr_results) | 57 | f = Finder(pdf_info, ocr_results=ocr_results) |
| 54 | results = f.get_info() | 58 | results = f.get_info() |
| 55 | return results | 59 | return results | ... | ... |
| 1 | import re | ||
| 2 | import numpy as np | ||
| 3 | from fuzzywuzzy import fuzz | ||
| 4 | from shapely.geometry import Polygon | ||
| 5 | |||
| 6 | |||
| 7 | class Finder: | ||
| 8 | |||
| 9 | def __init__(self, pdf_info, ocr_results): | ||
| 10 | self.pdf_info = pdf_info | ||
| 11 | self.ocr_results = ocr_results | ||
| 12 | self.is_asp = False | ||
| 13 | self.item = {"words": None, | ||
| 14 | "position": None, | ||
| 15 | } | ||
| 16 | |||
def gen_init_result(self, is_asp):
    """Build the empty, fully-keyed result skeleton on ``self.init_result``.

    Formats the algorithm output: every page and field the extractor reports
    is pre-created so the output always has the same shape.

    Every leaf is the very same ``self.item`` object (not a copy), so a leaf
    must be replaced with a filled-in copy rather than mutated in place.

    Args:
        is_asp: Selects the page layout. When truthy, ``page_7`` carries only
            the contract number and the signature block moves to ``page_8``;
            otherwise the signature block is ``page_7`` and there is no
            ``page_8``. Previously this argument was ignored and
            ``self.is_asp`` was read instead.
    """
    item = self.item

    def pair(first, second):
        # Two-field group such as name/id or signature/date.
        return {first: item, second: item}

    def loan_amount():
        # Loan principal block, identical on page 1 and page 2.
        return {"大写": item,
                "小写": item,
                "车辆贷款本金金额": item,
                "附加产品融资贷款本金总金额": item,
                }

    def bank_account():
        # Account block used for both the payee and the repayment account.
        return {"账号": item,
                "户名": item,
                "开户行": item,
                }

    def signature_page():
        # Contract number followed by one signature/date pair per signer.
        page = {"合同编号": item}
        for signer in ("主借人签字", "共借人签字", "保证人1签字", "保证人2签字", "见证人签字"):
            page[signer] = pair("签字", "日期")
        return page

    self.init_result = {
        "page_1": {"合同编号": item,
                   "所购车辆价格": item,
                   "车架号": item,
                   "贷款本金金额": loan_amount(),
                   "贷款期限": item,
                   "附加产品融资贷款本金总金额明细": item,
                   "借款人签字及时间": item,
                   },
        "page_2": {"合同编号": item,
                   "借款人及抵押人": pair("name", "id"),
                   "共同借款人及共同抵押人": pair("name", "id"),
                   "保证人1": pair("name", "id"),
                   "保证人2": pair("name", "id"),
                   "所购车辆价格": item,
                   "车架号": item,
                   "经销商": item,
                   "贷款本金金额": loan_amount(),
                   "贷款期限": item,
                   "标准利率": item,
                   "借款人收款账户": bank_account(),
                   "还款账户": bank_account(),
                   },
        "page_3": {"合同编号": item,
                   "还款计划表": item,
                   "车辆代理商": item,
                   },
        "page_4": {"合同编号": item,
                   "附加产品融资贷款本金总金额明细": item,
                   },
        "page_5": {"合同编号": item},
        "page_6": {"合同编号": item},
    }

    if is_asp:
        # ASP layout: one extra page sits before the signature page.
        self.init_result["page_7"] = {"合同编号": item}
        self.init_result["page_8"] = signature_page()
    else:
        self.init_result["page_7"] = signature_page()
| 113 | |||
| 114 | |||
def get_top_iou(self, poly, ocr_result):
    """Find the OCR entry whose polygon overlaps ``poly`` the most.

    Args:
        poly: Flat coordinate sequence (or array) reshaped to (-1, 2) points.
        ocr_result: Mapping of key -> (bbox, text); each bbox is reshaped
            to (-1, 2) points as well.

    Returns:
        ``[iou, key]`` for the entry with the highest intersection-over-union
        (the last one wins on ties), or ``(-1, -1)`` when no entry could be
        scored.
    """
    if not ocr_result:
        return -1, -1

    # The query polygon does not change between entries, so build it once.
    target = Polygon(np.array(poly).reshape((-1, 2)))
    if not target.is_valid:
        return -1, -1

    best = None
    for key in ocr_result:
        bbox, _text = ocr_result[key]
        candidate = Polygon(np.array(bbox).reshape((-1, 2)))
        if not candidate.is_valid:
            continue
        inter = candidate.intersection(target).area
        union = candidate.area + target.area - inter
        if union <= 0:
            # Degenerate (zero-area) shapes cannot be scored; skip them
            # instead of dividing by zero.
            continue
        iou = inter / union
        # ">=" keeps the last entry among equal scores, matching the
        # previous stable-sort-and-take-last behaviour.
        if best is None or iou >= best[0]:
            best = [iou, key]

    if best is None:
        return -1, -1
    return best
| 136 | |||
def poly_to_rectangle(self, poly):
    """Return the axis-aligned bounding box of a flat polygon.

    Args:
        poly: Flat coordinate sequence ``[x0, y0, x1, y1, ...]``.

    Returns:
        list: ``[xmin, ymin, xmax, ymax]`` enclosing every point.

    Raises:
        ValueError: If ``poly`` is empty.
    """
    # Take real extrema: unpacking into repeated names kept only the last
    # value bound to each name, which clipped skewed quadrilaterals.
    xs = poly[0::2]
    ys = poly[1::2]
    return [min(xs), min(ys), max(xs), max(ys)]
| 141 | |||
def get_contract_no(self, page_num):
    """Read the contract number from the OCR output of one page.

    Args:
        page_num (str): Key of the page inside ``self.ocr_results``.

    Returns:
        dict: Copy of ``self.item``. When an OCR line contains the contract
        number label, 'words' holds the text after the last ':' and
        'position' the rectangle from ``poly_to_rectangle``. The last
        matching line wins.
    """
    result = self.item.copy()
    for polygon, content in self.ocr_results[page_num].values():
        if '合同编号:' not in content:
            continue
        number = content.split(':')[-1]
        rect = self.poly_to_rectangle(polygon)
        result['words'] = number
        result['position'] = rect
    return result
| 163 | |||
def get_vehicle_price(self, page_num='0'):
    """Extract the purchased-vehicle price from the OCR output of one page.

    Args:
        page_num (str): Key of the page inside ``self.ocr_results``.

    Returns:
        dict: Copy of ``self.item``. For a line containing the price label,
        'words' is the text after the last '币' and 'position' the rectangle
        from ``poly_to_rectangle``. The last matching line wins.
    """
    price = self.item.copy()
    for polygon, content in self.ocr_results[page_num].values():
        if '所购车辆价格为人民币' not in content:
            continue
        amount = content.split('币')[-1]
        rect = self.poly_to_rectangle(polygon)
        price['words'] = amount
        price['position'] = rect
    return price
| 176 | |||
def get_vin(self, page_num='0'):
    """Extract the vehicle frame number from the OCR output of one page.

    Args:
        page_num (str): Key of the page inside ``self.ocr_results``.

    Returns:
        dict: Copy of ``self.item``. For a line containing the frame-number
        label, 'words' is the text after the last ':' and 'position' the
        rectangle from ``poly_to_rectangle``. The last matching line wins.
    """
    frame_no = self.item.copy()
    for polygon, content in self.ocr_results[page_num].values():
        if '车架号:' not in content:
            continue
        value = content.split(':')[-1]
        rect = self.poly_to_rectangle(polygon)
        frame_no['words'] = value
        frame_no['position'] = rect
    return frame_no
| 189 | |||
def get_loan_principal(self, page_num='0'):
    """Extract the loan principal amounts from the PDF text of one page.

    Args:
        page_num (str): Key of the page inside ``self.pdf_info``.

    Returns:
        tuple: ``(upper, lower, asp_1, asp_2)``, each a copy of ``self.item``.
        ``upper`` is filled from a span whose ``fuzz.ratio`` against the
        uppercase-numeral characters exceeds 15; ``lower`` from a span
        containing the lowercase-amount marker; ``asp_1`` / ``asp_2`` from
        bracketed amounts located above / below the additional-product
        anchor span (compared by mean y of the bbox).
    """
    chinese_keywords = ['壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖', '拾',
                        '佰', '仟', '万', '亿', '元', '角', '分', '零', '整']
    # Loop invariants: build the reference string and the regex once.
    keyword_text = ''.join(chinese_keywords)
    amount_pattern = re.compile(r'人民币:小写:\[(.*)\]')

    upper = self.item.copy()
    lower = self.item.copy()
    asp_1 = self.item.copy()
    asp_2 = self.item.copy()

    # Only blocks whose 'type' is 0 are inspected.
    spans = []
    for block in self.pdf_info[page_num]['blocks']:
        if block['type'] != 0:
            continue
        for line in block['lines']:
            for span in line['spans']:
                spans.append((span['bbox'], span['text']))

    anchor_bbox = None
    for bbox, text in spans:
        if fuzz.ratio(keyword_text, text) > 15:
            # NOTE: ``text`` is deliberately rebound here, so the checks
            # below see the stripped value, as before.
            text = text.split(':')[-1].strip()
            upper['position'] = bbox
            upper['words'] = text
        if '小写:¥' in text:
            words = text.split('¥')[-1].strip()
            lower['position'] = bbox
            lower['words'] = words
        if '附加产品融资贷款本金总金额' == text:
            anchor_bbox = bbox

    if anchor_bbox:
        anchor_y = np.mean(anchor_bbox[1::2])
        for bbox, text in spans:
            if '人民币:小写:' not in text:
                continue
            found = amount_pattern.search(text)
            if found is None:
                # Label without a bracketed amount: skip it rather than
                # raising IndexError on an empty findall() result.
                continue
            words = found.group(1)
            span_y = np.mean(bbox[1::2])
            if span_y < anchor_y:
                asp_1['position'] = bbox
                asp_1['words'] = words
            if span_y > anchor_y:
                asp_2['position'] = bbox
                asp_2['words'] = words
    return upper, lower, asp_1, asp_2
| 230 | |||
def get_loan_term(self, page_num='0'):
    """Extract the loan term (in months) from the PDF text of one page.

    The text of all spans is concatenated so that a term split across spans
    can still be matched; the span that contains "<months>个月" then supplies
    the position (the last such span wins).

    Args:
        page_num (str): Key of the page inside ``self.pdf_info``.

    Returns:
        dict: Copy of ``self.item`` with 'words' / 'position' filled when
        the term is found.
    """
    result = self.item.copy()

    def text_spans():
        # Only blocks whose 'type' is 0 are inspected.
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    yield span['bbox'], span['text']

    joined = ''
    for _, fragment in text_spans():
        joined += fragment

    found = re.search(r'贷款期限(\d+)个月', joined)
    if not found:
        return result

    months = found.group(1)
    for box, fragment in text_spans():
        if f'{months}个月' in fragment:
            result['position'] = box
            result['words'] = months
    return result
| 254 | |||
def get_standard_rate(self, page_num='0'):
    """Extract the standard annual interest rate from one page of PDF text.

    Args:
        page_num (str): Key of the page inside ``self.pdf_info``.

    Returns:
        dict: Copy of ``self.item``; 'words' is the captured rate (without
        the "%/年" suffix) and 'position' the bbox of the matching span.
        The last matching span wins.
    """
    rate = self.item.copy()
    text_blocks = [b for b in self.pdf_info[page_num]['blocks'] if b['type'] == 0]
    for block in text_blocks:
        for line in block['lines']:
            for span in line['spans']:
                hit = re.search(r'本合同当期的标准利率为(\S+)%/年', span['text'])
                if hit is None:
                    continue
                rate['position'] = span['bbox']
                rate['words'] = hit.group(1)
    return rate
| 268 | |||
def mergelist(self, text_list):
    """Join each '所购' entry with following entries that continue it.

    An entry containing '所购' is merged with the entry after it whenever
    that next entry contains at least one Chinese character. Merging repeats
    until no such pair remains.

    Args:
        text_list (list[str]): Text fragments in reading order.

    Returns:
        list[str]: ``text_list`` itself when nothing needs merging,
        otherwise a new list with the fragments joined.
    """
    non_chinese = re.compile("[^\u4e00-\u9fa5]")  # anything that is not a Chinese character
    current = text_list
    while True:
        merge_at = -1
        # Pair every entry with its successor; the last entry has none, so
        # it can no longer trigger an out-of-range lookup.
        for index, (entry, following) in enumerate(zip(current, current[1:])):
            if '所购' in entry and non_chinese.sub('', following):
                merge_at = index
        if merge_at == -1:
            return current
        current = (current[:merge_at]
                   + [current[merge_at] + current[merge_at + 1]]
                   + current[merge_at + 2:])
| 281 | |||
def get_asp_details(self, page_num):
    """Rebuild the additional-product financing detail table from OCR output.

    Locates the three column headers and the total-row label among the OCR
    entries of ``page_num``, then walks downwards from the first column
    header, using ``get_top_iou`` to pick the cell under each shifted anchor.

    NOTE(review): the nesting of the ``if bbox_total:`` stanza and of the
    final ``['words']`` assignment was reconstructed from a listing without
    indentation -- confirm against the original file.

    Args:
        page_num (str): Key of the page inside ``self.ocr_results``.

    Returns:
        dict: Copy of ``self.item``; 'words' receives the table (a list of
        rows) when the total-row label is found.
    """
    asp_details_table_term = self.item.copy()

    # Title row and header row of the table.
    asp_details_table = [['附加产品融资贷款本金总金额及贷款利率明细'], ['项目1', '用途总金额2', '贷款本金3']]

    bbox_xm = None      # bbox of the "item" column header
    bbox_ytzje = None   # bbox of the "total usage amount" column header
    bbox_dkbj = None    # bbox of the "loan principal" column header
    bbox_total = None   # bbox of the (possibly truncated) total-row label
    for key in self.ocr_results[page_num]:
        bbox, text = self.ocr_results[page_num][key]
        if text == '项目1':
            bbox_xm = bbox
        if text == '用途总金额2':
            bbox_ytzje = bbox
        if text == '贷款本金3':
            bbox_dkbj = bbox
        if text in ['附加产品融资贷款本', '附加产品融资贷款本金', '附加产品融资贷']:
            bbox_total = bbox

    if bbox_xm:
        # Walk down the first column, at most 10 steps.
        for i in range(10):
            # Shift the current cell down by 1.4x its height to probe the next row.
            rh = abs(bbox_xm[1]-bbox_xm[-1])
            anchor = np.array(bbox_xm).reshape((-1 ,2))
            anchor[:, 1] += int(rh*1.4)
            _iou, _key = self.get_top_iou(poly=anchor, ocr_result=self.ocr_results[page_num])
            if _iou > 0:
                bbox, xm_text = self.ocr_results[page_num][_key]
                bbox_xm = bbox
                # Handle an item name that wraps onto a second line: append
                # the text to the first cell of the previous row.
                if not '所购' in xm_text:
                    line = asp_details_table[-1]
                    line[0] += xm_text
                    asp_details_table[-1] = line
                    continue
                # print(xm_text)
                # Same row (y from the item cell), x range of the second column header.
                # NOTE(review): assumes bbox_ytzje / bbox_dkbj were found above -- confirm.
                anchor_1 = [bbox_ytzje[0], bbox[1], bbox_ytzje[2], bbox[3],
                            bbox_ytzje[4], bbox[5], bbox_ytzje[6], bbox[7]]
                _iou, _key = self.get_top_iou(poly=anchor_1, ocr_result=self.ocr_results[page_num])
                bbox, ytzje_text = self.ocr_results[page_num][_key]
                # print(ytzje_text)
                # Same row, x range of the third column header.
                anchor_2 = [bbox_dkbj[0], bbox[1], bbox_dkbj[2], bbox[3],
                            bbox_dkbj[4], bbox[5], bbox_dkbj[6], bbox[7]]
                _iou, _key = self.get_top_iou(poly=anchor_2, ocr_result=self.ocr_results[page_num])
                bbox, dkbj_text = self.ocr_results[page_num][_key]
                # print(dkbj_text)
                # Both lookups returned the same text: split it on a space
                # into the item and amount parts.
                if xm_text == ytzje_text:
                    xm_text, ytzje_text = xm_text.split(' ')
                line = [xm_text, ytzje_text, dkbj_text]
                asp_details_table.append(line)
            else:
                # Nothing overlaps the probed position: end of the table.
                break

    if bbox_total:
        # Total row: y from the total label, x range of the third column header.
        anchor = [bbox_dkbj[0], bbox_total[1], bbox_dkbj[2], bbox_total[3],
                  bbox_dkbj[4], bbox_total[5], bbox_dkbj[6], bbox_total[7]]
        _iou, _key = self.get_top_iou(poly=anchor, ocr_result=self.ocr_results[page_num])
        bbox, total_text = self.ocr_results[page_num][_key]
        asp_details_table.append(['附加产品融资贷款本金总金额:', '', total_text])
        asp_details_table_term['words'] = asp_details_table

    return asp_details_table_term
| 344 | |||
def get_signature(self):
    """Find the signing-date span in the PDF text of page '0'.

    Returns:
        dict: Copy of ``self.item``; 'words' is the full text of the span
        containing the signing-date label and 'position' its bbox. The last
        matching span wins.
    """
    signed = self.item.copy()
    text_blocks = [b for b in self.pdf_info['0']['blocks'] if b['type'] == 0]
    for block in text_blocks:
        for line in block['lines']:
            for span in line['spans']:
                box, content = span['bbox'], span['text']
                if '签署日期' not in content:
                    continue
                signed['words'] = content
                signed['position'] = box
    return signed
| 359 | |||
def get_somebody(self, top, bottom):
    """Return the name and ID found between two marker texts on page '1'.

    The bottom edge (``bbox[3]``) of the last span containing ``top`` /
    ``bottom`` defines an open vertical band; spans whose own bottom edge
    lies strictly inside the band are searched for the name and ID labels.

    Args:
        top (str): Marker text for the upper boundary.
        bottom (str): Marker text for the lower boundary.

    Returns:
        tuple: ``(name, id)``, each a copy of ``self.item`` with 'words'
        (text after the last ':') and 'position' set when found.
    """
    name_item = self.item.copy()
    id_item = self.item.copy()

    def text_spans():
        # Only blocks whose 'type' is 0 are inspected.
        for block in self.pdf_info['1']['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    yield span['bbox'], span['text']

    # Pass 1: locate the vertical band.
    upper_edge = 0
    lower_edge = 0
    for box, content in text_spans():
        if top in content:
            upper_edge = box[3]
        if bottom in content:
            lower_edge = box[3]

    # Pass 2: pick the labelled values inside the band.
    for box, content in text_spans():
        if not (upper_edge < box[3] < lower_edge):
            continue
        if '姓名/名称' in content:
            name_item['position'] = box
            name_item['words'] = content.split(':')[-1]
        if '自然人身份证件号码/法人执照号码' in content:
            id_item['position'] = box
            id_item['words'] = content.split(':')[-1]
    return name_item, id_item
| 394 | |||
def get_seller(self):
    """Find the dealer / vehicle seller value on page '1'.

    The label span is located first; the value is a span whose centre lies
    to the right of the label, left of the page's horizontal midpoint, and
    vertically within the label's bbox. The last such span wins.

    Returns:
        dict: Copy of ``self.item`` with 'words' / 'position' set when found.
    """
    seller = self.item.copy()
    page = self.pdf_info['1']

    def text_spans():
        # Only blocks whose 'type' is 0 are inspected.
        for block in page['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                yield from line['spans']

    # Locate the label (key) first.
    label_box = None
    for span in text_spans():
        if span['text'] in ['经销商', '车辆销售方']:
            label_box = span['bbox']
    if not label_box:
        return seller

    # Then match the value against the label's position.
    x_limit = page['width'] * 0.5
    for span in text_spans():
        box = span['bbox']
        center_x = np.mean(box[::2])
        center_y = np.mean(box[1::2])
        right_of_label = label_box[2] < center_x < x_limit
        on_label_row = label_box[1] < center_y < label_box[3]
        if right_of_label and on_label_row:
            seller['position'] = box
            seller['words'] = span['text']
    return seller
| 421 | |||
def get_cldls(self):
    """Find the vehicle agent value on page '2'.

    The label is taken from the first line that contains it (the last
    matching span inside that line). The value is the first span whose
    centre lies to the right of the label, left of the page's horizontal
    midpoint, and vertically within the label's bbox.

    Returns:
        dict: Copy of ``self.item`` with 'words' / 'position' set when found.
    """
    seller = self.item.copy()
    page = self.pdf_info['2']

    # Locate the label (key) first; stop at the first line that has it.
    label_box = None
    for block in page['blocks']:
        if block['type'] != 0:
            continue
        for line in block['lines']:
            hits = [span['bbox'] for span in line['spans']
                    if span['text'].strip() == '车辆代理商']
            if hits:
                label_box = hits[-1]
                break
        if label_box is not None:
            break

    if not label_box:
        return seller

    # Then match the value against the label's position.
    x_limit = page['width'] * 0.5
    for block in page['blocks']:
        if block['type'] != 0:
            continue
        for line in block['lines']:
            for span in line['spans']:
                box = span['bbox']
                if not (label_box[2] < np.mean(box[::2]) < x_limit):
                    continue
                if not (label_box[1] < np.mean(box[1::2]) < label_box[3]):
                    continue
                seller['position'] = box
                seller['words'] = span['text']
                return seller
    return seller
| 454 | |||
def get_borrower_collection_account(self):
    """Extract the borrower's collection account details from page '1'.

    All span texts are concatenated; when the section label is present the
    account number, account name and bank are captured with regexes, and
    each captured value is located again in the individual spans to obtain
    its position (the last span containing the value wins).

    Returns:
        tuple: ``(account, account_name, account_bank)``, each a copy of
        ``self.item`` with 'words' / 'position' set when found.
    """
    account = self.item.copy()
    account_name = self.item.copy()
    account_bank = self.item.copy()

    # Only blocks whose 'type' is 0 are inspected.
    spans = []
    for block in self.pdf_info['1']['blocks']:
        if block['type'] != 0:
            continue
        for line in block['lines']:
            for span in line['spans']:
                spans.append((span['bbox'], span['text']))

    all_text = ''
    for _, fragment in spans:
        all_text += fragment

    # Only the explicit (not "to be notified separately") format is output.
    if '借款人收款账户' in all_text:
        all_text = all_text.replace(' ', '').replace(' ', '')
        lookups = (
            (account, r'账号:(.*?)户名'),
            (account_name, r'户名:(.*?)开户行'),
            (account_bank, r'开户行:(.*?)借款人'),
        )
        for target, regex in lookups:
            found = re.findall(regex, all_text)
            if not found:
                continue
            words = found[0]
            for box, fragment in spans:
                if f'{words}' in fragment:
                    target['position'] = box
                    target['words'] = words
    return account, account_name, account_bank
| 507 | |||
def get_payback_account(self):
    """Extract the repayment account details from page '1'.

    All span texts are concatenated; the part after the repayment-account
    heading is searched with regexes for the account number, account name
    and bank. Each captured value is then located in the individual spans
    to obtain its position (the last matching span wins).

    Returns:
        tuple: ``(account, account_name, account_bank)``, each a copy of
        ``self.item`` with 'words' / 'position' set when found.
    """
    account = self.item.copy()
    account_name = self.item.copy()
    account_bank = self.item.copy()

    # Only blocks whose 'type' is 0 are inspected.
    spans = []
    for block in self.pdf_info['1']['blocks']:
        if block['type'] != 0:
            continue
        for line in block['lines']:
            for span in line['spans']:
                spans.append((span['bbox'], span['text']))

    all_text = ''
    for _, fragment in spans:
        all_text += fragment

    # Only the explicit (not "to be notified separately") format is output.
    if '(13) 还款账户' not in all_text:
        return account, account_name, account_bank

    all_text = all_text.split('(13) 还款账户')[-1]
    all_text = all_text.replace(' ', '').replace(' ', '')

    number_hits = re.findall(r'账号:(.*?)户名', all_text)
    if number_hits:
        words = number_hits[0]
        for box, fragment in spans:
            if f'{words}' in fragment:
                account['position'] = box
                account['words'] = words

    name_hits = re.findall(r'户名:(.*?)开户行', all_text)
    if name_hits:
        words = name_hits[0]
        for box, fragment in spans:
            if f'{words}' in fragment:
                account_name['position'] = box
                account_name['words'] = words

    bank_hits = re.findall(r'开户行:(.*?);', all_text)
    if bank_hits:
        words = bank_hits[0]
        for box, fragment in spans:
            # The bank is matched together with its label and trailing ';'.
            if f'开户行:{words};' in fragment.replace(' ', ''):
                account_bank['position'] = box
                account_bank['words'] = words
    return account, account_name, account_bank
| 561 | |||
def get_repayment_schedule(self):
    """Collect the repayment schedule table from the PDF text of page '2'.

    Span texts between the table's first header cell and the closing remark
    are gathered in order and cut into rows of five cells. Row building
    stops at the first row whose second cell equals ``str(row_index + 1)``.

    Returns:
        dict: Copy of ``self.item``; 'words' holds the list of rows when at
        least one row was collected.
    """
    schedule = self.item.copy()

    cells = []
    inside_table = False
    for block in self.pdf_info['2']['blocks']:
        if block['type'] != 0:
            continue
        for line in block['lines']:
            for span in line['spans']:
                content = span['text']
                if content == '序号':
                    inside_table = True
                if '以上表格中所列的序号并非还款期数' in content:
                    inside_table = False
                if inside_table:
                    cells.append(content)

    columns = 5  # the table has five columns
    rows = []
    for row_index in range(len(cells) // columns):
        start = row_index * columns
        row = cells[start:start + columns]
        if row[1] == str(row_index + 1):
            break
        rows.append(row)

    if rows:
        schedule['words'] = rows
    return schedule
| 596 | |||
def get_signature_role_1(self):
    """Report whether the borrower (mortgagor) signature area has content.

    Scans every page of ``self.pdf_info``; spans from the one containing
    the role label up to (not including) the next span containing '日期'
    form the signature region.

    Returns:
        dict: Copy of ``self.init_item`` with 'page_num' (last page that
        contributed a span), 'position' (``[xmin, ymin, xmax, ymax]`` over
        all region bboxes) and 'words' ('有' when more than four spans were
        collected, otherwise '无'). 'page_num' and 'position' stay ``None``
        when the region is not found.
    """
    signature_role_1 = self.init_item.copy()
    # Locate the signature region first.
    texts = []
    boxes = []
    page_num = None
    region = False
    for page_key in self.pdf_info:
        for block in self.pdf_info[page_key]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '借款人(抵押人)' in text:
                        region = True
                    if '日期' in text:
                        region = False
                    if region:
                        page_num = page_key
                        texts.append(text)
                        boxes.append(bbox)

    words = '有' if len(texts) > 4 else '无'

    position = None
    if boxes:
        # Guarded: min()/max() over an empty array would raise ValueError
        # when the document has no such signature region.
        corners = np.array(boxes).reshape((-1, 2))
        position = [min(corners[:, 0]), min(corners[:, 1]),
                    max(corners[:, 0]), max(corners[:, 1])]

    signature_role_1['page_num'] = page_num
    signature_role_1['position'] = position
    signature_role_1['words'] = words
    return signature_role_1
| 631 | |||
def get_signature_role_2(self):
    """Report whether the co-borrower (co-mortgagor) signature area has content.

    Scans every page of ``self.pdf_info``; spans from the one containing
    the role label up to (not including) the next span containing '日期'
    form the signature region.

    Returns:
        dict: Copy of ``self.init_item`` with 'page_num' (last page that
        contributed a span), 'position' (``[xmin, ymin, xmax, ymax]`` over
        all region bboxes) and 'words' ('有' when more than four spans were
        collected, otherwise '无'). 'page_num' and 'position' stay ``None``
        when the region is not found.
    """
    signature_role_2 = self.init_item.copy()
    # Locate the signature region first.
    texts = []
    boxes = []
    page_num = None
    region = False
    for page_key in self.pdf_info:
        for block in self.pdf_info[page_key]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '共同借款人(共同抵押人)' in text:
                        region = True
                    if '日期' in text:
                        region = False
                    if region:
                        page_num = page_key
                        texts.append(text)
                        boxes.append(bbox)

    words = '有' if len(texts) > 4 else '无'

    position = None
    if boxes:
        # Guarded: min()/max() over an empty array would raise ValueError
        # when the document has no such signature region.
        corners = np.array(boxes).reshape((-1, 2))
        position = [min(corners[:, 0]), min(corners[:, 1]),
                    max(corners[:, 0]), max(corners[:, 1])]

    signature_role_2['page_num'] = page_num
    signature_role_2['position'] = position
    signature_role_2['words'] = words
    return signature_role_2
| 666 | |||
def get_signature_role_3(self):
    """Report whether the guarantor-1 signature area has content.

    Scans every page of ``self.pdf_info``; the role label is only honoured
    on pages whose key is not 0. Spans from the label up to (not including)
    the next span containing '日期' form the signature region.

    Returns:
        dict: Copy of ``self.init_item`` with 'page_num' (last page that
        contributed a span), 'position' (``[xmin, ymin, xmax, ymax]`` over
        all region bboxes) and 'words' ('有' when more than four spans were
        collected, otherwise '无'). 'page_num' and 'position' stay ``None``
        when the region is not found.
    """
    signature_role_3 = self.init_item.copy()
    # Locate the signature region first.
    texts = []
    boxes = []
    page_num = None
    region = False
    for page_key in self.pdf_info:
        for block in self.pdf_info[page_key]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '保证人1' in text and int(page_key) != 0:
                        region = True
                    if '日期' in text:
                        region = False
                    if region:
                        page_num = page_key
                        texts.append(text)
                        boxes.append(bbox)

    words = '有' if len(texts) > 4 else '无'

    position = None
    if boxes:
        # Guarded: min()/max() over an empty array would raise ValueError
        # when the document has no such signature region.
        corners = np.array(boxes).reshape((-1, 2))
        position = [min(corners[:, 0]), min(corners[:, 1]),
                    max(corners[:, 0]), max(corners[:, 1])]

    signature_role_3['page_num'] = page_num
    signature_role_3['position'] = position
    signature_role_3['words'] = words
    return signature_role_3
| 701 | |||
def get_signature_role_4(self):
    """Report whether the guarantor-2 signature area has content.

    Scans every page of ``self.pdf_info``; the role label is only honoured
    on pages whose key is not 0. Spans from the label up to (not including)
    the next span containing '日期' form the signature region.

    Returns:
        dict: Copy of ``self.init_item`` with 'page_num' (last page that
        contributed a span), 'position' (``[xmin, ymin, xmax, ymax]`` over
        all region bboxes) and 'words' ('有' when more than four spans were
        collected, otherwise '无'). 'page_num' and 'position' stay ``None``
        when the region is not found.
    """
    signature_role_4 = self.init_item.copy()
    # Locate the signature region first.
    texts = []
    boxes = []
    page_num = None
    region = False
    for page_key in self.pdf_info:
        for block in self.pdf_info[page_key]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '保证人2' in text and int(page_key) != 0:
                        region = True
                    if '日期' in text:
                        region = False
                    if region:
                        page_num = page_key
                        texts.append(text)
                        boxes.append(bbox)

    words = '有' if len(texts) > 4 else '无'

    position = None
    if boxes:
        # Guarded: min()/max() over an empty array would raise ValueError
        # when the document has no such signature region.
        corners = np.array(boxes).reshape((-1, 2))
        position = [min(corners[:, 0]), min(corners[:, 1]),
                    max(corners[:, 0]), max(corners[:, 1])]

    signature_role_4['page_num'] = page_num
    signature_role_4['position'] = position
    signature_role_4['words'] = words
    return signature_role_4
| 736 | |||
def get_signature_role_5(self):
    """Report whether the witness signature area has content.

    Scans every page of ``self.pdf_info``; the role label is only honoured
    on pages whose key is not 0. Spans from the label up to (not including)
    the next span containing '年' form the signature region.

    Returns:
        dict: Copy of ``self.init_item`` with 'page_num' (last page that
        contributed a span), 'position' (``[xmin, ymin, xmax, ymax]`` over
        all region bboxes) and 'words' ('有' when more than four spans were
        collected, otherwise '无'). 'page_num' and 'position' stay ``None``
        when the region is not found.
    """
    signature_role_5 = self.init_item.copy()
    # Locate the signature region first.
    texts = []
    boxes = []
    page_num = None
    region = False
    for page_key in self.pdf_info:
        for block in self.pdf_info[page_key]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '见证人签字' in text and int(page_key) != 0:
                        region = True
                    if '年' in text:
                        region = False
                    if region:
                        page_num = page_key
                        texts.append(text)
                        boxes.append(bbox)

    # (The leftover debug print of ``texts`` was removed.)
    words = '有' if len(texts) > 4 else '无'

    position = None
    if boxes:
        # Guarded: min()/max() over an empty array would raise ValueError
        # when the document has no such signature region.
        corners = np.array(boxes).reshape((-1, 2))
        position = [min(corners[:, 0]), min(corners[:, 1]),
                    max(corners[:, 0]), max(corners[:, 1])]

    signature_role_5['page_num'] = page_num
    signature_role_5['position'] = position
    signature_role_5['words'] = words
    return signature_role_5
| 772 | |||
def get_last_page_signature(self, page_num, top, bottom):
    """Extract a signer name and signing date between two marker texts.

    The top edge (``bbox[1]``) of the last span containing ``top`` /
    ``bottom`` bounds a vertical band. Inside the band, a span containing
    the signing-date label supplies the name (text before the first space)
    and the date (text after the last ':'). The last matching span wins.

    Args:
        page_num (str): Key of the page inside ``self.pdf_info``.
        top (str): Marker text for the upper boundary.
        bottom (str): Marker text for the lower boundary.

    Returns:
        tuple: ``(signature_name, signature_date)``, each a copy of
        ``self.item``; both share the bbox of the matching span.
    """
    signature_name = self.item.copy()
    signature_date = self.item.copy()

    def text_spans():
        # Only blocks whose 'type' is 0 are inspected.
        for block in self.pdf_info[page_num]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    yield span['bbox'], span['text']

    upper_edge = None
    lower_edge = None
    for box, content in text_spans():
        if top in content:
            upper_edge = box[1]
        if bottom in content:
            lower_edge = box[1]

    if upper_edge is None or lower_edge is None:
        return signature_name, signature_date

    for box, content in text_spans():
        if '签署日期' not in content:
            continue
        if not (int(upper_edge) < np.mean(box[1::2]) < int(lower_edge)):
            continue
        signer = content.split(' ')[0]
        signed_on = content.split(':')[-1]
        signature_name['words'] = signer
        signature_name['position'] = box
        signature_date['words'] = signed_on
        signature_date['position'] = box
    return signature_name, signature_date
| 804 | |||
| 805 | def get_info(self): | ||
| 806 | """ | ||
| 807 | block['type'] == 0 : 表示该元素为图片 | ||
| 808 | |||
| 809 | Returns: | ||
| 810 | dict: Description | ||
| 811 | """ | ||
| 812 | |||
| 813 | # 先判断是否为 ASP 产品 | ||
| 814 | # 只看第一页,判断是否有 '附加产品融资贷款本金总金额' 这一句话,若有则为 ASP 产品 | ||
| 815 | # print(self.pdf_info['0']['blocks']) | ||
| 816 | # for block in self.pdf_info['0']['blocks']: | ||
| 817 | # if block['type'] != 0: | ||
| 818 | # continue | ||
| 819 | # for line in block['lines']: | ||
| 820 | # for span in line['spans']: | ||
| 821 | # bbox, text = span['bbox'], span['text'] | ||
| 822 | # if '附加产品融资贷款本金总金额' == text: | ||
| 823 | # self.is_asp = True | ||
| 824 | for key in self.ocr_results['0']: | ||
| 825 | bbox, text = self.ocr_results['0'][key] | ||
| 826 | if '附加产品融资贷款本金总金额' in text: | ||
| 827 | self.is_asp = True | ||
| 828 | |||
| 829 | self.gen_init_result(self.is_asp) | ||
| 830 | |||
| 831 | if len(list(self.ocr_results.keys())) <= 8: # 8.5 版本客户提供的样本出现串页的情况,暂时无法识别 | ||
| 832 | # Page 1 | ||
| 833 | # 找合同编号 | ||
| 834 | contract_no = self.get_contract_no(page_num='0') | ||
| 835 | # print(contract_no) | ||
| 836 | self.init_result['page_1']['合同编号'] = contract_no | ||
| 837 | # 所购车辆价格 | ||
| 838 | vehicle_price = self.get_vehicle_price() | ||
| 839 | # print(vehicle_price) | ||
| 840 | self.init_result['page_1']['所购车辆价格'] = vehicle_price | ||
| 841 | # 车架号 | ||
| 842 | vin = self.get_vin() | ||
| 843 | # print(vin) | ||
| 844 | self.init_result['page_1']['车架号'] = vin | ||
| 845 | # 贷款本金金额(如果是 ASP产品)则'贷款本金金额'项目中包含'车辆贷款本金金额'和'附加产品融资贷款本金总金额'两个项目 | ||
| 846 | upper, lower, asp_1, asp_2 = self.get_loan_principal() | ||
| 847 | # print(upper, lower, asp_1, asp_2) | ||
| 848 | self.init_result['page_1']['贷款本金金额']['大写'] = upper | ||
| 849 | self.init_result['page_1']['贷款本金金额']['小写'] = lower | ||
| 850 | self.init_result['page_1']['贷款本金金额']['车辆贷款本金金额'] = asp_1 | ||
| 851 | self.init_result['page_1']['贷款本金金额']['附加产品融资贷款本金总金额'] = asp_2 | ||
| 852 | # 贷款期限 | ||
| 853 | loan_term = self.get_loan_term() | ||
| 854 | # print(loan_term) | ||
| 855 | self.init_result['page_1']['贷款期限'] = loan_term | ||
| 856 | # 附加产品融资贷款本金总金额明细(ASP-表格) | ||
| 857 | asp_details_table = self.get_asp_details(page_num='0') | ||
| 858 | # print(asp_details_table) | ||
| 859 | self.init_result['page_1']['附加产品融资贷款本金总金额明细'] = asp_details_table | ||
| 860 | # 借款人签字及时间 | ||
| 861 | signature = self.get_signature() | ||
| 862 | # print(signature) | ||
| 863 | self.init_result['page_1']['借款人签字及时间'] = signature | ||
| 864 | ####################################### | ||
| 865 | # Page 2 | ||
| 866 | # 找合同编号 | ||
| 867 | contract_no = self.get_contract_no(page_num='0') | ||
| 868 | # print(contract_no) | ||
| 869 | self.init_result['page_2']['合同编号'] = contract_no | ||
| 870 | # 找借款人及抵押人(地址字段原本有空格) | ||
| 871 | borrower_name, borrower_id = self.get_somebody(top='借款人及抵押人:', bottom='共同借款人:') | ||
| 872 | # 这是为了同时兼容 8.1 版本 | ||
| 873 | if borrower_name['words'] == None: | ||
| 874 | borrower_name, borrower_id = self.get_somebody(top='借款人及抵押人:', bottom='共同借款人及共同抵押人:') | ||
| 875 | # 这是为了兼容车贷分离版本 | ||
| 876 | if borrower_name['words'] == None: | ||
| 877 | borrower_name, borrower_id = self.get_somebody(top='借款人:', bottom='共同借款人及抵押人:') | ||
| 878 | # print(borrower_name, borrower_id) | ||
| 879 | self.init_result['page_2']['借款人及抵押人']['name'] = borrower_name | ||
| 880 | self.init_result['page_2']['借款人及抵押人']['id'] = borrower_id | ||
| 881 | # 找共同借款人及共同抵押人 | ||
| 882 | co_borrower_name, co_borrower_id = self.get_somebody(top='共同借款人:', bottom='保证人1:') | ||
| 883 | # print(co_borrower_name, co_borrower_id) | ||
| 884 | self.init_result['page_2']['共同借款人及共同抵押人']['name'] = co_borrower_name | ||
| 885 | self.init_result['page_2']['共同借款人及共同抵押人']['id'] = co_borrower_id | ||
| 886 | # 保证人1 | ||
| 887 | first_guarantor_name, first_guarantor_id = self.get_somebody(top='保证人1:', bottom='保证人2:') | ||
| 888 | self.init_result['page_2']['保证人1']['name'] = first_guarantor_name | ||
| 889 | self.init_result['page_2']['保证人1']['id'] = first_guarantor_id | ||
| 890 | # 保证人2 | ||
| 891 | second_guarantor_name, second_guarantor_id = self.get_somebody(top='保证人2:', bottom='第一章') | ||
| 892 | self.init_result['page_2']['保证人2']['name'] = second_guarantor_name | ||
| 893 | self.init_result['page_2']['保证人2']['id'] = second_guarantor_id | ||
| 894 | # 所购车辆价格 | ||
| 895 | vehicle_price = self.get_vehicle_price(page_num='1') | ||
| 896 | # print(vehicle_price) | ||
| 897 | self.init_result['page_2']['所购车辆价格'] = vehicle_price | ||
| 898 | # 车架号 | ||
| 899 | vin = self.get_vin(page_num='1') | ||
| 900 | # print(vin) | ||
| 901 | self.init_result['page_2']['车架号'] = vin | ||
| 902 | # 经销商 | ||
| 903 | seller = self.get_seller() | ||
| 904 | # print(seller) | ||
| 905 | self.init_result['page_2']['经销商'] = seller | ||
| 906 | # 贷款本金金额(如果是 ASP产品)则'贷款本金金额'项目中包含'车辆贷款本金金额'和'附加产品融资贷款本金总金额'两个项目 | ||
| 907 | upper, lower, asp_1, asp_2 = self.get_loan_principal(page_num='1') | ||
| 908 | # print(upper, lower, asp_1, asp_2) | ||
| 909 | self.init_result['page_2']['贷款本金金额']['大写'] = upper | ||
| 910 | self.init_result['page_2']['贷款本金金额']['小写'] = lower | ||
| 911 | self.init_result['page_2']['贷款本金金额']['车辆贷款本金金额'] = asp_1 | ||
| 912 | self.init_result['page_2']['贷款本金金额']['附加产品融资贷款本金总金额'] = asp_2 | ||
| 913 | # 贷款期限 | ||
| 914 | loan_term = self.get_loan_term(page_num='1') | ||
| 915 | # print(loan_term) | ||
| 916 | self.init_result['page_2']['贷款期限'] = loan_term | ||
| 917 | # 本合同当期的标准利率 | ||
| 918 | standard_rate = self.get_standard_rate(page_num='1') | ||
| 919 | # print(standard_rate) | ||
| 920 | self.init_result['page_2']['标准利率'] = standard_rate | ||
| 921 | # 202212 release 新增借款人收款账户 | ||
| 922 | account, account_name, account_bank = self.get_borrower_collection_account() | ||
| 923 | # print(account, account_name, account_bank) | ||
| 924 | self.init_result['page_2']['借款人收款账户']['账号'] = account | ||
| 925 | self.init_result['page_2']['借款人收款账户']['户名'] = account_name | ||
| 926 | self.init_result['page_2']['借款人收款账户']['开户行'] = account_bank | ||
| 927 | # 还款账户 | ||
| 928 | account, account_name, account_bank = self.get_payback_account() | ||
| 929 | # print(account, account_name, account_bank) | ||
| 930 | self.init_result['page_2']['还款账户']['账号'] = account | ||
| 931 | self.init_result['page_2']['还款账户']['户名'] = account_name | ||
| 932 | self.init_result['page_2']['还款账户']['开户行'] = account_bank | ||
| 933 | ####################################### | ||
| 934 | # Page 3 | ||
| 935 | # 找合同编号 | ||
| 936 | contract_no = self.get_contract_no(page_num='2') | ||
| 937 | self.init_result['page_3']['合同编号'] = contract_no | ||
| 938 | # 还款计划表(表格) | ||
| 939 | repayment_schedule_table = self.get_repayment_schedule() | ||
| 940 | # print(repayment_schedule_table) | ||
| 941 | self.init_result['page_3']['还款计划表'] = repayment_schedule_table | ||
| 942 | # 车辆代理商 | ||
| 943 | cldls = self.get_cldls() | ||
| 944 | self.init_result['page_3']['车辆代理商'] = cldls | ||
| 945 | ####################################### | ||
| 946 | # Page 4 | ||
| 947 | # 找合同编号 | ||
| 948 | contract_no = self.get_contract_no(page_num='3') | ||
| 949 | # print(contract_no) | ||
| 950 | self.init_result['page_4']['合同编号'] = contract_no | ||
| 951 | # 附加产品融资贷款本金总金额明细(ASP-表格) | ||
| 952 | asp_details_table = self.get_asp_details(page_num='3') | ||
| 953 | # print(asp_details_table) | ||
| 954 | self.init_result['page_4']['附加产品融资贷款本金总金额明细'] = asp_details_table | ||
| 955 | ####################################### | ||
| 956 | # Page 5 | ||
| 957 | # 找合同编号 | ||
| 958 | contract_no = self.get_contract_no(page_num='4') | ||
| 959 | # print(contract_no) | ||
| 960 | self.init_result['page_5']['合同编号'] = contract_no | ||
| 961 | ####################################### | ||
| 962 | # Page 6 | ||
| 963 | # 找合同编号 | ||
| 964 | contract_no = self.get_contract_no(page_num='5') | ||
| 965 | # print(contract_no) | ||
| 966 | self.init_result['page_6']['合同编号'] = contract_no | ||
| 967 | |||
| 968 | if self.is_asp: | ||
| 969 | # Page 7 | ||
| 970 | # 找合同编号 | ||
| 971 | contract_no = self.get_contract_no(page_num='6') | ||
| 972 | self.init_result['page_7']['合同编号'] = contract_no | ||
| 973 | # Page 8 | ||
| 974 | # 找合同编号 | ||
| 975 | contract_no = self.get_contract_no(page_num='7') | ||
| 976 | self.init_result['page_8']['合同编号'] = contract_no | ||
| 977 | signature_name, signature_date = self.get_last_page_signature(page_num='7', | ||
| 978 | top='合同编号', bottom='共同借款人') | ||
| 979 | if signature_name['words'] == None: | ||
| 980 | signature_name, signature_date = self.get_last_page_signature(page_num='7', | ||
| 981 | top='合同编号', bottom='共同借款人(抵押人)') | ||
| 982 | # print(signature_name, signature_date) | ||
| 983 | self.init_result['page_8']['主借人签字']['签字'] = signature_name | ||
| 984 | self.init_result['page_8']['主借人签字']['日期'] = signature_date | ||
| 985 | signature_name, signature_date = self.get_last_page_signature(page_num='7', | ||
| 986 | top='共同借款人', bottom='保证人1') | ||
| 987 | if signature_name['words'] == None: | ||
| 988 | signature_name, signature_date = self.get_last_page_signature(page_num='7', | ||
| 989 | top='共同借款人(抵押人)', bottom='保证人1') | ||
| 990 | # print(signature_name, signature_date) | ||
| 991 | self.init_result['page_8']['共借人签字']['签字'] = signature_name | ||
| 992 | self.init_result['page_8']['共借人签字']['日期'] = signature_date | ||
| 993 | signature_name, signature_date = self.get_last_page_signature(page_num='7', | ||
| 994 | top='保证人1', bottom='保证人2') | ||
| 995 | self.init_result['page_8']['保证人1签字']['签字'] = signature_name | ||
| 996 | self.init_result['page_8']['保证人1签字']['日期'] = signature_date | ||
| 997 | signature_name, signature_date = self.get_last_page_signature(page_num='7', | ||
| 998 | top='保证人2', bottom='在本人面前亲笔签署本合同') | ||
| 999 | self.init_result['page_8']['保证人2签字']['签字'] = signature_name | ||
| 1000 | self.init_result['page_8']['保证人2签字']['日期'] = signature_date | ||
| 1001 | signature_name, signature_date = self.get_last_page_signature(page_num='7', | ||
| 1002 | top='在本人面前亲笔签署本合同', bottom='以下无正文') | ||
| 1003 | # print(signature_name, signature_date) | ||
| 1004 | self.init_result['page_8']['见证人签字']['签字'] = signature_name | ||
| 1005 | self.init_result['page_8']['见证人签字']['日期'] = signature_date | ||
| 1006 | else: | ||
| 1007 | # Page 7 | ||
| 1008 | # 找合同编号 | ||
| 1009 | contract_no = self.get_contract_no(page_num='6') | ||
| 1010 | self.init_result['page_7']['合同编号'] = contract_no | ||
| 1011 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
| 1012 | top='合同编号', bottom='共同借款人') | ||
| 1013 | if signature_name['words'] == None: | ||
| 1014 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
| 1015 | top='合同编号', bottom='共同借款人(抵押人)') | ||
| 1016 | # print(signature_name, signature_date) | ||
| 1017 | self.init_result['page_7']['主借人签字']['签字'] = signature_name | ||
| 1018 | self.init_result['page_7']['主借人签字']['日期'] = signature_date | ||
| 1019 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
| 1020 | top='共同借款人', bottom='保证人1') | ||
| 1021 | if signature_name['words'] == None: | ||
| 1022 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
| 1023 | top='共同借款人(抵押人)', bottom='保证人1') | ||
| 1024 | # print(signature_name, signature_date) | ||
| 1025 | self.init_result['page_7']['共借人签字']['签字'] = signature_name | ||
| 1026 | self.init_result['page_7']['共借人签字']['日期'] = signature_date | ||
| 1027 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
| 1028 | top='保证人1', bottom='保证人2') | ||
| 1029 | self.init_result['page_7']['保证人1签字']['签字'] = signature_name | ||
| 1030 | self.init_result['page_7']['保证人1签字']['日期'] = signature_date | ||
| 1031 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
| 1032 | top='保证人2', bottom='在本人面前亲笔签署本合同') | ||
| 1033 | self.init_result['page_7']['保证人2签字']['签字'] = signature_name | ||
| 1034 | self.init_result['page_7']['保证人2签字']['日期'] = signature_date | ||
| 1035 | signature_name, signature_date = self.get_last_page_signature(page_num='6', | ||
| 1036 | top='在本人面前亲笔签署本合同', bottom='以下无正文') | ||
| 1037 | # print(signature_name, signature_date) | ||
| 1038 | self.init_result['page_7']['见证人签字']['签字'] = signature_name | ||
| 1039 | self.init_result['page_7']['见证人签字']['日期'] = signature_date | ||
| 1040 | |||
| 1041 | |||
| 1042 | # 重新定制输出 | ||
| 1043 | new_results = {"is_asp": self.is_asp, | ||
| 1044 | "page_info": self.init_result | ||
| 1045 | } | ||
| 1046 | return new_results | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file |
| 1 | import re | ||
| 2 | import numpy as np | ||
| 3 | from fuzzywuzzy import fuzz | ||
| 4 | from shapely.geometry import Polygon | ||
| 5 | |||
def caculate_iou(g, p):
    """Return the intersection-over-union of two polygons.

    Args:
        g: Flat coordinate sequence ``[x0, y0, x1, y1, ...]`` (anything that
            ``np.array(...).reshape((-1, 2))`` accepts) for the first polygon.
        p: Same layout, for the second polygon.

    Returns:
        float: ``intersection_area / union_area``. ``0.0`` is returned when
        the union has no area (both polygons are degenerate) instead of
        raising ``ZeroDivisionError``.
    """
    poly_g = Polygon(np.array(g).reshape((-1, 2)))
    poly_p = Polygon(np.array(p).reshape((-1, 2)))
    inter = poly_g.intersection(poly_p).area
    union = poly_g.area + poly_p.area - inter
    # Two zero-area shapes cannot overlap in any meaningful way.
    if union <= 0:
        return 0.0
    return inter / union
| 12 | |||
def get_table_info(bbox_1, bbox_2, ocr_result):
    """Return the text of the last OCR entry that overlaps a synthesized box.

    The synthesized 8-value box takes its even-index coordinates from
    ``bbox_2`` and its odd-index coordinates from ``bbox_1`` (presumably the
    column of one cell and the row of another -- confirm with the caller).

    Args:
        bbox_1: 8-value flat box supplying indices 1, 3, 5, 7.
        bbox_2: 8-value flat box supplying indices 0, 2, 4, 6.
        ocr_result: Iterable of entries where ``entry[0]`` is a box and
            ``entry[1]`` is its text.

    Returns:
        The text of the last entry whose IoU with the synthesized box is
        positive, or ``''`` when none overlaps.
    """
    anchor = [(bbox_1 if idx % 2 else bbox_2)[idx] for idx in range(8)]
    matched_text = ''
    for entry in ocr_result:
        if caculate_iou(anchor, entry[0]) > 0:
            # Later overlapping entries overwrite earlier ones.
            matched_text = entry[1]
    return matched_text
| 22 | |||
| 23 | class Finder: | ||
| 24 | |||
| 25 | def __init__(self, pdf_info): | ||
| 26 | self.pdf_info = pdf_info | ||
| 27 | self.item = {"words": None, | ||
| 28 | "page": None, | ||
| 29 | "position": None, | ||
| 30 | } | ||
| 31 | # 格式化算法输出 | ||
| 32 | self.init_result = {"合同编号": self.item, | ||
| 33 | "承租人-姓名": self.item, | ||
| 34 | "承租人-证件号码": self.item, | ||
| 35 | "承租人-法定代表人或授权代表": self.item, | ||
| 36 | |||
| 37 | "共同承租人-姓名": self.item, | ||
| 38 | "共同承租人-证件号码": self.item, | ||
| 39 | "共同承租人-法定代表人或授权代表": self.item, | ||
| 40 | |||
| 41 | "保证人1-姓名": self.item, | ||
| 42 | "保证人1-证件号码": self.item, | ||
| 43 | "保证人1-法定代表人或授权代表": self.item, | ||
| 44 | |||
| 45 | "保证人2-姓名": self.item, | ||
| 46 | "保证人2-证件号码": self.item, | ||
| 47 | "保证人2-法定代表人或授权代表": self.item, | ||
| 48 | "保证人3-姓名": self.item, | ||
| 49 | "保证人3-证件号码": self.item, | ||
| 50 | "保证人3-法定代表人或授权代表": self.item, | ||
| 51 | "合同编号(正文)": self.item, | ||
| 52 | "车辆识别代码": self.item, | ||
| 53 | "车辆卖方(经销商)": self.item, | ||
| 54 | "车辆代理商": self.item, | ||
| 55 | "车辆原始销售价格(《机动车销售统一发票》所列金额)": self.item, | ||
| 56 | "车辆附加产品明细表": self.item, | ||
| 57 | "融资成本总额": self.item, | ||
| 58 | "租期": self.item, | ||
| 59 | "付款计划表": self.item, | ||
| 60 | "收款银行账户-户名": self.item, | ||
| 61 | "收款银行账户-银行账号": self.item, | ||
| 62 | "收款银行账户-开户行": self.item, | ||
| 63 | "银行账户-户名": self.item, | ||
| 64 | "银行账户-银行账号": self.item, | ||
| 65 | "银行账户-开户行": self.item, | ||
| 66 | "签字页-承租人姓名": self.item, | ||
| 67 | "签字页-承租人签章": self.item, | ||
| 68 | |||
| 69 | "签字页-共同承租人姓名": self.item, | ||
| 70 | "签字页-共同承租人签章": self.item, | ||
| 71 | |||
| 72 | "签字页-保证人1姓名": self.item, | ||
| 73 | "签字页-保证人1签章": self.item, | ||
| 74 | |||
| 75 | "签字页-保证人2姓名": self.item, | ||
| 76 | "签字页-保证人2签章": self.item, | ||
| 77 | "签字页-保证人3姓名": self.item, | ||
| 78 | "签字页-保证人3签章": self.item, | ||
| 79 | } | ||
| 80 | |||
| 81 | # 格式化输出 车辆处置协议 要是别的字段 | ||
| 82 | self.init_result_1 = {"合同编号": self.item, | ||
| 83 | "承租人-姓名": self.item, | ||
| 84 | "承租人-证件号码": self.item, | ||
| 85 | "销售经销商": self.item, | ||
| 86 | "合同编号(正文)": self.item, | ||
| 87 | "签字页-承租人姓名": self.item, | ||
| 88 | "签字页-承租人证件号码": self.item, | ||
| 89 | "签字页-承租人签章": self.item, | ||
| 90 | "签字页-销售经销商": self.item, | ||
| 91 | "签字页-销售经销商签章": self.item, | ||
| 92 | } | ||
| 93 | |||
| 94 | # 格式化输出 车辆租赁抵押合同 | ||
| 95 | self.init_result_2 = {"合同编号": self.item, | ||
| 96 | "合同编号(正文)": self.item, | ||
| 97 | "抵押人姓名/名称": self.item, | ||
| 98 | "抵押人证件号码": self.item, | ||
| 99 | "抵押人配偶姓名/名称": self.item, | ||
| 100 | "抵押人配偶证件号码": self.item, | ||
| 101 | "车辆识别代码": self.item, | ||
| 102 | "租金总额": self.item, | ||
| 103 | "融资租赁期限": self.item, | ||
| 104 | "签字页-抵押人姓名": self.item, | ||
| 105 | "签字页-抵押人签章": self.item, | ||
| 106 | "签字页-抵押人配偶姓名": self.item, | ||
| 107 | "签字页-抵押人配偶签章": self.item, | ||
| 108 | } | ||
| 109 | |||
| 110 | def get_contract_no(self, page_num): | ||
| 111 | """传入页码,查看该页码右上角的编号 | ||
| 112 | |||
| 113 | Args: | ||
| 114 | page_num (string): | ||
| 115 | |||
| 116 | Returns: | ||
| 117 | sting: | ||
| 118 | """ | ||
| 119 | contract_no = self.item.copy() | ||
| 120 | # 只看第一页 | ||
| 121 | for block in self.pdf_info[page_num]['blocks']: | ||
| 122 | if block['type'] != 0: | ||
| 123 | continue | ||
| 124 | for line in block['lines']: | ||
| 125 | for span in line['spans']: | ||
| 126 | bbox, text = span['bbox'], span['text'] | ||
| 127 | if '合同编号:' in text: | ||
| 128 | words = text.split(':')[-1] | ||
| 129 | contract_no['position'] = bbox | ||
| 130 | contract_no['page'] = page_num | ||
| 131 | contract_no['words'] = words | ||
| 132 | if contract_no['words'] == '': | ||
| 133 | for block in self.pdf_info[page_num]['blocks']: | ||
| 134 | if block['type'] != 0: | ||
| 135 | continue | ||
| 136 | for line in block['lines']: | ||
| 137 | for span in line['spans']: | ||
| 138 | bbox, text = span['bbox'], span['text'] | ||
| 139 | if bbox[1] < contract_no['position'][3] and 'CH' in text: | ||
| 140 | contract_no['position'] = bbox | ||
| 141 | contract_no['page'] = page_num | ||
| 142 | contract_no['words'] = text | ||
| 143 | return contract_no | ||
| 144 | |||
| 145 | def get_vehicle_price(self, page_num='0'): | ||
| 146 | vehicle_price = self.item.copy() | ||
| 147 | for block in self.pdf_info[page_num]['blocks']: | ||
| 148 | if block['type'] != 0: | ||
| 149 | continue | ||
| 150 | for line in block['lines']: | ||
| 151 | for span in line['spans']: | ||
| 152 | bbox, text = span['bbox'], span['text'] | ||
| 153 | if '所购车辆价格为人民币' in text: | ||
| 154 | words = text.split('币')[-1] | ||
| 155 | vehicle_price['position'] = bbox | ||
| 156 | vehicle_price['words'] = words | ||
| 157 | return vehicle_price | ||
| 158 | |||
| 159 | def get_contract_no_one(self): | ||
| 160 | # 查找正文中的合同编号,有可能存在换行的情况 | ||
| 161 | contract_no = self.item.copy() | ||
| 162 | for pno in self.pdf_info: | ||
| 163 | all_text = '' | ||
| 164 | for block in self.pdf_info[pno]['blocks']: | ||
| 165 | if block['type'] != 0: | ||
| 166 | continue | ||
| 167 | for line in block['lines']: | ||
| 168 | for span in line['spans']: | ||
| 169 | bbox, text = span['bbox'], span['text'] | ||
| 170 | all_text += text | ||
| 171 | all_text = all_text.replace(' ', '') | ||
| 172 | matchObj = re.search(r'(合同编号:\[(.*?)\])', all_text) | ||
| 173 | if matchObj: | ||
| 174 | words = matchObj.group(1) | ||
| 175 | contract_no['position'] = None | ||
| 176 | contract_no['page'] = pno | ||
| 177 | # contract_no['words'] = words | ||
| 178 | contract_no['words'] = re.sub("\s", "", words).replace(")", "") | ||
| 179 | return contract_no | ||
| 180 | |||
| 181 | matchObj = re.search(r'编号为(.*?)的', all_text) | ||
| 182 | if matchObj: | ||
| 183 | words = matchObj.group(1).strip() | ||
| 184 | contract_no['position'] = None | ||
| 185 | contract_no['page'] = pno | ||
| 186 | # contract_no['words'] = words | ||
| 187 | contract_no['words'] = re.sub("\s", "", words).replace(")", "") | ||
| 188 | return contract_no | ||
| 189 | |||
| 190 | matchObj = re.search(r'编号为(.*?))的', all_text) | ||
| 191 | if matchObj: | ||
| 192 | words = matchObj.group(1).strip() | ||
| 193 | contract_no['position'] = None | ||
| 194 | contract_no['page'] = pno | ||
| 195 | # contract_no['words'] = words | ||
| 196 | contract_no['words'] = re.sub("\s", "", words) | ||
| 197 | return contract_no | ||
| 198 | |||
| 199 | def get_key_value(self, key, page_num=None): | ||
| 200 | value = self.item.copy() | ||
| 201 | if page_num is not None: | ||
| 202 | pno = page_num | ||
| 203 | for block in self.pdf_info[pno]['blocks']: | ||
| 204 | if block['type'] != 0: | ||
| 205 | continue | ||
| 206 | for line in block['lines']: | ||
| 207 | for span in line['spans']: | ||
| 208 | bbox, text = span['bbox'], span['text'] | ||
| 209 | if key in text: | ||
| 210 | words = text.split(':')[-1].replace("。", "") | ||
| 211 | value['position'] = bbox | ||
| 212 | value['page'] = pno | ||
| 213 | # value['words'] = words | ||
| 214 | value['words'] = re.sub("\s", "", words) | ||
| 215 | else: | ||
| 216 | for pno in self.pdf_info: | ||
| 217 | for block in self.pdf_info[pno]['blocks']: | ||
| 218 | if block['type'] != 0: | ||
| 219 | continue | ||
| 220 | for line in block['lines']: | ||
| 221 | for span in line['spans']: | ||
| 222 | bbox, text = span['bbox'], span['text'] | ||
| 223 | if key in text: | ||
| 224 | # print(self.pdf_info[pno]) | ||
| 225 | words = text.split(':')[-1].replace("。", "") | ||
| 226 | value['position'] = bbox | ||
| 227 | value['page'] = pno | ||
| 228 | # value['words'] = words | ||
| 229 | value['words'] = re.sub("\s", "", words) | ||
| 230 | return value | ||
| 231 | |||
| 232 | def get_loan_principal(self, page_num='0'): | ||
| 233 | chinese_keywords = ['壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖', '拾', | ||
| 234 | '佰', '仟', '万', '亿', '元', '角', '分', '零', '整'] | ||
| 235 | upper = self.item.copy() | ||
| 236 | lower = self.item.copy() | ||
| 237 | asp_1 = self.item.copy() | ||
| 238 | asp_2 = self.item.copy() | ||
| 239 | anchor_bbox = None | ||
| 240 | for block in self.pdf_info[page_num]['blocks']: | ||
| 241 | if block['type'] != 0: | ||
| 242 | continue | ||
| 243 | for line in block['lines']: | ||
| 244 | for span in line['spans']: | ||
| 245 | bbox, text = span['bbox'], span['text'] | ||
| 246 | if fuzz.ratio(''.join(chinese_keywords), text) > 15: | ||
| 247 | text = text.split(':')[-1].strip() | ||
| 248 | upper['position'] = bbox | ||
| 249 | upper['words'] = text | ||
| 250 | if '小写:¥' in text: | ||
| 251 | words = text.split('¥')[-1].strip() | ||
| 252 | lower['position'] = bbox | ||
| 253 | lower['words'] = words | ||
| 254 | if '附加产品融资贷款本金总金额' == text: | ||
| 255 | anchor_bbox = bbox | ||
| 256 | if anchor_bbox: | ||
| 257 | for block in self.pdf_info[page_num]['blocks']: | ||
| 258 | if block['type'] != 0: | ||
| 259 | continue | ||
| 260 | for line in block['lines']: | ||
| 261 | for span in line['spans']: | ||
| 262 | bbox, text = span['bbox'], span['text'] | ||
| 263 | if np.mean(bbox[1::2]) < np.mean(anchor_bbox[1::2]) and '人民币:小写:' in text: | ||
| 264 | words = re.findall(r'人民币:小写:\[(.*)\]', text)[0] | ||
| 265 | asp_1['position'] = bbox | ||
| 266 | asp_1['words'] = words | ||
| 267 | if np.mean(bbox[1::2]) > np.mean(anchor_bbox[1::2]) and '人民币:小写:' in text: | ||
| 268 | words = re.findall(r'人民币:小写:\[(.*)\]', text)[0] | ||
| 269 | asp_2['position'] = bbox | ||
| 270 | asp_2['words'] = words | ||
| 271 | return upper, lower, asp_1, asp_2 | ||
| 272 | |||
| 273 | def get_loan_term(self, page_num='0'): | ||
| 274 | loan_term = self.item.copy() | ||
| 275 | all_text = '' | ||
| 276 | for block in self.pdf_info[page_num]['blocks']: | ||
| 277 | if block['type'] != 0: | ||
| 278 | continue | ||
| 279 | for line in block['lines']: | ||
| 280 | for span in line['spans']: | ||
| 281 | bbox, text = span['bbox'], span['text'] | ||
| 282 | all_text += text | ||
| 283 | matchs = re.search(r'贷款期限(\d+)个月', all_text) | ||
| 284 | if matchs: | ||
| 285 | words = matchs.group(1) | ||
| 286 | for block in self.pdf_info[page_num]['blocks']: | ||
| 287 | if block['type'] != 0: | ||
| 288 | continue | ||
| 289 | for line in block['lines']: | ||
| 290 | for span in line['spans']: | ||
| 291 | bbox, text = span['bbox'], span['text'] | ||
| 292 | if f'{words}个月' in text: | ||
| 293 | loan_term['position'] = bbox | ||
| 294 | loan_term['words'] = words | ||
| 295 | return loan_term | ||
| 296 | |||
| 297 | def get_asp_details(self, page_num): | ||
| 298 | asp_details_table_term = self.item.copy() | ||
| 299 | |||
| 300 | asp_details_table = [] | ||
| 301 | asp_details_text_list = [] | ||
| 302 | table = False | ||
| 303 | for block in self.pdf_info[page_num]['blocks']: | ||
| 304 | if block['type'] != 0: | ||
| 305 | continue | ||
| 306 | for line in block['lines']: | ||
| 307 | for span in line['spans']: | ||
| 308 | bbox, text = span['bbox'], span['text'] | ||
| 309 | if '附加产品融资贷款本金总金额明细' == text: | ||
| 310 | table = True | ||
| 311 | if '第二条' in text or '征信管理' in text: | ||
| 312 | table = False | ||
| 313 | if table == True: | ||
| 314 | asp_details_text_list.append(text) | ||
| 315 | |||
| 316 | for i in range((len(asp_details_text_list)+2)//3): | ||
| 317 | |||
| 318 | line = [] | ||
| 319 | if i == 0: | ||
| 320 | line = [asp_details_text_list[0]] | ||
| 321 | else: | ||
| 322 | for j in range(3): | ||
| 323 | line.append(asp_details_text_list[i*3-2+j]) | ||
| 324 | |||
| 325 | asp_details_table.append(line) | ||
| 326 | |||
| 327 | if len(asp_details_table) > 0: | ||
| 328 | asp_details_table_term['words'] = asp_details_table | ||
| 329 | return asp_details_table_term | ||
| 330 | |||
| 331 | def get_signature(self): | ||
| 332 | signature = self.item.copy() | ||
| 333 | |||
| 334 | for block in self.pdf_info['0']['blocks']: | ||
| 335 | if block['type'] != 0: | ||
| 336 | continue | ||
| 337 | for line in block['lines']: | ||
| 338 | for span in line['spans']: | ||
| 339 | bbox, text = span['bbox'], span['text'] | ||
| 340 | if '签署日期' in text: | ||
| 341 | words = text | ||
| 342 | signature['words'] = words | ||
| 343 | signature['position'] = bbox | ||
| 344 | return signature | ||
| 345 | |||
| 346 | def get_somebody(self, top, bottom): | ||
| 347 | # 指定上下边界后,返回上下边界内的客户信息 | ||
| 348 | _name = self.item.copy() | ||
| 349 | _id = self.item.copy() | ||
| 350 | # 只看第一页,先划定上下边界 | ||
| 351 | y_top = 0 | ||
| 352 | y_bottom = 0 | ||
| 353 | for block in self.pdf_info['1']['blocks']: | ||
| 354 | if block['type'] != 0: | ||
| 355 | continue | ||
| 356 | for line in block['lines']: | ||
| 357 | for span in line['spans']: | ||
| 358 | bbox, text = span['bbox'], span['text'] | ||
| 359 | if top in text: | ||
| 360 | y_top = bbox[3] | ||
| 361 | if bottom in text: | ||
| 362 | y_bottom = bbox[3] | ||
| 363 | for block in self.pdf_info['1']['blocks']: | ||
| 364 | if block['type'] != 0: | ||
| 365 | continue | ||
| 366 | for line in block['lines']: | ||
| 367 | for span in line['spans']: | ||
| 368 | bbox, text = span['bbox'], span['text'] | ||
| 369 | if y_top < bbox[3] < y_bottom: | ||
| 370 | if '姓名/名称' in text: | ||
| 371 | words = text.split(':')[-1] | ||
| 372 | _name['position'] = bbox | ||
| 373 | _name['words'] = words | ||
| 374 | if '自然人身份证件号码/法人执照号码' in text: | ||
| 375 | words = text.split(':')[-1] | ||
| 376 | _id['position'] = bbox | ||
| 377 | _id['words'] = words | ||
| 378 | return _name, _id | ||
| 379 | |||
| 380 | def get_seller(self): | ||
| 381 | seller = self.item.copy() | ||
| 382 | # 先找到 key | ||
| 383 | anchor_bbox = None | ||
| 384 | for block in self.pdf_info['1']['blocks']: | ||
| 385 | if block['type'] != 0: | ||
| 386 | continue | ||
| 387 | for line in block['lines']: | ||
| 388 | for span in line['spans']: | ||
| 389 | bbox, text = span['bbox'], span['text'] | ||
| 390 | if '经销商' == text: | ||
| 391 | anchor_bbox = bbox | ||
| 392 | # 当找到了 key, 则根据 key 去匹配 value | ||
| 393 | if anchor_bbox: | ||
| 394 | half_width = self.pdf_info['1']['width'] * 0.5 | ||
| 395 | for block in self.pdf_info['1']['blocks']: | ||
| 396 | if block['type'] != 0: | ||
| 397 | continue | ||
| 398 | for line in block['lines']: | ||
| 399 | for span in line['spans']: | ||
| 400 | bbox, text = span['bbox'], span['text'] | ||
| 401 | if anchor_bbox[2]<np.mean(bbox[::2])<half_width and \ | ||
| 402 | anchor_bbox[1]<np.mean(bbox[1::2])<anchor_bbox[3]: | ||
| 403 | seller['position'] = bbox | ||
| 404 | seller['words'] = text | ||
| 405 | return seller | ||
| 406 | |||
| 407 | def get_payback_account(self): | ||
| 408 | account = self.item.copy() | ||
| 409 | account_name = self.item.copy() | ||
| 410 | account_bank = self.item.copy() | ||
| 411 | all_text = '' | ||
| 412 | for block in self.pdf_info['1']['blocks']: | ||
| 413 | if block['type'] != 0: | ||
| 414 | continue | ||
| 415 | for line in block['lines']: | ||
| 416 | for span in line['spans']: | ||
| 417 | bbox, text = span['bbox'], span['text'] | ||
| 418 | all_text += text | ||
| 419 | # 首先确定账户信息是哪种,我们只输出非另行通知的格式 | ||
| 420 | if '☑账号' in all_text: | ||
| 421 | all_text = all_text.replace(' ', '') | ||
| 422 | matchs_1 = re.findall(r'账号:(.*)户名', all_text) | ||
| 423 | if matchs_1: | ||
| 424 | words = matchs_1[0] | ||
| 425 | for block in self.pdf_info['1']['blocks']: | ||
| 426 | if block['type'] != 0: | ||
| 427 | continue | ||
| 428 | for line in block['lines']: | ||
| 429 | for span in line['spans']: | ||
| 430 | bbox, text = span['bbox'], span['text'] | ||
| 431 | if f'{words}' in text: | ||
| 432 | account['position'] = bbox | ||
| 433 | account['words'] = words | ||
| 434 | matchs_2 = re.findall(r'户名:(.*)开户行', all_text) | ||
| 435 | if matchs_2: | ||
| 436 | words = matchs_2[0] | ||
| 437 | for block in self.pdf_info['1']['blocks']: | ||
| 438 | if block['type'] != 0: | ||
| 439 | continue | ||
| 440 | for line in block['lines']: | ||
| 441 | for span in line['spans']: | ||
| 442 | bbox, text = span['bbox'], span['text'] | ||
| 443 | if f'{words}' in text: | ||
| 444 | account_name['position'] = bbox | ||
| 445 | account_name['words'] = words | ||
| 446 | matchs_3 = re.findall(r'开户行:(.*);', all_text) | ||
| 447 | if matchs_3: | ||
| 448 | words = matchs_3[0] | ||
| 449 | for block in self.pdf_info['1']['blocks']: | ||
| 450 | if block['type'] != 0: | ||
| 451 | continue | ||
| 452 | for line in block['lines']: | ||
| 453 | for span in line['spans']: | ||
| 454 | bbox, text = span['bbox'], span['text'] | ||
| 455 | if f'开户行:{words};' in text.replace(' ', ''): | ||
| 456 | account_bank['position'] = bbox | ||
| 457 | account_bank['words'] = words | ||
| 458 | return account, account_name, account_bank | ||
| 459 | |||
| 460 | def get_repayment_schedule(self): | ||
| 461 | repayment_schedule = self.item.copy() | ||
| 462 | |||
| 463 | repayment_schedule_text_list = [] | ||
| 464 | table = False | ||
| 465 | page = None | ||
| 466 | left = 0 | ||
| 467 | right = 0 | ||
| 468 | for pno in self.pdf_info: | ||
| 469 | for block in self.pdf_info[pno]['blocks']: | ||
| 470 | if block['type'] != 0: | ||
| 471 | continue | ||
| 472 | for line in block['lines']: | ||
| 473 | for span in line['spans']: | ||
| 474 | bbox, text = span['bbox'], span['text'] | ||
| 475 | if '剩余融资' in text: | ||
| 476 | right = bbox[2] | ||
| 477 | if '以上表格中所列序号' in text: | ||
| 478 | table = False | ||
| 479 | if table == True: | ||
| 480 | # 过滤汉字 | ||
| 481 | if re.compile(r'[\u4e00-\u9fff]').search(text): | ||
| 482 | continue | ||
| 483 | # 过滤 1. - 61. 这些标题 | ||
| 484 | if re.findall("\d+", text): | ||
| 485 | if len(re.findall("\d+", text)) == 1: | ||
| 486 | continue | ||
| 487 | if not left < bbox[0] < right: | ||
| 488 | continue | ||
| 489 | repayment_schedule_text_list.append(text) | ||
| 490 | |||
| 491 | if text.strip() == "61.": | ||
| 492 | page = pno | ||
| 493 | table = True | ||
| 494 | left = bbox[0] | ||
| 495 | # print("repayment_schedule_text_list = ", repayment_schedule_text_list) | ||
| 496 | # repayment_schedule_table = [['序号', '融资租赁成本', '融资租赁费用', '租金', '剩余融资租赁成本']] | ||
| 497 | repayment_schedule_table = [['序号', '租金']] | ||
| 498 | for i in range(len(repayment_schedule_text_list)//4): | ||
| 499 | line = [f'{i+1}.'] | ||
| 500 | # 4表示4列的意思 | ||
| 501 | for j in range(4): | ||
| 502 | line.append(repayment_schedule_text_list[i*4+j]) | ||
| 503 | |||
| 504 | # 只保留序号和租金列 | ||
| 505 | line = [line[0].replace('.', ''), line[3]] | ||
| 506 | |||
| 507 | repayment_schedule_table.append(line) | ||
| 508 | |||
| 509 | repayment_schedule['words'] = repayment_schedule_table | ||
| 510 | repayment_schedule['page'] = page | ||
| 511 | return repayment_schedule | ||
| 512 | |||
| 513 | def get_signature_role_1(self): | ||
| 514 | signature_role_1 = self.item.copy() | ||
| 515 | for pno in self.pdf_info: | ||
| 516 | for block in self.pdf_info[pno]['blocks']: | ||
| 517 | if block['type'] != 0: | ||
| 518 | continue | ||
| 519 | for line in block['lines']: | ||
| 520 | for span in line['spans']: | ||
| 521 | bbox, text = span['bbox'], span['text'] | ||
| 522 | if '签署日期' in text: | ||
| 523 | signature_role_1['position'] = bbox | ||
| 524 | signature_role_1['page'] = pno | ||
| 525 | signature_role_1['words'] = text | ||
| 526 | return signature_role_1 | ||
| 527 | |||
| 528 | def get_signature_role_2(self): | ||
| 529 | signature_role_2 = self.init_item.copy() | ||
| 530 | # 先定位签字区域 | ||
| 531 | texts = [] | ||
| 532 | boxes = [] | ||
| 533 | page_num = None | ||
| 534 | position = None | ||
| 535 | words = None | ||
| 536 | region = False | ||
| 537 | for i in list(self.pdf_info.keys()): | ||
| 538 | for block in self.pdf_info[i]['blocks']: | ||
| 539 | if block['type'] != 0: | ||
| 540 | continue | ||
| 541 | for line in block['lines']: | ||
| 542 | for span in line['spans']: | ||
| 543 | bbox, text = span['bbox'], span['text'] | ||
| 544 | if '共同借款人(共同抵押人)' in text: | ||
| 545 | region = True | ||
| 546 | if '日期' in text: | ||
| 547 | region = False | ||
| 548 | if region == True: | ||
| 549 | page_num = i | ||
| 550 | texts.append(text) | ||
| 551 | boxes.append(bbox) | ||
| 552 | if len(texts) > 4: | ||
| 553 | words = '有' | ||
| 554 | else: | ||
| 555 | words = '无' | ||
| 556 | boxes = np.array(boxes).reshape((-1, 2)) | ||
| 557 | position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])] | ||
| 558 | signature_role_2['page_num'] = page_num | ||
| 559 | signature_role_2['position'] = position | ||
| 560 | signature_role_2['words'] = words | ||
| 561 | return signature_role_2 | ||
| 562 | |||
| 563 | def get_signature_role_3(self): | ||
| 564 | signature_role_3 = self.init_item.copy() | ||
| 565 | # 先定位签字区域 | ||
| 566 | texts = [] | ||
| 567 | boxes = [] | ||
| 568 | page_num = None | ||
| 569 | position = None | ||
| 570 | words = None | ||
| 571 | region = False | ||
| 572 | for i in list(self.pdf_info.keys()): | ||
| 573 | for block in self.pdf_info[i]['blocks']: | ||
| 574 | if block['type'] != 0: | ||
| 575 | continue | ||
| 576 | for line in block['lines']: | ||
| 577 | for span in line['spans']: | ||
| 578 | bbox, text = span['bbox'], span['text'] | ||
| 579 | if '保证人1' in text and int(i) != 0: | ||
| 580 | region = True | ||
| 581 | if '日期' in text: | ||
| 582 | region = False | ||
| 583 | if region == True: | ||
| 584 | page_num = i | ||
| 585 | texts.append(text) | ||
| 586 | boxes.append(bbox) | ||
| 587 | if len(texts) > 4: | ||
| 588 | words = '有' | ||
| 589 | else: | ||
| 590 | words = '无' | ||
| 591 | boxes = np.array(boxes).reshape((-1, 2)) | ||
| 592 | position = [min(boxes[:,0]), min(boxes[:,1]), max(boxes[:,0]), max(boxes[:,1])] | ||
| 593 | signature_role_3['page_num'] = page_num | ||
| 594 | signature_role_3['position'] = position | ||
| 595 | signature_role_3['words'] = words | ||
| 596 | return signature_role_3 | ||
| 597 | |||
def get_signature_role_4(self):
    """Locate the '保证人2' signature region and report whether it looks signed.

    Text spans are collected starting at the first span that contains
    '保证人2' on a page whose key is not 0, and collection stops at a span
    that contains '日期'.  More than four collected spans is reported as
    '有', anything else as '无'.

    Returns:
        A copy of ``self.init_item`` with three entries filled in:
        ``page_num`` (key of the last page that contributed a span, or
        ``None``), ``position`` (``[x_min, y_min, x_max, y_max]`` over all
        collected boxes, or ``None`` when nothing was collected) and
        ``words`` ('有' or '无').
    """
    signature_role_4 = self.init_item.copy()
    # First locate the signature region.
    texts = []
    boxes = []
    page_num = None
    region = False
    for i in self.pdf_info:
        for block in self.pdf_info[i]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '保证人2' in text and int(i) != 0:
                        region = True
                    if '日期' in text:
                        region = False
                    if region:
                        page_num = i
                        texts.append(text)
                        boxes.append(bbox)
    words = '有' if len(texts) > 4 else '无'

    # Without any collected box there is nothing to take a min/max over;
    # leave the position unset instead of raising on the empty array.
    position = None
    if boxes:
        corners = np.array(boxes).reshape((-1, 2))
        position = [min(corners[:, 0]), min(corners[:, 1]), max(corners[:, 0]), max(corners[:, 1])]

    signature_role_4['page_num'] = page_num
    signature_role_4['position'] = position
    signature_role_4['words'] = words
    return signature_role_4
| 632 | |||
def get_signature_role_5(self):
    """Locate the '见证人签字' signature region and report whether it looks signed.

    Text spans are collected starting at the first span that contains
    '见证人签字' on a page whose key is not 0, and collection stops at a span
    that contains '年'.  More than four collected spans is reported as '有',
    anything else as '无'.

    Returns:
        A copy of ``self.init_item`` with three entries filled in:
        ``page_num`` (key of the last page that contributed a span, or
        ``None``), ``position`` (``[x_min, y_min, x_max, y_max]`` over all
        collected boxes, or ``None`` when nothing was collected) and
        ``words`` ('有' or '无').
    """
    signature_role_5 = self.init_item.copy()
    # First locate the signature region.
    texts = []
    boxes = []
    page_num = None
    region = False
    for i in self.pdf_info:
        for block in self.pdf_info[i]['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    bbox, text = span['bbox'], span['text']
                    if '见证人签字' in text and int(i) != 0:
                        region = True
                    if '年' in text:
                        region = False
                    if region:
                        page_num = i
                        texts.append(text)
                        boxes.append(bbox)
    words = '有' if len(texts) > 4 else '无'

    # Without any collected box there is nothing to take a min/max over;
    # leave the position unset instead of raising on the empty array.
    position = None
    if boxes:
        corners = np.array(boxes).reshape((-1, 2))
        position = [min(corners[:, 0]), min(corners[:, 1]), max(corners[:, 0]), max(corners[:, 1])]

    signature_role_5['page_num'] = page_num
    signature_role_5['position'] = position
    signature_role_5['words'] = words
    return signature_role_5
| 668 | |||
def get_last_page_signature(self, page_num, top, bottom):
    """Read the signer name and signing date between two anchor texts on a page.

    The page is scanned once to find the top edge (``bbox[1]``) of the last
    span containing ``top`` and of the last span containing ``bottom``.  If
    both are found, a span containing '签署日期' whose vertical centre lies
    strictly between the two (integer-truncated) anchors supplies the name
    (text before the first space) and the date (text after the last ':').

    Args:
        page_num: Key of the page in ``self.pdf_info`` to search.
        top: Text marking the upper anchor.
        bottom: Text marking the lower anchor.

    Returns:
        Tuple ``(signature_name, signature_date)`` of ``self.item`` copies.
        Both keep their initial content when no matching span is found.
    """
    signature_name = self.item.copy()
    signature_date = self.item.copy()
    anchor_top = None
    anchor_bottom = None
    for block in self.pdf_info[page_num]['blocks']:
        if block['type'] != 0:
            continue
        for line in block['lines']:
            for span in line['spans']:
                bbox, text = span['bbox'], span['text']
                if top in text:
                    anchor_top = bbox[1]
                if bottom in text:
                    anchor_bottom = bbox[1]
    if anchor_top is None or anchor_bottom is None:
        return signature_name, signature_date

    for block in self.pdf_info[page_num]['blocks']:
        if block['type'] != 0:
            continue
        for line in block['lines']:
            for span in line['spans']:
                bbox, text = span['bbox'], span['text']
                if '签署日期' in text and int(anchor_top) < np.mean(bbox[1::2]) < int(anchor_bottom):
                    signature_name['words'] = text.split(' ')[0]
                    signature_name['position'] = bbox
                    signature_date['words'] = text.split(':')[-1]
                    # The date shares the span with the name, so it gets the
                    # same box (previously written to signature_name twice).
                    signature_date['position'] = bbox
    return signature_name, signature_date
| 699 | |||
def get_electronic_signature(self, top, bottom, t_pno=None):
    """Find the '签署日期' span that sits between two anchor texts.

    A first pass records the top edge of the last span containing ``top``
    and the bottom edge of the last span that contains ``bottom`` (and does
    not contain ``top``) and lies lower than the current top anchor.  A
    second pass keeps the last span containing '签署日期' whose vertical
    centre is strictly between the two integer-truncated anchors.

    Args:
        top: Text marking the upper anchor.
        bottom: Text marking the lower anchor.
        t_pno: When not ``None``, only the page with this key is scanned;
            otherwise every page in ``self.pdf_info`` is scanned.

    Returns:
        A copy of ``self.item`` with ``words``, ``page`` and ``position``
        set from the matching span, or unchanged when nothing matches.
    """
    def text_spans():
        # Yield (page key, bbox, text) for every span of every type-0 block.
        for page_key in self.pdf_info:
            if t_pno is not None and page_key != t_pno:
                continue
            for blk in self.pdf_info[page_key]['blocks']:
                if blk['type'] != 0:
                    continue
                for row in blk['lines']:
                    for piece in row['spans']:
                        yield page_key, piece['bbox'], piece['text']

    signature = self.item.copy()
    upper = None
    lower = None
    for _, box, content in text_spans():
        if top in content:
            upper = box[1]
        elif bottom in content and upper is not None and box[3] > upper:
            lower = box[3]

    if upper is None or lower is None:
        return signature

    for page_key, box, content in text_spans():
        if '签署日期' in content and int(upper) < np.mean(box[1::2]) < int(lower):
            signature['words'] = content
            signature['page'] = page_key
            signature['position'] = box
    return signature
| 738 | |||
def get_role_info(self, role_key, page_num='0'):
    """Read one role's name, ID number and representative from a page.

    The top-left corner of the last span starting with '保证人3' is the
    anchor.  The name comes from any span matching ``role_key``.  The ID
    number ('证件号码:') and representative ('法定代表人或授权代表:') spans
    are accepted only when their box centre lies in the quadrant, relative
    to the anchor, that belongs to ``role_key``.

    Args:
        role_key: Pattern handed to ``re.match`` to find the name span.
            Quadrants are defined for '承租人:', '保证人1:', '保证人2:' and
            '保证人3:'; any other key only yields a name.
        page_num: Key of the page in ``self.pdf_info`` to search.

    Returns:
        Tuple ``(name, id_num, representative)`` of ``self.item`` copies.
        All three are returned unchanged when the anchor is not on the page.
    """
    name = self.item.copy()
    id_num = self.item.copy()
    representative = self.item.copy()

    # role key -> (centre x smaller than anchor x, centre y smaller than anchor y)
    quadrant_of = {
        '承租人:': (True, True),
        '保证人1:': (True, False),
        '保证人2:': (False, True),
        '保证人3:': (False, False),
    }

    def text_spans():
        for blk in self.pdf_info[page_num]['blocks']:
            if blk['type'] != 0:
                continue
            for row in blk['lines']:
                for piece in row['spans']:
                    yield piece['bbox'], piece['text']

    # Use the top-left corner of '保证人3' as the reference point.
    anchor = None
    for box, content in text_spans():
        if re.match('保证人3', content) is not None:
            anchor = [box[0], box[1]]
    if anchor is None:
        return name, id_num, representative

    def in_quadrant(box, smaller_x, smaller_y):
        x_mid = np.mean(box[::2])
        if not (x_mid < anchor[0] if smaller_x else x_mid > anchor[0]):
            return False
        y_mid = np.mean(box[1::2])
        return bool(y_mid < anchor[1] if smaller_y else y_mid > anchor[1])

    def fill(target, box, content):
        target['words'] = content.split(':')[-1]
        target['page'] = page_num
        target['position'] = box

    quadrant = quadrant_of.get(role_key)
    labelled_targets = (
        ('证件号码:', id_num),
        ('法定代表人或授权代表:', representative),
    )
    for box, content in text_spans():
        # The role name itself is not restricted to a quadrant.
        if re.match(role_key, content) is not None:
            fill(name, box, content)
        if quadrant is None:
            continue
        for label, target in labelled_targets:
            if re.match(label, content) is not None and in_quadrant(box, *quadrant):
                fill(target, box, content)
    return name, id_num, representative
| 822 | |||
def get_table_add_product(self):
    """Extract the additional-product table as a list of rows.

    The table is looked for on the last page containing the caption
    '车辆附加产品(明细见下表)'.  The header cells '项目', '购买价格',
    '实际融资金额' and the '总计' cell are located by exact text; product
    rows are then collected by stepping a probe box down the '项目' column.

    Returns:
        A copy of ``self.item``.  When the table is found, ``words`` holds
        the rows ``[item, purchase price, financed amount]`` (header first,
        '总计' row last), ``page`` the page key and ``position`` ``None``.
        When the caption or any of the four key cells is missing, the copy
        is returned unchanged instead of raising.
    """
    table_add_product = self.item.copy()

    add_product_page_num = None
    for pno in self.pdf_info:
        for block in self.pdf_info[f'{pno}']['blocks']:
            if block['type'] != 0:
                continue
            for line in block['lines']:
                for span in line['spans']:
                    if '车辆附加产品(明细见下表)' in span['text']:
                        add_product_page_num = pno
    if add_product_page_num is None:
        # No page carries the table caption: nothing to extract.
        return table_add_product

    # Flatten the page into [8-value corner box, text] pairs.
    ocr_results = []
    for block in self.pdf_info[f'{add_product_page_num}']['blocks']:
        if block['type'] != 0:
            continue
        for line in block['lines']:
            for span in line['spans']:
                xmin, ymin, xmax, ymax = span['bbox']
                corners = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]
                ocr_results.append([corners, span['text']])

    key_xm = None
    key_gmjg = None
    key_sjrzje = None
    key_total = None
    for index, (_, text) in enumerate(ocr_results):
        if text == '项目':
            key_xm = index
        if text == '购买价格':
            key_gmjg = index
        if text == '实际融资金额':
            key_sjrzje = index
        if text == '总计':
            key_total = index
    if None in (key_xm, key_gmjg, key_sjrzje, key_total):
        # Indexing ocr_results with a missing key would raise a TypeError.
        return table_add_product

    lines = [['项目', '购买价格', '实际融资金额']]

    bbox = ocr_results[key_xm][0]
    rh = abs(bbox[1]-bbox[-1])
    # Probe box: the '项目' header shifted right by two row heights and
    # down by one.
    anchor = np.array(bbox).reshape((-1, 2))
    anchor[:, 0] += 2*rh
    anchor[:, 1] += rh

    # caculate_iou / get_table_info are helpers defined elsewhere in the
    # file; presumably a box-overlap ratio and a row/column cell lookup
    # (NOTE(review): confirm against their definitions).
    for _ in range(5):
        for span in ocr_results:
            iou = caculate_iou(anchor, span[0])
            if iou > 0.01 and span[1].strip() != '所购':
                x = get_table_info(span[0], ocr_results[key_gmjg][0], ocr_results)
                y = get_table_info(span[0], ocr_results[key_sjrzje][0], ocr_results)
                lines.append([span[1].replace('\u3000', ' '), x, y])
                # Move the probe one row below the span just matched.
                anchor = np.array(span[0]).reshape((-1, 2))
                anchor[:, 1] += rh

    total = get_table_info(ocr_results[key_total][0], ocr_results[key_sjrzje][0], ocr_results)
    lines.append(['总计', '', total])

    # Product names can wrap onto a second line inside the cell (e.g.
    # '所购 BMW悦然焕' / '新服务'), so only the first part is captured;
    # known products are restored to their full names below.
    filtered_lines = []
    for line in lines:
        if line[0][:2] not in ['所购', '项目', '总计']:
            continue
        if 'BMW悦然' in line[0]:
            line[0] = '所购 BMW悦然焕新服务'
        if 'BMW5年10' in line[0]:
            line[0] = '所购 BMW5年10万公里长悦保养套餐'
        if '事故维修补' in line[0]:
            line[0] = '所购 事故维修补偿方案'
        if 'MINI4年6万公里长悦' in line[0]:
            line[0] = '所购 MINI4年6万公里长悦保养套餐'
        filtered_lines.append(line)
    table_add_product['words'] = filtered_lines
    table_add_product['page'] = add_product_page_num
    table_add_product['position'] = None
    return table_add_product
| 917 | |||
def get_contract_no_dy(self):
    """Find the mortgage contract number next to its '抵押合同编号' label.

    The box of the last span containing '抵押合同编号' defines a vertical
    band.  The last span, on any page, that contains 'CH-' and whose
    vertical centre lies strictly inside that band is taken as the number.
    The band is compared on every page, not only the page holding the label.

    Returns:
        A copy of ``self.item`` with ``position``, ``page`` and ``words``
        set from the matching span, or unchanged when nothing matches.
    """
    contract_no = self.item.copy()

    # Flatten every type-0 block into (page key, bbox, text) triples once.
    spans = [
        (page_key, piece['bbox'], piece['text'])
        for page_key in self.pdf_info
        for blk in self.pdf_info[page_key]['blocks']
        if blk['type'] == 0
        for row in blk['lines']
        for piece in row['spans']
    ]

    label_box = None
    for _, box, content in spans:
        if '抵押合同编号' in content:
            label_box = box
    if label_box is None:
        return contract_no

    band_top, band_bottom = label_box[1], label_box[3]
    for page_key, box, content in spans:
        if band_top < np.mean(box[1::2]) < band_bottom and 'CH-' in content:
            contract_no['position'] = box
            contract_no['page'] = page_key
            contract_no['words'] = content
    return contract_no
| 946 | |||
def get_dyr_name_id(self):
    """Read the name and ID number listed under the '抵押人' label.

    The last span whose text is exactly '抵押人' defines a vertical band
    from its top edge to three box heights below its bottom edge.  Within
    that band (checked on every page) the last span containing '姓名'
    supplies the name and the last span containing '证件号码' supplies the
    ID; in both cases the value is the text after the last ':'.

    Returns:
        Tuple ``(name, _id)`` of ``self.item`` copies, unchanged for
        whatever was not found.
    """
    name = self.item.copy()
    _id = self.item.copy()

    def text_spans():
        for page_key in self.pdf_info:
            for blk in self.pdf_info[page_key]['blocks']:
                if blk['type'] != 0:
                    continue
                for row in blk['lines']:
                    for piece in row['spans']:
                        yield page_key, piece['bbox'], piece['text']

    label_box = None
    for _, box, content in text_spans():
        if content == '抵押人':
            label_box = box
    if label_box is None:
        return name, _id

    row_height = abs(label_box[1]-label_box[3])
    band_top = label_box[1]
    band_bottom = label_box[3]+row_height*3
    for page_key, box, content in text_spans():
        if not band_top < np.mean(box[1::2]) < band_bottom:
            continue
        if '姓名' in content:
            name['position'] = box
            name['page'] = page_key
            name['words'] = content.split(':')[-1]
        if '证件号码' in content:
            _id['position'] = box
            _id['page'] = page_key
            _id['words'] = content.split(':')[-1]
    return name, _id
| 982 | |||
def get_dyrpo_name_id(self):
    """Read the name and ID number listed under the '抵押人配偶(如适' label.

    The last span whose text is exactly '抵押人配偶(如适' defines a vertical
    band from its top edge to three box heights below its bottom edge.
    Within that band (checked on every page) the last span containing '姓名'
    supplies the name and the last span containing '证件号码' supplies the
    ID.  Values are the text after the last ':'; the ID is also stripped of
    surrounding whitespace.

    Returns:
        Tuple ``(name, _id)`` of ``self.item`` copies, unchanged for
        whatever was not found.
    """
    name = self.item.copy()
    _id = self.item.copy()

    def text_spans():
        for page_key in self.pdf_info:
            for blk in self.pdf_info[page_key]['blocks']:
                if blk['type'] != 0:
                    continue
                for row in blk['lines']:
                    for piece in row['spans']:
                        yield page_key, piece['bbox'], piece['text']

    label_box = None
    for _, box, content in text_spans():
        if content == '抵押人配偶(如适':
            label_box = box
    if label_box is None:
        return name, _id

    row_height = abs(label_box[1]-label_box[3])
    band_top = label_box[1]
    band_bottom = label_box[3]+row_height*3
    for page_key, box, content in text_spans():
        if not band_top < np.mean(box[1::2]) < band_bottom:
            continue
        if '姓名' in content:
            name['position'] = box
            name['page'] = page_key
            name['words'] = content.split(':')[-1]
        if '证件号码' in content:
            _id['position'] = box
            _id['page'] = page_key
            _id['words'] = content.split(':')[-1].strip()
    return name, _id
| 1018 | |||
def get_key_value_position(self, key):
    """Find the value span printed to the right of a key on the same row.

    The last span whose text equals ``key`` is the reference box.  A span
    is accepted as the value when its vertical centre lies strictly within
    the key box's vertical range, its left edge is to the right of the
    key's left edge, and the gap between the key's right edge and the
    span's left edge is under ten key-box heights.  The last accepted span
    wins; every page is checked against the same box.

    Args:
        key: Exact text of the key span.

    Returns:
        A copy of ``self.item`` with ``position``, ``page`` and ``words``
        set from the value span, or unchanged when nothing matches.
    """
    value = self.item.copy()

    spans = [
        (page_key, piece['bbox'], piece['text'])
        for page_key in self.pdf_info
        for blk in self.pdf_info[page_key]['blocks']
        if blk['type'] == 0
        for row in blk['lines']
        for piece in row['spans']
    ]

    label_box = None
    for _, box, content in spans:
        if content == key:
            label_box = box
    if label_box is None:
        return value

    row_height = abs(label_box[1]-label_box[3])
    for page_key, box, content in spans:
        same_row = label_box[1] < np.mean(box[1::2]) < label_box[3]
        if not same_row:
            continue
        if label_box[0] < box[0] and abs(label_box[2]-box[0]) < row_height*10:
            value['position'] = box
            value['page'] = page_key
            value['words'] = content
    return value
| 1048 | |||
def get_role_info_3_3(self, role_key, page_num='0'):
    """Read one role's name, ID number and representative (3_3 layout).

    The top-left corner of the last span starting with '保证人2' is the
    anchor.  The name comes from any span matching ``role_key``.  The ID
    number ('证件号码:') and representative ('法定代表人或授权代表:') spans
    are accepted only when their box centre lies in the quadrant, relative
    to the anchor, that belongs to ``role_key``.

    Args:
        role_key: Pattern handed to ``re.match`` to find the name span.
            Quadrants are defined for '承租人一:', '共同承租人:', '保证人1:'
            and '保证人2:'; any other key only yields a name.
        page_num: Key of the page in ``self.pdf_info`` to search.

    Returns:
        Tuple ``(name, id_num, representative)`` of ``self.item`` copies.
        All three are returned unchanged when the anchor is not on the page.
    """
    name = self.item.copy()
    id_num = self.item.copy()
    representative = self.item.copy()

    # role key -> (centre x smaller than anchor x, centre y smaller than anchor y)
    quadrant_of = {
        '承租人一:': (True, True),
        '共同承租人:': (True, False),
        '保证人1:': (False, True),
        '保证人2:': (False, False),
    }

    def text_spans():
        for blk in self.pdf_info[page_num]['blocks']:
            if blk['type'] != 0:
                continue
            for row in blk['lines']:
                for piece in row['spans']:
                    yield piece['bbox'], piece['text']

    # Use the top-left corner of '保证人2' as the reference point.
    anchor = None
    for box, content in text_spans():
        if re.match('保证人2', content) is not None:
            anchor = [box[0], box[1]]
    if anchor is None:
        return name, id_num, representative

    def in_quadrant(box, smaller_x, smaller_y):
        x_mid = np.mean(box[::2])
        if not (x_mid < anchor[0] if smaller_x else x_mid > anchor[0]):
            return False
        y_mid = np.mean(box[1::2])
        return bool(y_mid < anchor[1] if smaller_y else y_mid > anchor[1])

    def fill(target, box, content):
        target['words'] = content.split(':')[-1]
        target['page'] = page_num
        target['position'] = box

    quadrant = quadrant_of.get(role_key)
    labelled_targets = (
        ('证件号码:', id_num),
        ('法定代表人或授权代表:', representative),
    )
    for box, content in text_spans():
        # The role name itself is not restricted to a quadrant.
        if re.match(role_key, content) is not None:
            fill(name, box, content)
        if quadrant is None:
            continue
        for label, target in labelled_targets:
            if re.match(label, content) is not None and in_quadrant(box, *quadrant):
                fill(target, box, content)
    return name, id_num, representative
| 1132 | |||
def get_value_by_findall(self, prefix, suffix, page_num):
    """Extract the text found between two literal markers on one page.

    All span texts of the page are concatenated, and the shortest text
    between ``prefix`` and ``suffix`` is searched for in the result.  The
    first hit becomes the value; its position is the box of the last span
    that contains the extracted text.

    Args:
        prefix: Literal text that precedes the value.
        suffix: Literal text that follows the value.
        page_num: Key of the page in ``self.pdf_info`` to search.

    Returns:
        A copy of ``self.item`` with ``position``, ``page`` and ``words``
        set, or unchanged when no match (or no containing span) is found.
    """
    value = self.item.copy()
    pno = page_num

    span_items = [
        (span['bbox'], span['text'])
        for block in self.pdf_info[pno]['blocks']
        if block['type'] == 0
        for line in block['lines']
        for span in line['spans']
    ]
    all_text = ''.join(text for _, text in span_items)

    # Escape the markers: they are literal page text, and characters such
    # as '(' or ')' would otherwise be read as regex syntax (an extra group
    # makes findall return tuples instead of strings).
    pattern = f"{re.escape(prefix)}(.*?){re.escape(suffix)}"
    words_list = re.findall(pattern, all_text)
    if len(words_list) > 0:
        for bbox, text in span_items:
            if words_list[0] in text:
                value['position'] = bbox
                value['page'] = pno
                value['words'] = words_list[0]
    return value
| 1157 | |||
| 1158 | def get_info(self): | ||
| 1159 | """ | ||
| 1160 | Extract the contract fields from the parsed PDF and store them in self.init_result. | ||
| 1161 | The page-0 scan below only reads blocks whose block['type'] == 0 (their lines/spans) and skips every other block type. | ||
| 1162 | Returns: | ||
| 1163 | self.init_result, with each extracted item stored under its field name. | ||
| 1164 | """ | ||
| 1165 | if len(self.pdf_info) > 0: | ||
| 1166 | # Take the contract number from the first page (page key '0') | ||
| 1167 | contract_no = self.get_contract_no(page_num='0') | ||
| 1168 | self.init_result['合同编号'] = contract_no | ||
| 1169 | |||
| 1170 | # Rough check for the '车贷分离' contract version: it is assumed when any page-0 span contains '共同承租人:' | ||
| 1171 | is_cdfl = False | ||
| 1172 | for block in self.pdf_info['0']['blocks']: | ||
| 1173 | if block['type'] != 0: | ||
| 1174 | continue | ||
| 1175 | for line in block['lines']: | ||
| 1176 | for span in line['spans']: | ||
| 1177 | bbox, text = span['bbox'], span['text'] | ||
| 1178 | if '共同承租人:' in text: | ||
| 1179 | is_cdfl = True | ||
| 1180 | |||
| 1181 | if is_cdfl == False: | ||
| 1182 | # Take the names and ID numbers of the four roles from the first page | ||
| 1183 | name, id_num, representative = self.get_role_info(role_key='承租人:', page_num='0') | ||
| 1184 | |||
| 1185 | if name["words"] == None: | ||
| 1186 | name, id_num, representative = self.get_role_info_3_3(role_key='承租人一:', page_num='0') | ||
| 1187 | self.init_result['承租人-姓名'] = name | ||
| 1188 | self.init_result['承租人-证件号码'] = id_num | ||
| 1189 | self.init_result['承租人-法定代表人或授权代表'] = representative | ||
| 1190 | |||
| 1191 | name, id_num, representative = self.get_role_info(role_key='保证人1:', page_num='0') | ||
| 1192 | self.init_result['保证人1-姓名'] = name | ||
| 1193 | self.init_result['保证人1-证件号码'] = id_num | ||
| 1194 | self.init_result['保证人1-法定代表人或授权代表'] = representative | ||
| 1195 | # Fallback for the 3_3 version when the name above was not found | ||
| 1196 | if name["words"] == None: | ||
| 1197 | name, id_num, representative = self.get_role_info_3_3(role_key='共同承租人:', page_num='0') | ||
| 1198 | self.init_result['共同承租人-姓名'] = name | ||
| 1199 | self.init_result['共同承租人-证件号码'] = id_num | ||
| 1200 | self.init_result['共同承租人-法定代表人或授权代表'] = representative | ||
| 1201 | |||
| 1202 | name, id_num, representative = self.get_role_info(role_key='保证人2:', page_num='0') | ||
| 1203 | self.init_result['保证人2-姓名'] = name | ||
| 1204 | self.init_result['保证人2-证件号码'] = id_num | ||
| 1205 | self.init_result['保证人2-法定代表人或授权代表'] = representative | ||
| 1206 | # Fallback for the 3_3 version. NOTE(review): this reads role '保证人1:' but stores it under the '保证人2-*' keys, while the signature-page fallback further down stores '保证人1' under the '保证人1' keys - confirm which is intended | ||
| 1207 | if name["words"] == None: | ||
| 1208 | name, id_num, representative = self.get_role_info_3_3(role_key='保证人1:', page_num='0') | ||
| 1209 | self.init_result['保证人2-姓名'] = name | ||
| 1210 | self.init_result['保证人2-证件号码'] = id_num | ||
| 1211 | self.init_result['保证人2-法定代表人或授权代表'] = representative | ||
| 1212 | |||
| 1213 | name, id_num, representative = self.get_role_info(role_key='保证人3:', page_num='0') | ||
| 1214 | self.init_result['保证人3-姓名'] = name | ||
| 1215 | self.init_result['保证人3-证件号码'] = id_num | ||
| 1216 | self.init_result['保证人3-法定代表人或授权代表'] = representative | ||
| 1217 | if name["words"] == None: | ||
| 1218 | name, id_num, representative = self.get_role_info_3_3(role_key='保证人2:', page_num='0') | ||
| 1219 | self.init_result['保证人3-姓名'] = name | ||
| 1220 | self.init_result['保证人3-证件号码'] = id_num | ||
| 1221 | self.init_result['保证人3-法定代表人或授权代表'] = representative | ||
| 1222 | else: | ||
| 1223 | name, id_num, representative = self.get_role_info_3_3(role_key='承租人一:', page_num='0') | ||
| 1224 | self.init_result['承租人-姓名'] = name | ||
| 1225 | self.init_result['承租人-证件号码'] = id_num | ||
| 1226 | self.init_result['承租人-法定代表人或授权代表'] = representative | ||
| 1227 | |||
| 1228 | name, id_num, representative = self.get_role_info_3_3(role_key='共同承租人:', page_num='0') | ||
| 1229 | self.init_result['共同承租人-姓名'] = name | ||
| 1230 | self.init_result['共同承租人-证件号码'] = id_num | ||
| 1231 | self.init_result['共同承租人-法定代表人或授权代表'] = representative | ||
| 1232 | |||
| 1233 | name, id_num, representative = self.get_role_info_3_3(role_key='保证人1:', page_num='0') | ||
| 1234 | self.init_result['保证人1-姓名'] = name | ||
| 1235 | self.init_result['保证人1-证件号码'] = id_num | ||
| 1236 | self.init_result['保证人1-法定代表人或授权代表'] = representative | ||
| 1237 | |||
| 1238 | name, id_num, representative = self.get_role_info_3_3(role_key='保证人2:', page_num='0') | ||
| 1239 | self.init_result['保证人2-姓名'] = name | ||
| 1240 | self.init_result['保证人2-证件号码'] = id_num | ||
| 1241 | self.init_result['保证人2-法定代表人或授权代表'] = representative | ||
| 1242 | |||
| 1243 | # Find, across all pages, the contract number in the body text (Part 2: main finance-lease terms and payment plan); because it can wrap across lines it is output without a position for now | ||
| 1244 | contract_no = self.get_contract_no_one() | ||
| 1245 | self.init_result['合同编号(正文)'] = contract_no | ||
| 1246 | # Find the vehicle identification code | ||
| 1247 | vin = self.get_key_value(key='车辆识别代码:') | ||
| 1248 | self.init_result['车辆识别代码'] = vin | ||
| 1249 | # Find the dealer (vehicle seller (dealer)); falls back to the shorter key '车辆卖方:' | ||
| 1250 | seller = self.get_key_value(key='车辆卖方(经销商):') | ||
| 1251 | if seller['words'] == None: | ||
| 1252 | seller = self.get_key_value(key='车辆卖方:') | ||
| 1253 | self.init_result['车辆卖方(经销商)'] = seller | ||
| 1254 | # Find the vehicle agent | ||
| 1255 | cldls = self.get_key_value(key='车辆代理商', page_num='4') | ||
| 1256 | self.init_result['车辆代理商'] = cldls | ||
| 1257 | # Find the original vehicle sale price | ||
| 1258 | vehicle_price = self.get_key_value(key='车辆原始销售价格(《机动车销售统一发票》所列金额):') | ||
| 1259 | self.init_result['车辆原始销售价格(《机动车销售统一发票》所列金额)'] = vehicle_price | ||
| 1260 | # Find the vehicle additional-product details (table) | ||
| 1261 | table_add_product = self.get_table_add_product() | ||
| 1262 | self.init_result['车辆附加产品明细表'] = table_add_product | ||
| 1263 | # Find the total financing cost | ||
| 1264 | financing_cost = self.get_key_value(key='融资成本总额:') | ||
| 1265 | self.init_result['融资成本总额'] = financing_cost | ||
| 1266 | # Find the lease term | ||
| 1267 | lease_term = self.get_key_value(key='租期:') | ||
| 1268 | self.init_result['租期'] = lease_term | ||
| 1269 | # Find the repayment schedule (table) | ||
| 1270 | repayment_schedule = self.get_repayment_schedule() | ||
| 1271 | self.init_result['付款计划表'] = repayment_schedule | ||
| 1272 | # Find the lessee's receiving account: account name, account number, bank (read from page key '4') | ||
| 1273 | name = self.get_key_value(key='户名:', page_num='4') | ||
| 1274 | self.init_result['收款银行账户-户名'] = name | ||
| 1275 | account = self.get_key_value(key='银行账号:', page_num='4') | ||
| 1276 | self.init_result['收款银行账户-银行账号'] = account | ||
| 1277 | bank = self.get_key_value(key='开户银行:', page_num='4') | ||
| 1278 | self.init_result['收款银行账户-开户行'] = bank | ||
| 1279 | # Find the lessee's debit account: account name, account number, bank (read from page key '5') | ||
| 1280 | name = self.get_key_value(key='户名:', page_num='5') | ||
| 1281 | self.init_result['银行账户-户名'] = name | ||
| 1282 | account = self.get_key_value(key='银行账号:', page_num='5') | ||
| 1283 | self.init_result['银行账户-银行账号'] = account | ||
| 1284 | bank = self.get_key_value(key='开户银行:', page_num='5') | ||
| 1285 | self.init_result['银行账户-开户行'] = bank | ||
| 1286 | |||
| 1287 | # Find the set of items on the signature page | ||
| 1288 | # Lessee name and signature stamp | ||
| 1289 | if is_cdfl == False: | ||
| 1290 | name = self.get_key_value(key='承租人姓名:') | ||
| 1291 | electronic_signature = self.get_electronic_signature(top='承租人姓名:', bottom='保证人1姓名:', t_pno='5') | ||
| 1292 | |||
| 1293 | if name["words"] == None: | ||
| 1294 | name = self.get_key_value(key='承租人一姓名:') | ||
| 1295 | electronic_signature = self.get_electronic_signature(top='承租人一姓名:', bottom='共同承租人名称:', t_pno='5') | ||
| 1296 | |||
| 1297 | self.init_result['签字页-承租人姓名'] = name | ||
| 1298 | self.init_result['签字页-承租人签章'] = electronic_signature | ||
| 1299 | # Guarantor 1 name and signature stamp | ||
| 1300 | name = self.get_key_value(key='保证人1姓名:') | ||
| 1301 | electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:', t_pno='5') | ||
| 1302 | self.init_result['签字页-保证人1姓名'] = name | ||
| 1303 | self.init_result['签字页-保证人1签章'] = electronic_signature | ||
| 1304 | # This check uses name["words"] == "" while other fallbacks in this method compare with None. NOTE(review): confirm which value get_key_value returns when nothing is found | ||
| 1305 | if name["words"] == "": | ||
| 1306 | name = self.get_key_value(key='共同承租人名称:') | ||
| 1307 | electronic_signature = self.get_electronic_signature(top='共同承租人名称:', bottom='保证人1姓名:', t_pno='5') | ||
| 1308 | self.init_result['签字页-共同承租人姓名'] = name | ||
| 1309 | self.init_result['签字页-共同承租人签章'] = electronic_signature | ||
| 1310 | # Guarantor 2 name and signature stamp | ||
| 1311 | name = self.get_key_value(key='保证人2姓名:') | ||
| 1312 | electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='保证人3姓名:', t_pno='5') | ||
| 1313 | self.init_result['签字页-保证人2姓名'] = name | ||
| 1314 | self.init_result['签字页-保证人2签章'] = electronic_signature | ||
| 1315 | # The if condition below is the fallback for the 3_3 version | ||
| 1316 | if name["words"] == "": | ||
| 1317 | name = self.get_key_value(key='保证人1姓名:') | ||
| 1318 | electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:', t_pno='5') | ||
| 1319 | self.init_result['签字页-保证人1姓名'] = name | ||
| 1320 | self.init_result['签字页-保证人1签章'] = electronic_signature | ||
| 1321 | # Guarantor 3 name and signature stamp | ||
| 1322 | name = self.get_key_value(key='保证人3姓名:') | ||
| 1323 | electronic_signature = self.get_electronic_signature(top='保证人3姓名:', bottom='日期:', t_pno='5') | ||
| 1324 | self.init_result['签字页-保证人3姓名'] = name | ||
| 1325 | self.init_result['签字页-保证人3签章'] = electronic_signature | ||
| 1326 | # The if condition below is the fallback for the 3_3 version | ||
| 1327 | if name["words"] == None: | ||
| 1328 | name = self.get_key_value(key='保证人2姓名:') | ||
| 1329 | electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='日期:', t_pno='5') | ||
| 1330 | self.init_result['签字页-保证人2姓名'] = name | ||
| 1331 | self.init_result['签字页-保证人2签章'] = electronic_signature | ||
| 1332 | else: | ||
| 1333 | name = self.get_key_value(key='承租人一姓名:') | ||
| 1334 | electronic_signature = self.get_electronic_signature(top='承租人一姓名:', bottom='共同承租人名称:', t_pno='5') | ||
| 1335 | self.init_result['签字页-承租人姓名'] = name | ||
| 1336 | self.init_result['签字页-承租人签章'] = electronic_signature | ||
| 1337 | |||
| 1338 | name = self.get_key_value(key='共同承租人名称:') | ||
| 1339 | electronic_signature = self.get_electronic_signature(top='共同承租人名称:', bottom='保证人1姓名:', t_pno='5') | ||
| 1340 | self.init_result['签字页-共同承租人姓名'] = name | ||
| 1341 | self.init_result['签字页-共同承租人签章'] = electronic_signature | ||
| 1342 | |||
| 1343 | name = self.get_key_value(key='保证人1姓名:') | ||
| 1344 | electronic_signature = self.get_electronic_signature(top='保证人1姓名:', bottom='保证人2姓名:', t_pno='5') | ||
| 1345 | self.init_result['签字页-保证人1姓名'] = name | ||
| 1346 | self.init_result['签字页-保证人1签章'] = electronic_signature | ||
| 1347 | |||
| 1348 | name = self.get_key_value(key='保证人2姓名:') | ||
| 1349 | electronic_signature = self.get_electronic_signature(top='保证人2姓名:', bottom='保证人3姓名:', t_pno='5') | ||
| 1350 | self.init_result['签字页-保证人2姓名'] = name | ||
| 1351 | self.init_result['签字页-保证人2签章'] = electronic_signature | ||
| 1352 | |||
| 1353 | return self.init_result | ||
| 1354 | |||
def get_info_1(self):
    """Populate and return ``self.init_result_1``.

    When ``self.pdf_info`` is empty nothing is looked up and
    ``self.init_result_1`` is returned as it currently stands. Otherwise
    each field below is read through the instance's lookup helpers and
    stored under its (Chinese) result key.

    Returns:
        The ``self.init_result_1`` mapping.
    """
    if len(self.pdf_info) == 0:
        return self.init_result_1

    # Contract number, read from page '0'.
    self.init_result_1['合同编号'] = self.get_contract_no(page_num='0')

    # Lessee name on page '0'.
    lessee_name = self.get_key_value(key='承租人:', page_num='0')
    self.init_result_1['承租人-姓名'] = lessee_name

    # Lessee ID number on page '0'.
    lessee_id = self.get_key_value(key='证件号码:', page_num='0')
    self.init_result_1['承租人-证件号码'] = lessee_id

    # Selling dealer on page '0'; when the key/value lookup yields an empty
    # string, fall back to the text between the dealer label and '地址:'.
    cover_seller = self.get_key_value(key='销售经销商:', page_num='0')
    if cover_seller['words'] == "":
        cover_seller = self.get_value_by_findall('销售经销商:', '地址:', page_num='0')
    self.init_result_1['销售经销商'] = cover_seller

    # Contract number as it appears in the contract body.
    self.init_result_1['合同编号(正文)'] = self.get_contract_no_one()

    # Signature page: lessee name.
    signer_name = self.get_key_value(key='姓名/名称:')
    self.init_result_1['签字页-承租人姓名'] = signer_name

    # Signature page: lessee ID number.
    signer_id = self.get_key_value(key='自然人身份证件号码/法人执照号码:')
    self.init_result_1['签字页-承租人证件号码'] = signer_id

    # Signature page: lessee signature/seal.
    self.init_result_1['签字页-承租人签章'] = self.get_signature_role_1()

    # Signature page: selling dealer. The label line can look like
    # "销售经销商:<dealer name>(请授权代表签字并请盖章)", so fall back to the text
    # between the label and that trailing hint on page '3'.
    signing_seller = self.get_key_value(key='销售经销商:')
    if signing_seller['words'] == "":
        signing_seller = self.get_value_by_findall('销售经销商:', '(请授权代表签字并请盖章)', page_num='3')
    self.init_result_1['签字页-销售经销商'] = signing_seller

    # Dealer signature/seal: not extracted here.
    return self.init_result_1
| 1391 | |||
def get_info_2(self):
    """Populate and return ``self.init_result_2``.

    When ``self.pdf_info`` is empty nothing is looked up and
    ``self.init_result_2`` is returned as it currently stands. Otherwise
    each field below is read through the instance's lookup helpers and
    stored under its (Chinese) result key.

    Returns:
        The ``self.init_result_2`` mapping.
    """
    if len(self.pdf_info) == 0:
        return self.init_result_2

    # Contract number.
    self.init_result_2['合同编号'] = self.get_contract_no_dy()

    # Contract number as it appears in the contract body.
    self.init_result_2['合同编号(正文)'] = self.get_contract_no_one()

    # Mortgagor name and ID number.
    mortgagor_name, mortgagor_id = self.get_dyr_name_id()
    self.init_result_2['抵押人姓名/名称'] = mortgagor_name
    self.init_result_2['抵押人证件号码'] = mortgagor_id

    # Mortgagor's spouse: name and ID number.
    spouse_name, spouse_id = self.get_dyrpo_name_id()
    self.init_result_2['抵押人配偶姓名/名称'] = spouse_name
    self.init_result_2['抵押人配偶证件号码'] = spouse_id

    # Vehicle identification number.
    self.init_result_2['车辆识别代码'] = self.get_key_value(key='车辆识别代码:')

    # Total rent.
    self.init_result_2['租金总额'] = self.get_key_value_position(key='租金总额')

    # Finance-lease term.
    self.init_result_2['融资租赁期限'] = self.get_key_value_position(key='融资租赁期限')

    # Signature page: mortgagor name and signature/seal.
    signer_name = self.get_key_value(key='抵押人姓名:')
    signer_seal = self.get_electronic_signature(top='抵押权人盖章', bottom='抵押人配偶姓名:', t_pno='1')
    self.init_result_2['签字页-抵押人姓名'] = signer_name
    self.init_result_2['签字页-抵押人签章'] = signer_seal

    # Signature page: spouse name and signature/seal.
    spouse_signer_name = self.get_key_value(key='抵押人配偶姓名:')
    spouse_signer_seal = self.get_electronic_signature(top='抵押人配偶姓名:', bottom='日期', t_pno='1')
    self.init_result_2['签字页-抵押人配偶姓名'] = spouse_signer_name
    self.init_result_2['签字页-抵押人配偶签章'] = spouse_signer_seal

    return self.init_result_2
| ... | \ No newline at end of file | ... | \ No newline at end of file |
| ... | @@ -6,9 +6,10 @@ | ... | @@ -6,9 +6,10 @@ |
| 6 | # @Description : | 6 | # @Description : |
| 7 | 7 | ||
| 8 | from .get_char import Finder | 8 | from .get_char import Finder |
| 9 | from .get_char_fsm import Finder as FSMFinder | ||
| 9 | 10 | ||
| 10 | 11 | ||
| 11 | def predict(pdf_info, file_cls): | 12 | def predict(pdf_info, file_cls, is_fsm=False): |
| 12 | """Summary | 13 | """Summary |
| 13 | 14 | ||
| 14 | Args: | 15 | Args: |
| ... | @@ -58,6 +59,10 @@ def predict(pdf_info, file_cls): | ... | @@ -58,6 +59,10 @@ def predict(pdf_info, file_cls): |
| 58 | pdf_info = dict() | 59 | pdf_info = dict() |
| 59 | for pno, page_info in enumerate(pdf_info_1): | 60 | for pno, page_info in enumerate(pdf_info_1): |
| 60 | pdf_info[str(pno)] = page_info | 61 | pdf_info[str(pno)] = page_info |
| 62 | |||
| 63 | if is_fsm: | ||
| 64 | f = FSMFinder(pdf_info) | ||
| 65 | else: | ||
| 61 | f = Finder(pdf_info) | 66 | f = Finder(pdf_info) |
| 62 | if file_cls == 0: | 67 | if file_cls == 0: |
| 63 | results = f.get_info() | 68 | results = f.get_info() | ... | ... |
src/common/fsm_econtract/const.py
0 → 100644
# Field specifications consumed by ``Retriever`` (retriever.py).
#
# Each top-level key is a page index given as a string ("0" is the first
# page; "-1" is resolved by ``Retriever`` to the highest page number present).
# Every page entry has two dicts that share the same field names:
#
#   'keys'  : field -> [(key_text, regex_tuple, direction, kwargs), ...]
#             ``regex_tuple`` holds the patterns matched (``re.match``) against
#             each text span to locate the label; ``direction`` selects the
#             ``Retriever.key_<direction>`` method; ``kwargs`` is passed to it.
#   'value' : field -> (source, direction, kwargs, default_value)
#             ``source`` is 'text' to search text spans, anything else ('img')
#             to search image blocks; ``direction`` selects
#             ``Retriever.value_<direction>``; ``default_value`` is reported
#             when nothing is found.
#
# ``offset_tuple`` is (offset_xmin, offset_xmax, offset_ymin, offset_ymax),
# expressed in multiples of the label's width (x) / height (y); see
# ``Retriever.get_target_bbox``.

WEP_FIELD = {
    "0": {
        'keys': {
            '客户姓名': [('客户姓名', (r'^姓名.?$', r'^企业名称.?$'), 'top1', {})],
            '证件类型': [('证件类型', (r'^证件类型.?$', ), 'top1', {})],
            '证件号码': [('证件号码', (r'^证件号码.?$', r'^统一社会信用代码.?$'), 'top1', {})],
            '合同价格(小写)': [('人民币', (r'^人民币¥.?$', ), 'top1', {})],
            '客户签名': [('客户签名/盖章', (r'^客户签名/盖章.*$', ), 'top1', {})],
            '签单日期': [('签单日期', (r'^签单日期.*签单日期.?$', ), 'top1', {})],
        },
        'value': {
            '客户姓名': ('text', 'right', {'offset_tuple': (-1.1, 1, 0.3, 0)}, ''),
            '证件类型': ('text', 'right', {'offset_tuple': (-1, 1, 0, 0)}, ''),
            '证件号码': ('text', 'right', {'offset_tuple': (-1, 2, 0.3, 0)}, ''),
            '合同价格(小写)': ('text', 'right', {'offset_tuple': (-1, 1, 0.3, 0)}, ''),
            '客户签名': ('img', 'under', {'offset_tuple': (0, 0, 0, 4), 'rigorous': True}, '无'),
            '签单日期': ('img', 'right', {'offset_tuple': (0, 0, 1.1, 0), 'rigorous': True}, '无'),
        },
    }

}

MSI_FIELD = {
    "0": {
        'keys': {
            '客户姓名': [('客户姓名', (r'^客户姓名.?$', r'^企业名称.?$'), 'top1', {})],
            '证件类型': [('证件类型', (r'^证件类型.?$', ), 'top1', {})],
            '证件号码': [('证件号码', (r'^证件号码.?$', r'^统一社会信用代码.?$'), 'top1', {})],
            '合同价格(小写)': [('人民币', (r'^人民币¥.?$', ), 'top1', {})],
        },
        'value': {
            '客户姓名': ('text', 'right', {'offset_tuple': (-1.2, 1, 0.3, 0)}, ''),
            '证件类型': ('text', 'right', {'offset_tuple': (-1, 1, 0, 0)}, ''),
            '证件号码': ('text', 'right', {'offset_tuple': (-1, 2, 0.3, 0)}, ''),
            '合同价格(小写)': ('text', 'right', {'offset_tuple': (-1, 1, 0.3, 0)}, ''),
        },
    },
    "1": {
        'keys': {
            '客户签名': [('客户签名/盖章', (r'^客户签名/盖章.*$', ), 'top1', {})],
            '签单日期': [('签单日期', (r'^签单日期.*签单日期.?$', ), 'top1', {})],
        },
        'value': {
            '客户签名': ('img', 'under', {'offset_tuple': (0, 0, 0, 4), 'rigorous': True}, '无'),
            '签单日期': ('img', 'right', {'offset_tuple': (0, 0, 1.1, 0), 'rigorous': True}, '无'),
        },
    }
}

SC_FIELD = {
    "0": {
        'keys': {
            '姓名': [('姓名', (r'^姓名.?$', r'^企业名称.?$'), 'top1', {})],
            '证件类型': [('证件类型', (r'^证件类型.?$', ), 'top1', {})],
            '证件号码': [('证件号码', (r'^证件号码.?$', r'^统一社会信用代码.?$'), 'top1', {})],
            '总价': [('总价', (r'^总价.?$', ), 'top1', {})],
        },
        'value': {
            '姓名': ('text', 'right', {'offset_tuple': (-2, 8, 0.5, 0)}, ''),
            '证件类型': ('text', 'right', {'offset_tuple': (-2, 6, 0.5, 0)}, ''),
            '证件号码': ('text', 'right', {'offset_tuple': (-2, 6, 0.5, 0)}, ''),
            '总价': ('text', 'right', {'offset_tuple': (-2, 12, 0.5, 0)}, ''),
        },
    },
    "-1": {
        'keys': {
            # The pattern used to be listed twice, which only made every hit
            # get recorded twice; a single entry selects the same label.
            '客户签名': [('客户签名/盖章', (r'^客户签名/盖章.*$', ), 'top1', {})],
            '签单日期': [('签单日期', (r'^签单日期.*签单日期.?$', ), 'top1', {})],
        },
        'value': {
            '客户签名': ('img', 'under', {'offset_tuple': (1.5, 1, 0, 4), 'rigorous': True}, '无'),
            '签单日期': ('img', 'right', {'offset_tuple': (0, 0, 1.1, 0), 'rigorous': True}, '无'),
        },
    }
}
src/common/fsm_econtract/fsm_contract_ocr.py
0 → 100644
| 1 | from .retriever import Retriever | ||
| 2 | from .const import WEP_FIELD, MSI_FIELD, SC_FIELD | ||
| 3 | from .tools import pdf_info_rebuild | ||
| 4 | |||
# One Retriever per field specification; predict() indexes this list with
# ``file_type`` (0 -> WEP_FIELD, 1 -> MSI_FIELD, 2 -> SC_FIELD).
retriever_list = [Retriever(field_spec) for field_spec in (WEP_FIELD, MSI_FIELD, SC_FIELD)]


def predict(pdf_info, file_type=0):
    """Extract the configured fields from a parsed PDF.

    Args:
        pdf_info: Parsed PDF structure accepted by ``pdf_info_rebuild``.
        file_type: Index into ``retriever_list`` selecting which field
            specification to apply.

    Returns:
        Whatever the selected retriever's ``get_target_fields`` returns.
    """
    # Select the retriever first so an invalid ``file_type`` fails before the
    # PDF structure is processed.
    selected = retriever_list[file_type]
    text_by_page, images_by_page = pdf_info_rebuild(pdf_info)
    return selected.get_target_fields(text_by_page, images_by_page)
| 11 | |||
| 12 |
src/common/fsm_econtract/hmh_ocr.py
0 → 100644
src/common/fsm_econtract/retriever.py
0 → 100644
| 1 | import re | ||
| 2 | |||
| 3 | |||
class HMHRetriever:
    """Regex-based extractor for five fixed fields on page ``'0'``.

    ``get_target_fields`` scans the text spans of page ``'0'`` and fills the
    fields named in ``search_fields_list``; fields that are not found get
    their configured default value and ``default_position``.
    """

    # Compiled once at class creation instead of on every span.
    _NAME_ID_COMPANY_RE = re.compile(r'姓名(.*)证件号码(.*)与(.*公司)')
    _APPLICATION_NO_RE = re.compile(r'合同编号.*(CH-B\d*-\d*).*')
    _NAME_DATE_RE = re.compile(r'(.*).*签署日期.*(\d{4}-\d{2}-\d{2})')

    def __init__(self):
        self.words_str = 'words'
        self.position_str = 'location'
        # Value reported for the signature field when a signing line is found.
        self.fix_hava_str = '有'
        self.default_position = [0, 0, 0, 0]
        # (result key, default value when the field is not found)
        self.search_fields_list = [
            ('借款/承租人姓名', ''),
            ('证件号码', ''),
            ('渠道', ''),
            ('合同编号', ''),
            ('借款人签字/盖章', '无'),
        ]

    def get_target_fields(self, pdf_text_list):
        """Extract the configured fields from the spans of page ``'0'``.

        Args:
            pdf_text_list: Mapping of page-number string to a list of
                ``(bbox, text)`` pairs. It is only read; earlier versions
                removed page ``'0'`` from it as a side effect.

        Returns:
            ``{"words_result": {field: {'words': ..., 'location': ...}}}``
            with one entry per field in ``search_fields_list``.
        """
        name_key, id_key, channel_key, contract_key, sign_key = (
            key for key, _ in self.search_fields_list
        )
        result = dict()
        is_find_name_id_company, is_find_application_no, is_find_name_date = False, False, False

        # ``get`` rather than ``pop``: the caller's mapping must stay intact.
        for bbox, text in pdf_text_list.get(str(0), []):
            if not is_find_name_id_company:
                match = self._NAME_ID_COMPANY_RE.search(text)
                if match is not None:
                    name, id_no, company = match.groups()
                    result[name_key] = {
                        self.words_str: name.replace('\u3000', '').strip(),
                        self.position_str: bbox
                    }
                    # NOTE(review): the same ``replace(')', '')`` used to be
                    # chained twice; one of them may have been meant for a
                    # full-width bracket - confirm against the upstream file.
                    result[id_key] = {
                        self.words_str: id_no.replace('\u3000', '').replace(')', '').strip(),
                        self.position_str: bbox
                    }
                    result[channel_key] = {
                        self.words_str: company,
                        self.position_str: bbox
                    }
                    is_find_name_id_company = True

            if not is_find_application_no:
                # Accepted only when the span yields exactly one match.
                application_no_list = self._APPLICATION_NO_RE.findall(text)
                if len(application_no_list) == 1:
                    result[contract_key] = {
                        self.words_str: application_no_list[0],
                        self.position_str: bbox
                    }
                    is_find_application_no = True

            if not is_find_name_date:
                # Only the presence of a "name ... signing date" line matters;
                # the matched text itself is not reported.
                if self._NAME_DATE_RE.search(text) is not None:
                    result[sign_key] = {
                        self.words_str: self.fix_hava_str,
                        self.position_str: bbox
                    }
                    is_find_name_date = True

        for find_key, default_value in self.search_fields_list:
            if find_key not in result:
                result[find_key] = {
                    self.words_str: default_value,
                    # Copy so defaulted fields do not share one mutable list.
                    self.position_str: list(self.default_position),
                }

        return {"words_result": result}
| 74 | |||
class Retriever:
    """Locate labelled values on PDF pages from a declarative field spec.

    ``target_fields`` maps a page-number string to a dict with two entries:

    * ``'keys'``: field -> list of ``(key_text, regex_tuple, direction,
      kwargs)``; the regexes locate the label span and
      ``key_<direction>`` picks one hit when there are several.
    * ``'value'``: field -> ``(source, direction, kwargs, default_value)``;
      ``value_<direction>`` searches text spans (``source == 'text'``) or
      image blocks (any other source) in a box derived from the label.

    Page ``'-1'`` means "the highest page number present in the text map".
    """

    def __init__(self, target_fields):
        self.keys_str = 'keys'
        self.value_str = 'value'
        self.text_str = 'text'
        self.words_str = 'words'
        self.position_str = 'position'
        self.default_position = [-1, -1, -1, -1]
        self.target_fields = target_fields
        # value_type -> {character: replacement}, applied to found values.
        self.replace_map = {
            'int': {
                '(': '0'
            }
        }

    @staticmethod
    def _in_region(box, region, rigorous):
        """Return whether ``box`` lies in ``region``.

        With ``rigorous`` the whole box must be strictly inside the region;
        otherwise only the box centre has to be.
        """
        x0, y0, x1, y1 = box
        x_min, y_min, x_max, y_max = region
        if rigorous:
            return x_min < x0 and x1 < x_max and y_min < y0 and y1 < y_max
        cent_x = x0 + ((x1 - x0) / 2)
        cent_y = y0 + ((y1 - y0) / 2)
        return x_min < cent_x < x_max and y_min < cent_y < y_max

    def _apply_replacements(self, value, value_type):
        """Translate ``value`` through ``replace_map[value_type]`` if both apply."""
        if isinstance(value_type, str) and value_type in self.replace_map and isinstance(value, str):
            return value.translate(str.maketrans(self.replace_map.get(value_type, {})))
        return value

    @staticmethod
    def key_top1(coordinates_list, key_coordinates):
        """Label selection: return the hit with the smallest ``y0``.

        ``key_coordinates`` is unused; it is accepted so every
        ``key_<direction>`` method can be called the same way. The input
        list is left untouched (it used to be sorted in place).
        """
        return min(coordinates_list, key=lambda box: box[1])

    def key_right(self, coordinates_list, key_coordinates, offset_tuple, rigorous=False):
        """Label selection: leftmost hit inside a box derived from the previous label.

        Falls back to ``key_top1`` when there is no previous label
        (``key_coordinates is None``) or no hit lies inside the box. A
        single hit is returned as is.
        """
        if len(coordinates_list) == 1:
            return coordinates_list[0]

        # Without the previous label's coordinates, take the topmost hit.
        if key_coordinates is None:
            return self.key_top1(coordinates_list, key_coordinates)

        region = self.get_target_bbox(key_coordinates, offset_tuple)

        leftmost = None
        for x0, y0, x1, y1 in coordinates_list:
            if not self._in_region((x0, y0, x1, y1), region, rigorous):
                continue
            if leftmost is None or x0 < leftmost[0]:
                leftmost = (x0, y0, x1, y1)

        if leftmost is None:
            return self.key_top1(coordinates_list, key_coordinates)
        return leftmost

    def value_right(self, search_list, key_coordinates, offset_tuple, value_type=None, rigorous=False):
        """Value lookup: leftmost non-blank entry inside the box around the label.

        Args:
            search_list: Iterable of ``((x0, y0, x1, y1), text)``.
            key_coordinates: Label box the search box is derived from.
            offset_tuple: See ``get_target_bbox``.
            value_type: Optional key into ``replace_map``.
            rigorous: Require full containment instead of centre containment.

        Returns:
            ``(value, coordinates)``, or ``(None, None)`` when nothing matches.
        """
        region = self.get_target_bbox(key_coordinates, offset_tuple)

        value, coordinates = None, None
        for (x0, y0, x1, y1), text in search_list:
            if not self._in_region((x0, y0, x1, y1), region, rigorous):
                continue
            if coordinates is not None and x0 >= coordinates[0]:
                continue
            if len(text.strip()) > 0:
                value = text
                coordinates = (x0, y0, x1, y1)

        return self._apply_replacements(value, value_type), coordinates

    def value_under(self, search_list, key_coordinates, offset_tuple, value_type=None, append=False, rigorous=False):
        """Value lookup: non-blank entries inside the box, ordered top-to-bottom.

        Entries are sorted by ``(y0, x0)``. The coordinates of the first one
        are returned together with either its text or, when ``append`` is
        true, the concatenation of all texts in that order.

        Returns:
            ``(value, coordinates)``, or ``(None, None)`` when nothing matches.
        """
        region = self.get_target_bbox(key_coordinates, offset_tuple)

        find_list = [
            (x0, y0, x1, y1, text)
            for (x0, y0, x1, y1), text in search_list
            if self._in_region((x0, y0, x1, y1), region, rigorous) and len(text.strip()) > 0
        ]
        if len(find_list) == 0:
            return None, None

        find_list.sort(key=lambda x: (x[1], x[0]))
        coordinates = find_list[0][:-1]
        if append:
            value = ''.join([text for _, _, _, _, text in find_list])
        else:
            value = find_list[0][-1]

        return self._apply_replacements(value, value_type), coordinates

    @staticmethod
    def get_target_bbox(key_coordinates, offset_tuple):
        """Build the search box for a label.

        ``offset_tuple`` is ``(offset_xmin, offset_xmax, offset_ymin,
        offset_ymax)`` in multiples of the label's width (x) or height (y).
        The min offsets are subtracted from the label's top-left corner, so
        a negative value moves that edge right/down; the max offsets are
        added to the bottom-right corner.
        """
        offset_xmin, offset_xmax, offset_ymin, offset_ymax = offset_tuple

        width = key_coordinates[2] - key_coordinates[0]
        height = key_coordinates[-1] - key_coordinates[1]

        x_min = key_coordinates[0] - (width * offset_xmin)
        x_max = key_coordinates[2] + (width * offset_xmax)
        y_min = key_coordinates[1] - (height * offset_ymin)
        y_max = key_coordinates[-1] + (height * offset_ymax)
        return x_min, y_min, x_max, y_max

    def get_target_fields(self, pdf_text_list, pdf_img_list):
        """Extract every configured field.

        Args:
            pdf_text_list: page-number string -> list of ``(bbox, text)``.
            pdf_img_list: page-number string -> list of ``(bbox, text)`` for
                image blocks.

        Returns:
            ``{page_key: {field: {'words': ..., 'position': [...]}}}`` where
            ``page_key`` is ``'page_<n + 1>'`` for an explicit page ``n`` and
            the fixed ``'page_12'`` for the ``'-1'`` (last page) entry.
            Fields that cannot be located carry their default value and
            ``default_position``.
        """
        pdf_result = dict()

        for pno_str, fields_dict in self.target_fields.items():
            is_last_pno = pno_str == '-1'
            # Resolve "last page" only when there is at least one text page;
            # with no pages at all the lookups below simply find nothing and
            # every field gets its default (this used to raise ValueError).
            if is_last_pno and pdf_text_list:
                pno_str = str(max(int(page_no) for page_no in pdf_text_list))

            page_text = pdf_text_list.get(pno_str, [])
            page_img = pdf_img_list.get(pno_str, [])

            # Pass 1: collect the boxes of every span matching a label.
            # ``any`` records a span once even when several patterns match.
            key_text_info = dict()
            for key_text_list in fields_dict[self.keys_str].values():
                for key_text, key_re_tuple, _, _ in key_text_list:
                    for (x0, y0, x1, y1), text in page_text:
                        if any(re.match(key_re, text) for key_re in key_re_tuple):
                            key_text_info.setdefault(key_text, list()).append((x0, y0, x1, y1))

            # Pass 2: walk each field's label chain and keep the last label's
            # box; a missing label resets the chain.
            key_coordinates_info = dict()
            for field, key_text_list in fields_dict[self.keys_str].items():
                last_key_coordinates = None
                for key_text, _, direction, kwargs in key_text_list:
                    if key_text not in key_text_info:
                        last_key_coordinates = None
                        continue
                    last_key_coordinates = getattr(self, 'key_{0}'.format(direction))(
                        key_text_info[key_text],
                        last_key_coordinates,
                        **kwargs)

                key_coordinates_info[field] = last_key_coordinates

            # Pass 3: look up each field's value relative to its label.
            page_result = dict()
            for field, (source, direction, kwargs, default_value) in fields_dict[self.value_str].items():
                value, coordinates = None, None
                if isinstance(key_coordinates_info.get(field), tuple):
                    value, coordinates = getattr(self, 'value_{0}'.format(direction))(
                        page_text if source == self.text_str else page_img,
                        key_coordinates_info[field],
                        **kwargs
                    )
                if isinstance(value, str):
                    page_result[field] = {
                        self.words_str: value,
                        self.position_str: list(coordinates),
                    }
                else:
                    page_result[field] = {
                        self.words_str: default_value,
                        # Copy so defaulted fields do not share one list.
                        self.position_str: list(self.default_position),
                    }

            # NOTE(review): the last-page entry is always reported under the
            # fixed key 'page_12' regardless of the real page count -
            # presumably what downstream consumers expect; confirm.
            page_key = 'page_12' if is_last_pno else 'page_{0}'.format(int(pno_str) + 1)
            pdf_result[page_key] = page_result

        return pdf_result
src/common/fsm_econtract/tools.py
0 → 100644
def pdf_info_rebuild(pdf_info, fix_bbox=True):
    """Flatten a per-page block structure into text-span and image lists.

    Args:
        pdf_info: Mapping of page-number string to a page dict with a
            ``'blocks'`` list. Blocks with ``type == 0`` are read as text
            (``lines`` -> ``spans`` with ``bbox``, ``text`` and ``size``);
            blocks with ``type == 1`` are read as images (``bbox``).
        fix_bbox: When true, a span whose box is shorter than its ``size``
            gets ``size`` added to the box's bottom edge.

    Returns:
        ``(pdf_text_info, pdf_img_info)``. The first maps page number to a
        list of ``[bbox, text]`` with ``bbox`` a fresh 4-item list; the
        second maps page number to a list of ``(bbox, '有')`` tuples. Pages
        without text (or without images) have no entry in the respective map.

    The input is not modified: span boxes are copied before the height
    correction, which also makes tuple boxes acceptable.
    """
    pdf_text_info = dict()
    pdf_img_info = dict()
    for pno_str, page_info in pdf_info.items():
        for block in page_info['blocks']:
            if block['type'] == 0:
                # The same text can occur more than once inside a block;
                # keep only the first occurrence per block.
                seen_texts = set()
                for line in block['lines']:
                    for span in line['spans']:
                        text = span['text'].strip()
                        if len(text) == 0 or text in seen_texts:
                            continue
                        seen_texts.add(text)
                        # Work on a copy so the caller's data stays intact.
                        bbox = list(span['bbox'])
                        # The reported box height is unreliable: extend boxes
                        # that are shorter than the span's size.
                        if fix_bbox and bbox[-1] - bbox[1] < span['size']:
                            bbox[-1] = bbox[-1] + span['size']
                        pdf_text_info.setdefault(pno_str, list()).append([bbox, text])
            elif block['type'] == 1:
                pdf_img_info.setdefault(pno_str, list()).append((block['bbox'], '有'))

    return pdf_text_info, pdf_img_info
| ... | \ No newline at end of file | ... | \ No newline at end of file |
src/common/tools/mssql_script24.py
0 → 100644
"""Schema migration: add the FSM contract OCR result columns.

Running this module adds the ``fsm_wep_ocr``, ``fsm_msi_ocr`` and
``fsm_sc_ocr`` columns to the HIL and AFC OCR result tables.
"""
from contextlib import closing

import pyodbc

hil_sql = """
ALTER TABLE hil_ocr_result ADD fsm_wep_ocr nvarchar(max);
ALTER TABLE hil_ocr_result ADD fsm_msi_ocr nvarchar(max);
ALTER TABLE hil_ocr_result ADD fsm_sc_ocr nvarchar(max);
ALTER TABLE hil_se_ocr_result ADD fsm_wep_ocr nvarchar(max);
ALTER TABLE hil_se_ocr_result ADD fsm_msi_ocr nvarchar(max);
ALTER TABLE hil_se_ocr_result ADD fsm_sc_ocr nvarchar(max);
"""

afc_sql = """
ALTER TABLE afc_ocr_result ADD fsm_wep_ocr nvarchar(max);
ALTER TABLE afc_ocr_result ADD fsm_msi_ocr nvarchar(max);
ALTER TABLE afc_ocr_result ADD fsm_sc_ocr nvarchar(max);
ALTER TABLE afc_se_ocr_result ADD fsm_wep_ocr nvarchar(max);
ALTER TABLE afc_se_ocr_result ADD fsm_msi_ocr nvarchar(max);
ALTER TABLE afc_se_ocr_result ADD fsm_sc_ocr nvarchar(max);
"""


def _run_migration(connection_string, sql):
    """Execute ``sql`` on a fresh autocommit connection.

    The cursor and the connection are closed even when ``execute`` raises,
    so a failed migration does not leave a connection open.
    """
    with closing(pyodbc.connect(connection_string, autocommit=True)) as cnxn:
        with closing(cnxn.cursor()) as cursor:
            cursor.execute(sql)


# HIL database first, then AFC. The connection strings only name the driver
# here; server and credentials must be supplied before running.
_run_migration('DRIVER={ODBC Driver 17 for SQL Server};', hil_sql)
_run_migration('DRIVER={ODBC Driver 17 for SQL Server};', afc_sql)
| ... | @@ -8,13 +8,16 @@ from common.tools.comparison import cp | ... | @@ -8,13 +8,16 @@ from common.tools.comparison import cp |
| 8 | from common.mixins import LoggerMixin | 8 | from common.mixins import LoggerMixin |
| 9 | from rest_framework.permissions import IsAuthenticated | 9 | from rest_framework.permissions import IsAuthenticated |
| 10 | from apps.account.authentication import OAuth2AuthenticationWithUser | 10 | from apps.account.authentication import OAuth2AuthenticationWithUser |
| 11 | from apps.doc.models import NscInvoice | ||
| 12 | import json | ||
| 13 | from datetime import datetime | ||
| 11 | 14 | ||
| 12 | params = { | 15 | params = { |
| 13 | 'invoiceCode': fields.Str(required=True, validate=validate.Length(max=128)), | 16 | 'invoiceCode': fields.Str(required=True, validate=validate.Length(max=128)), |
| 14 | 'invoiceNumber': fields.Str(required=True, validate=validate.Length(max=64)), | 17 | 'invoiceNumber': fields.Str(required=True, validate=validate.Length(max=64)), |
| 15 | 'issueDate': CustomDate(required=True), | 18 | 'issueDate': CustomDate(required=True), |
| 16 | 'buyerName': fields.Str(required=True, validate=validate.Length(max=64)), | 19 | 'buyerName': fields.Str(required=True, validate=validate.Length(max=64)), |
| 17 | "buyerId": fields.Int(required=True), | 20 | "buyerId": fields.Str(required=True, validate=validate.Length(max=64)), |
| 18 | 'vin': fields.Str(required=True, validate=validate.Length(max=128)), | 21 | 'vin': fields.Str(required=True, validate=validate.Length(max=128)), |
| 19 | 'dealer': fields.Str(required=False, validate=validate.Length(max=64)), | 22 | 'dealer': fields.Str(required=False, validate=validate.Length(max=64)), |
| 20 | 'priceWithVat': CustomDecimal(required=True), | 23 | 'priceWithVat': CustomDecimal(required=True), |
| ... | @@ -29,7 +32,7 @@ input_args = { | ... | @@ -29,7 +32,7 @@ input_args = { |
| 29 | } | 32 | } |
| 30 | 33 | ||
| 31 | 34 | ||
| 32 | # poss 接口接收NSC 发票信息 | 35 | # pos 接口接收NSC 发票信息 |
| 33 | class NSCInvoiceView(GenericView): | 36 | class NSCInvoiceView(GenericView): |
| 34 | permission_classes = [IsAuthenticated] | 37 | permission_classes = [IsAuthenticated] |
| 35 | authentication_classes = [OAuth2AuthenticationWithUser] | 38 | authentication_classes = [OAuth2AuthenticationWithUser] |
| ... | @@ -50,6 +53,7 @@ class NSCInvoiceView(GenericView): | ... | @@ -50,6 +53,7 @@ class NSCInvoiceView(GenericView): |
| 50 | vat = content.get('vat', 0.0) | 53 | vat = content.get('vat', 0.0) |
| 51 | vat_rate = content.get('vatRate', 0.0) | 54 | vat_rate = content.get('vatRate', 0.0) |
| 52 | 55 | ||
| 56 | NscInvoice.objects.create(vin=vin, content=json.dumps(content), create_time=datetime.now()) | ||
| 53 | return response.ok() | 57 | return response.ok() |
| 54 | 58 | ||
| 55 | 59 | ||
| ... | @@ -90,11 +94,17 @@ class DeMortgageView(GenericView): | ... | @@ -90,11 +94,17 @@ class DeMortgageView(GenericView): |
| 90 | 'applicationName': application_name, | 94 | 'applicationName': application_name, |
| 91 | 'deMortgageDate': de_mortgage_date | 95 | 'deMortgageDate': de_mortgage_date |
| 92 | } | 96 | } |
| 93 | de_mortgage_info = {} | 97 | de_mortgage_info = {'customer_name':'','applicationName':'','deMortgageDate':''} |
| 94 | # 绿本必须分开ocr | 98 | # 绿本必须分开ocr |
| 95 | for file_obj in files: | 99 | for file_obj in files: |
| 96 | info = PosHandler.de_mortgage_ocr_process1(file_obj) | 100 | info = PosHandler.de_mortgage_ocr_process1(file_obj) |
| 97 | de_mortgage_info.update(info) | 101 | if info.get('customerName') is not '': |
| 102 | de_mortgage_info['customerName'] = info.get('customerName') | ||
| 103 | if info.get('applicationName') is not '': | ||
| 104 | de_mortgage_info['applicationName'] = info.get('applicationName') | ||
| 105 | if info.get('deMortgageDate') is not '': | ||
| 106 | de_mortgage_info['deMortgageDate'] = info.get('deMortgageDate') | ||
| 107 | #de_mortgage_info.update(info) | ||
| 98 | 108 | ||
| 99 | request_pass = True | 109 | request_pass = True |
| 100 | fields_result = [] | 110 | fields_result = [] | ... | ... |
-
Please register or sign in to post a comment