e5f36af5 by 周伟奇

Merge branch 'feature/fsm-contract' into fix/report_ca

2 parents d7847808 dd49a253
......@@ -2360,17 +2360,24 @@ ECONTRACT_KEYWORDS_MAP = {
('抵押贷款合同', CONTRACT_CLASSIFY),
('送达地址确认书', CONTRACT_QRS_CLASSIFY),
('抵押登记豁免函', HMH_CLASSIFY),
('延长保修', FSM_CONTRACT_WEP_CLASSIFY),
('长悦保养', FSM_CONTRACT_MSI_CLASSIFY),
('汽车销售合同', FSM_CONTRACT_SC_CLASSIFY),
],
HIL_PREFIX: [
('售后回租合同', HIL_CONTRACT_1_CLASSIFY),
('租赁抵押合同', HIL_CONTRACT_2_CLASSIFY),
('车辆处置协议', HIL_CONTRACT_3_CLASSIFY),
('抵押登记豁免函', HMH_CLASSIFY),
('延长保修', FSM_CONTRACT_WEP_CLASSIFY),
('长悦保养', FSM_CONTRACT_MSI_CLASSIFY),
]
}
# Keyword table used to classify FSM e-contracts from their document name.
# Outer key: business prefix (AFC_PREFIX / HIL_PREFIX).
# Value: ordered list of (keyword, classify constant) pairs.
# The upload view shown in this change set iterates the list for the current
# prefix and takes the classify constant of the first keyword that is a
# substring of the document name, so list order acts as match priority.
# NOTE(review): that consumer calls .get(prefix) without a default, so every
# prefix it may pass presumably needs a key here -- confirm against the caller.
# NOTE(review): both prefixes currently carry the same three entries.
FSM_ECONTRACT_KEYWORDS_MAP = {
AFC_PREFIX: [
# Literal reads "extended warranty terms and conditions".
('延长保修条款与条件', FSM_CONTRACT_WEP_CLASSIFY),
# Literal reads "Changyue maintenance package service contract".
('长悦保养套餐服务合同', FSM_CONTRACT_MSI_CLASSIFY),
# Literal reads "car sales contract".
('汽车销售合同', FSM_CONTRACT_SC_CLASSIFY),
],
HIL_PREFIX: [
# Same three keyword/classify pairs as the AFC_PREFIX list above.
('延长保修条款与条件', FSM_CONTRACT_WEP_CLASSIFY),
('长悦保养套餐服务合同', FSM_CONTRACT_MSI_CLASSIFY),
('汽车销售合同', FSM_CONTRACT_SC_CLASSIFY),
]
}
......
......@@ -608,7 +608,15 @@ class UploadDocView(GenericView, DocHandler):
if keyword in document_name:
classify_1 = classify_1_tmp
break
elif document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \
# FSM合同:WEP/MSI/SC
elif data_source == consts.DATA_SOURCE_LIST[0] and document_scheme == consts.DOC_SCHEME_LIST[0]:
for keyword, classify_1_tmp in consts.FSM_ECONTRACT_KEYWORDS_MAP.get(prefix):
if keyword in document_name:
classify_1 = classify_1_tmp
break
if document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \
or document_name.endswith('.RAR'):
is_zip = True
......
......@@ -21,8 +21,7 @@ class HMHRetriever:
for bbox, text in pdf_text_list.pop(str(0), []):
# print(text)
if not is_find_name_id_company:
# name_id_company_list = re.findall(r'借款人\(姓名(.*)证件号码(.*)与(.*公司)', text)
name_id_company_list = re.findall(r'承租人\(姓名(.*)证件号码(.*)与(.*公司)', text)
name_id_company_list = re.findall(r'姓名(.*)证件号码(.*)与(.*公司)', text)
for name_id_company_tuple in name_id_company_list:
if len(name_id_company_tuple) == 3:
result[self.search_fields_list[0][0]] = {
......@@ -30,26 +29,7 @@ class HMHRetriever:
self.position_str: bbox
}
result[self.search_fields_list[1][0]] = {
self.words_str: name_id_company_tuple[1].replace('\u3000', '').strip(),
self.position_str: bbox
}
result[self.search_fields_list[2][0]] = {
self.words_str: name_id_company_tuple[2],
self.position_str: bbox
}
is_find_name_id_company = True
break
if not is_find_name_id_company:
name_id_company_list = re.findall(r'借款人\(姓名(.*)证件号码(.*)与(.*公司)', text)
# name_id_company_list = re.findall(r'承租人\(姓名(.*)证件号码(.*)与(.*公司)', text)
for name_id_company_tuple in name_id_company_list:
if len(name_id_company_tuple) == 3:
result[self.search_fields_list[0][0]] = {
self.words_str: name_id_company_tuple[0].replace('\u3000', '').strip(),
self.position_str: bbox
}
result[self.search_fields_list[1][0]] = {
self.words_str: name_id_company_tuple[1].replace('\u3000', '').strip(),
self.words_str: name_id_company_tuple[1].replace('\u3000', '').replace(')', '').replace(')', '').strip(),
self.position_str: bbox
}
result[self.search_fields_list[2][0]] = {
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!