573f28d7 by 周伟奇

add qrs ocr

1 parent 3690e26d
......@@ -1042,6 +1042,10 @@ JYPZ_FIELD_ORDER = (("type", "标题"),
CONTRACT_CN_NAME = '合同'
CONTRACT_CLASSIFY = 41
# Contract - service address confirmation letter (QRS)
CONTRACT_QRS_CN_NAME = '送达地址确认书'
CONTRACT_QRS_CLASSIFY = 49
# Contract number: on every page
HIL_CONTRACT_1_CN_NAME = '售后回租合同'
......@@ -1053,13 +1057,14 @@ HIL_CONTRACT_2_CLASSIFY = 44
HIL_CONTRACT_3_CN_NAME = '车辆处置协议'
HIL_CONTRACT_3_CLASSIFY = 45
CONTRACT_SET = {CONTRACT_CLASSIFY, HIL_CONTRACT_1_CLASSIFY, HIL_CONTRACT_2_CLASSIFY, HIL_CONTRACT_3_CLASSIFY}
CONTRACT_SET = {CONTRACT_QRS_CLASSIFY, CONTRACT_CLASSIFY, HIL_CONTRACT_1_CLASSIFY, HIL_CONTRACT_2_CLASSIFY, HIL_CONTRACT_3_CLASSIFY}
# Maps each contract classify code to its Chinese display name.
CONTRACT_MAP = {
HIL_CONTRACT_1_CLASSIFY: HIL_CONTRACT_1_CN_NAME,
HIL_CONTRACT_2_CLASSIFY: HIL_CONTRACT_2_CN_NAME,
HIL_CONTRACT_3_CLASSIFY: HIL_CONTRACT_3_CN_NAME,
CONTRACT_CLASSIFY: CONTRACT_CN_NAME,
CONTRACT_QRS_CLASSIFY: CONTRACT_QRS_CN_NAME,
}
# 保单
......@@ -1203,6 +1208,7 @@ DDA_OCR_FIELD = 'bs_ocr'
HMH_OCR_FIELD = 'hmh_ocr'
JYPZ_OCR_FIELD = 'jypz_ocr'
HT_FIELD = 'ht_ocr'
QRS_FIELD = 'qrs_ocr'
BD_FIELD = 'bd_ocr'
BS_FIELD = 'bss_ocr'
HIL_CONTRACT_1_FIELD = 'hil_contract_1_ocr'
......@@ -1226,6 +1232,7 @@ RESULT_MAPPING = {
HMH_CLASSIFY: HMH_OCR_FIELD,
JYPZ_CLASSIFY: JYPZ_OCR_FIELD,
CONTRACT_CLASSIFY: HT_FIELD,
CONTRACT_QRS_CLASSIFY: QRS_FIELD,
INSURANCE_CLASSIFY: BD_FIELD,
BS_CLASSIFY: BS_FIELD,
HIL_CONTRACT_1_CLASSIFY: HIL_CONTRACT_1_FIELD,
......@@ -1479,6 +1486,10 @@ AFC_CON_FIELD_ORDER_LTGT = (
('标准利率', '标准利率'),
)
# Fields extracted for the service address confirmation letter (QRS).
# Each entry is: result key -> (page number, key looked up in that page's info).
SE_AFC_CON_QRS_MAP = {
'合同编号': (1, '合同编号'),
}
SE_AFC_CON_MAP = {
'合同编号-每页': (None, None, '合同编号', None),
'所购车辆价格-小写-重要条款': (1, 1, '所购车辆价格', None),
......@@ -2308,6 +2319,7 @@ FILE_NAME_PREFIX_MAP = {
ECONTRACT_KEYWORDS_MAP = {
AFC_PREFIX: [
('抵押贷款合同', CONTRACT_CLASSIFY),
('送达地址确认书', CONTRACT_QRS_CLASSIFY),
# ('电子签署-抵押登记豁免函', HMH_CLASSIFY, 0),
],
HIL_PREFIX: [
......
......@@ -987,6 +987,15 @@ class Command(BaseCommand, LoggerMixin):
# }
# }
license_summary[classify] = [res]
elif classify == consts.CONTRACT_QRS_CLASSIFY:
res = {}
for key, (pno, key1) in consts.SE_AFC_CON_QRS_MAP.items():
res[key] = page_info_dict.get(str(pno), {}).get(key1, '')
res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get(
consts.IMG_PATH_KEY, '')
res.setdefault(consts.ALL_POSITION_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get(
consts.ALL_POSITION_KEY, {}).get(key1, [])
license_summary[classify] = [res]
else:
res = {}
for key, (pno1, pno2, end_idx, key1, key2) in consts.SE_HIL_CON_MAP[classify].items():
......@@ -1474,6 +1483,16 @@ class Command(BaseCommand, LoggerMixin):
'page_num': page_num,
'page_info': page_info
}
elif classify_1_str == str(consts.CONTRACT_QRS_CLASSIFY):
ocr_result = afc_predict(pdf_handler.pdf_info, is_qrs=True)
page_num = 'page_1'
page_res = {
page_num: {
'classify': int(classify_1_str),
'page_num': page_num,
'page_info': ocr_result.pop(page_num, {})
}
}
else:
file_type_1 = consts.HIL_CONTRACT_TYPE_MAP.get(classify_1_str)
ocr_result_1 = hil_predict(pdf_handler.pdf_info, file_type_1)
......
......@@ -328,6 +328,7 @@ class AFCOCRResult(models.Model):
hil_contract_1_ocr = models.TextField(null=True, verbose_name="HIL合同1")
hil_contract_2_ocr = models.TextField(null=True, verbose_name="HIL合同2")
hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3")
qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书")
update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间')
create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间')
......@@ -363,6 +364,7 @@ class HILOCRResult(models.Model):
hil_contract_1_ocr = models.TextField(null=True, verbose_name="HIL合同1")
hil_contract_2_ocr = models.TextField(null=True, verbose_name="HIL合同2")
hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3")
qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书")
update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间')
create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间')
......@@ -397,6 +399,7 @@ class AFCSEOCRResult(models.Model):
hil_contract_1_ocr = models.TextField(null=True, verbose_name="HIL合同1")
hil_contract_2_ocr = models.TextField(null=True, verbose_name="HIL合同2")
hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3")
qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书")
update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间')
create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间')
......@@ -432,6 +435,7 @@ class HILSEOCRResult(models.Model):
hil_contract_1_ocr = models.TextField(null=True, verbose_name="HIL合同1")
hil_contract_2_ocr = models.TextField(null=True, verbose_name="HIL合同2")
hil_contract_3_ocr = models.TextField(null=True, verbose_name="HIL合同3")
qrs_ocr = models.TextField(null=True, verbose_name="AFC合同确认书")
update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间')
create_time = models.DateTimeField(auto_now_add=True, verbose_name='创建时间')
......
......@@ -9,7 +9,21 @@ from .get_char import Finder
import numpy as np
def predict(pdf_info):
def extract_info(ocr_results):
    """Extract the contract number from the first OCR page.

    Scans the entries stored under page key ``'0'`` and takes the first one
    whose text starts with ``'CH-B'``.

    Args:
        ocr_results: Mapping of page key -> mapping of entry index ->
            ``(bbox, text)`` pair. A missing or ``None`` page ``'0'`` is
            treated as having no entries.

    Returns:
        ``{'page_1': {'合同编号': {'words': ..., 'position': ...}}}``.
        ``words`` is the matched text and ``position`` is
        ``[bbox[0], bbox[1], bbox[2], bbox[-1]]``; both are ``None`` when no
        entry matches.
    """
    contract_no = {
        "words": None,
        "position": None
    }
    # `or {}` also covers a page that is present but None.
    first_page = ocr_results.get('0') or {}
    for bbox, text in first_page.values():
        # Skip entries whose text is not a string (e.g. None) instead of
        # crashing on `.startswith`.
        if not isinstance(text, str) or not text.startswith('CH-B'):
            continue
        contract_no['words'] = text
        contract_no['position'] = [bbox[0], bbox[1], bbox[2], bbox[-1]]
        break  # only the first match is used
    return {'page_1': {'合同编号': contract_no}}
def predict(pdf_info, is_qrs=False):
ocr_results = {}
for pno in pdf_info:
ocr_results[pno] = {}
......@@ -32,9 +46,12 @@ def predict(pdf_info):
keys = list(range(len(ocr_result)))
ocr_result = dict(zip(keys, ocr_result))
ocr_results[pno] = ocr_result
# 输入是整个 PDF 中的信息
f = Finder(pdf_info, ocr_results=ocr_results)
results = f.get_info()
if is_qrs:
results = extract_info(ocr_results)
else:
# 输入是整个 PDF 中的信息
f = Finder(pdf_info, ocr_results=ocr_results)
results = f.get_info()
return results
......
import pyodbc
# One-off schema migration: adds the `qrs_ocr` column to the HIL and AFC
# OCR result tables.
hil_sql = """
ALTER TABLE hil_ocr_result ADD qrs_ocr nvarchar(max);
ALTER TABLE hil_se_ocr_result ADD qrs_ocr nvarchar(max);
"""

afc_sql = """
ALTER TABLE afc_ocr_result ADD qrs_ocr nvarchar(max);
ALTER TABLE afc_se_ocr_result ADD qrs_ocr nvarchar(max);
"""


def _run_sql(connection_string, sql):
    """Execute ``sql`` on a new autocommit connection and always release it.

    The cursor and connection are closed even if ``execute`` raises, so a
    failed statement no longer leaks the connection.
    """
    cnxn = pyodbc.connect(connection_string, autocommit=True)
    try:
        cursor = cnxn.cursor()
        try:
            cursor.execute(sql)
        finally:
            cursor.close()
    finally:
        cnxn.close()


# NOTE(review): both connection strings are identical and name only the
# driver -- presumably server/database/credentials are filled in per
# environment; confirm before running.
_run_sql('DRIVER={ODBC Driver 17 for SQL Server};', hil_sql)
_run_sql('DRIVER={ODBC Driver 17 for SQL Server};', afc_sql)
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!