From d8cc323a8b0af2b9c9583c407449e6c6c16ec537 Mon Sep 17 00:00:00 2001
From: zhouweiqi <zhouweiqi@situdata.com>
Date: Thu, 10 Aug 2023 18:47:29 +0800
Subject: [PATCH] add new fsm contract sc2

---
Review notes (free text in this area is not part of the commit):
* FSM_CONTRACT_TYPE_MAP now maps FSM_CONTRACT_SC2_CLASSIFY to 3 (was 2).
  ocr_process.py passes that value as `file_type` to fsm_predict(), and
  fsm_contract_ocr.predict() uses it as the index into `retriever_list`,
  where this patch appends Retriever(SC2_FIELD) at index 3. With 2, SC2
  documents were parsed with SC_FIELD and SC2_FIELD was never used.
* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy. Context-line whitespace may differ from the target tree; apply with
  `git apply --ignore-whitespace` if needed. Blob ids on the `index` lines
  were not recomputed after the edit.
* Comment lines added by this patch are written in English; context and
  removed lines are left untouched so they still match the target files.

 src/apps/doc/consts.py                          | 22 +++++++++++++++++++++-
 src/apps/doc/management/commands/ocr_process.py |  2 +-
 src/apps/doc/models.py                          |  4 ++++
 src/apps/doc/views.py                           |  2 +-
 src/common/fsm_econtract/const.py               | 34 ++++++++++++++++++++++++++++++++++
 src/common/fsm_econtract/fsm_contract_ocr.py    |  4 ++--
 src/common/tools/mssql_script26.py              | 27 +++++++++++++++++++++++++++
 7 files changed, 90 insertions(+), 5 deletions(-)
 create mode 100644 src/common/tools/mssql_script26.py

diff --git a/src/apps/doc/consts.py b/src/apps/doc/consts.py
index e7207ae..d03008a 100644
--- a/src/apps/doc/consts.py
+++ b/src/apps/doc/consts.py
@@ -1066,6 +1066,9 @@ FSM_CONTRACT_MSI_CLASSIFY = 52
 FSM_CONTRACT_SC_CN_NAME = '汽车销售合同'
 FSM_CONTRACT_SC_CLASSIFY = 53
 
+FSM_CONTRACT_SC2_CN_NAME = '汽车销售补充合同'
+FSM_CONTRACT_SC2_CLASSIFY = 54
+
 CONTRACT_SET = {
     CONTRACT_QRS_CLASSIFY,
     CONTRACT_CLASSIFY,
@@ -1075,6 +1078,7 @@ CONTRACT_SET = {
     FSM_CONTRACT_WEP_CLASSIFY,
     FSM_CONTRACT_MSI_CLASSIFY,
     FSM_CONTRACT_SC_CLASSIFY,
+    FSM_CONTRACT_SC2_CLASSIFY,
 }
 
 CONTRACT_MAP = {
@@ -1086,9 +1090,10 @@ CONTRACT_MAP = {
     FSM_CONTRACT_WEP_CLASSIFY: FSM_CONTRACT_WEP_CN_NAME,
     FSM_CONTRACT_MSI_CLASSIFY: FSM_CONTRACT_MSI_CN_NAME,
     FSM_CONTRACT_SC_CLASSIFY: FSM_CONTRACT_SC_CN_NAME,
+    FSM_CONTRACT_SC2_CLASSIFY: FSM_CONTRACT_SC2_CN_NAME,
 }
 
-FSM_CONTRACT_CLASSIFY_SET = {FSM_CONTRACT_WEP_CLASSIFY, FSM_CONTRACT_MSI_CLASSIFY, FSM_CONTRACT_SC_CLASSIFY}
+FSM_CONTRACT_CLASSIFY_SET = {FSM_CONTRACT_WEP_CLASSIFY, FSM_CONTRACT_MSI_CLASSIFY, FSM_CONTRACT_SC_CLASSIFY, FSM_CONTRACT_SC2_CLASSIFY}
 
 # 保单
 INSURANCE_CN_NAME = '保单'
@@ -1241,6 +1246,7 @@ HIL_CONTRACT_3_FIELD = 'hil_contract_3_ocr'
 FSM_CONTRACT_WEP_FIELD = 'fsm_wep_ocr'
 FSM_CONTRACT_MSI_FIELD = 'fsm_msi_ocr'
 FSM_CONTRACT_SC_FIELD = 'fsm_sc_ocr'
+FSM_CONTRACT_SC2_FIELD = 'fsm_sc2_ocr'
 
 BS_CLASSIFY = 10089
 
@@ -1270,6 +1276,7 @@ RESULT_MAPPING = {
     FSM_CONTRACT_WEP_CLASSIFY: FSM_CONTRACT_WEP_FIELD,
     FSM_CONTRACT_MSI_CLASSIFY: FSM_CONTRACT_MSI_FIELD,
     FSM_CONTRACT_SC_CLASSIFY: FSM_CONTRACT_SC_FIELD,
+    FSM_CONTRACT_SC2_CLASSIFY: FSM_CONTRACT_SC2_FIELD,
 }
 
 CA_ADD_COMPARE_FIELDS = (IC_OCR_FIELD, BL_OCR_FIELD, BS_FIELD)
@@ -1675,10 +1682,20 @@ SE_FSM_SC_MAP = {
     '签单日期': (12, '签单日期'),
 }
 
+SE_FSM_SC2_MAP = {
+    '姓名': (1, '姓名'),
+    '证件类型': (1, '证件类型'),
+    '证件号码': (1, '证件号码'),
+    '总价': (2, '总价'),
+    '客户签名': (4, '客户签名'),
+    '签单日期': (4, '签单日期'),
+}
+
 SE_FSM_CON_MAP = {
     FSM_CONTRACT_WEP_CLASSIFY: SE_FSM_WEP_MAP,
     FSM_CONTRACT_MSI_CLASSIFY: SE_FSM_MSI_MAP,
     FSM_CONTRACT_SC_CLASSIFY: SE_FSM_SC_MAP,
+    FSM_CONTRACT_SC2_CLASSIFY: SE_FSM_SC2_MAP,
 }
 
 SE_AFC_CON_QRS_FIELD = ['合同编号']
@@ -2417,11 +2434,13 @@ FSM_ECONTRACT_KEYWORDS_MAP = {
     AFC_PREFIX: [
         ('延长保修条款与条件', FSM_CONTRACT_WEP_CLASSIFY),
         ('长悦保养套餐服务合约', FSM_CONTRACT_MSI_CLASSIFY),
+        ('汽车销售合同补充合同', FSM_CONTRACT_SC2_CLASSIFY),
         ('汽车销售合同', FSM_CONTRACT_SC_CLASSIFY),
     ],
     HIL_PREFIX: [
         ('延长保修条款与条件', FSM_CONTRACT_WEP_CLASSIFY),
         ('长悦保养套餐服务合同', FSM_CONTRACT_MSI_CLASSIFY),
+        ('汽车销售合同补充合同', FSM_CONTRACT_SC2_CLASSIFY),
         ('汽车销售合同', FSM_CONTRACT_SC_CLASSIFY),
     ]
 }
@@ -2436,6 +2455,7 @@ FSM_CONTRACT_TYPE_MAP = {
     str(FSM_CONTRACT_WEP_CLASSIFY): 0,
     str(FSM_CONTRACT_MSI_CLASSIFY): 1,
     str(FSM_CONTRACT_SC_CLASSIFY): 2,
+    str(FSM_CONTRACT_SC2_CLASSIFY): 3,
 }
 
 RESULT_MAP = {
diff --git a/src/apps/doc/management/commands/ocr_process.py b/src/apps/doc/management/commands/ocr_process.py
index b7e129f..fb8449e 100644
--- a/src/apps/doc/management/commands/ocr_process.py
+++ b/src/apps/doc/management/commands/ocr_process.py
@@ -1531,7 +1531,7 @@ class Command(BaseCommand, LoggerMixin):
                                     'page_num': page_num,
                                     'page_info': page_info
                                 }
-                            # FSM合同 WEP MSI SC
+                            # FSM contracts: WEP MSI SC SC2
                             elif classify_1_str in consts.FSM_CONTRACT_TYPE_MAP:
                                 file_type = consts.FSM_CONTRACT_TYPE_MAP.get(classify_1_str)
                                 ocr_result = fsm_predict(pdf_handler.pdf_info, file_type)
diff --git a/src/apps/doc/models.py b/src/apps/doc/models.py
index 36c6a8e..5e51520 100644
--- a/src/apps/doc/models.py
+++ b/src/apps/doc/models.py
@@ -332,6 +332,7 @@ class AFCOCRResult(models.Model):
     fsm_wep_ocr = models.TextField(null=True, verbose_name="延长保修合同")
     fsm_msi_ocr = models.TextField(null=True, verbose_name="长悦保养合同")
     fsm_sc_ocr = models.TextField(null=True, verbose_name="汽车销售合同")
+    fsm_sc2_ocr = models.TextField(null=True, verbose_name="汽车销售合同补充合同")
     fsm_activited = models.IntegerField(null=False, default=0, verbose_name="fsm激活状态 1:激活")
 
 
@@ -374,6 +375,7 @@ class HILOCRResult(models.Model):
     fsm_wep_ocr = models.TextField(null=True, verbose_name="延长保修合同")
     fsm_msi_ocr = models.TextField(null=True, verbose_name="长悦保养合同")
     fsm_sc_ocr = models.TextField(null=True, verbose_name="汽车销售合同")
+    fsm_sc2_ocr = models.TextField(null=True, verbose_name="汽车销售合同补充合同")
     fsm_activited = models.IntegerField(null=False, default=0, verbose_name="fsm激活状态 1:激活")
 
     update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间')
@@ -414,6 +416,7 @@ class AFCSEOCRResult(models.Model):
     fsm_wep_ocr = models.TextField(null=True, verbose_name="延长保修合同")
     fsm_msi_ocr = models.TextField(null=True, verbose_name="长悦保养合同")
     fsm_sc_ocr = models.TextField(null=True, verbose_name="汽车销售合同")
+    fsm_sc2_ocr = models.TextField(null=True, verbose_name="汽车销售合同补充合同")
     fsm_activited = models.IntegerField(null=False, default=0, verbose_name="fsm激活状态 1:激活")
 
     update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间')
@@ -454,6 +457,7 @@ class HILSEOCRResult(models.Model):
     fsm_wep_ocr = models.TextField(null=True, verbose_name="延长保修合同")
     fsm_msi_ocr = models.TextField(null=True, verbose_name="长悦保养合同")
     fsm_sc_ocr = models.TextField(null=True, verbose_name="汽车销售合同")
+    fsm_sc2_ocr = models.TextField(null=True, verbose_name="汽车销售合同补充合同")
     fsm_activited = models.IntegerField(null=False, default=0, verbose_name="fsm激活状态 1:激活")
 
     update_time = models.DateTimeField(auto_now=True, verbose_name='修改时间')
diff --git a/src/apps/doc/views.py b/src/apps/doc/views.py
index e29b511..134f514 100644
--- a/src/apps/doc/views.py
+++ b/src/apps/doc/views.py
@@ -655,7 +655,7 @@ class UploadDocView(GenericView, DocHandler):
                 if keyword in document_name:
                     classify_1 = classify_1_tmp
                     break
-        # FSM合同:WEP/MSI/SC
+        # FSM contracts: WEP/MSI/SC/SC2
         elif data_source == consts.DATA_SOURCE_LIST[0] and document_scheme == consts.DOC_SCHEME_LIST[0]:
             for keyword, classify_1_tmp in consts.FSM_ECONTRACT_KEYWORDS_MAP.get(prefix):
                 if keyword in document_name:
diff --git a/src/common/fsm_econtract/const.py b/src/common/fsm_econtract/const.py
index b97e5f7..61d7a3d 100644
--- a/src/common/fsm_econtract/const.py
+++ b/src/common/fsm_econtract/const.py
@@ -73,3 +73,37 @@ SC_FIELD = {
         },
     }
 }
+
+# Supplementary agreement to the vehicle sales contract
+SC2_FIELD = {
+    "0": {
+        'keys': {
+            '姓名': [('姓名', (r'^姓名.?$', r'^企业名称.?$'), 'top1', {})],
+            '证件类型': [('证件类型', (r'^证件类型.?$', ), 'top1', {})],
+            '证件号码': [('证件号码', (r'^证件号码.?$', r'^统一社会信用代码.?$'), 'top1', {})],
+        },
+        'value': {
+            '姓名': ('text', 'right', {'offset_tuple': (-2, 8, 0.5, 0)}, ''),
+            '证件类型': ('text', 'right', {'offset_tuple': (-2, 6, 0.5, 0)}, ''),
+            '证件号码': ('text', 'right', {'offset_tuple': (-2, 6, 0.5, 0)}, ''),
+        },
+    },
+    "1": {
+        'keys': {
+            '总价': [('总价', (r'^调整后.?$', ), 'top1', {})],
+        },
+        'value': {
+            '总价': ('text', 'under', {'offset_tuple': (1, 1, -1, 2)}, ''),
+        },
+    },
+    "3": {
+        'keys': {
+            '客户签名': [('客户签名/盖章', (r'^客户签名/盖章.*$', r'^客户签名/盖章.*$'), 'top1', {})],
+            '签单日期': [('签单日期', (r'^签单日期.*签单日期.?$', ), 'top1', {})],
+        },
+        'value': {
+            '客户签名': ('img', 'under', {'offset_tuple': (1.5, 1, 0, 4), 'rigorous': True}, '无'),
+            '签单日期': ('img', 'right', {'offset_tuple': (0, 0, 1.1, 0), 'rigorous': True}, '无'),
+        },
+    }
+}
diff --git a/src/common/fsm_econtract/fsm_contract_ocr.py b/src/common/fsm_econtract/fsm_contract_ocr.py
index d8880fe..fa07b7a 100644
--- a/src/common/fsm_econtract/fsm_contract_ocr.py
+++ b/src/common/fsm_econtract/fsm_contract_ocr.py
@@ -1,8 +1,8 @@
 from .retriever import Retriever
-from .const import WEP_FIELD, MSI_FIELD, SC_FIELD
+from .const import WEP_FIELD, MSI_FIELD, SC_FIELD, SC2_FIELD
 from .tools import pdf_info_rebuild
 
-retriever_list = [Retriever(WEP_FIELD), Retriever(MSI_FIELD), Retriever(SC_FIELD)]
+retriever_list = [Retriever(WEP_FIELD), Retriever(MSI_FIELD), Retriever(SC_FIELD), Retriever(SC2_FIELD)]
 
 def predict(pdf_info, file_type=0):
     retriever = retriever_list[file_type]
diff --git a/src/common/tools/mssql_script26.py b/src/common/tools/mssql_script26.py
new file mode 100644
index 0000000..ba1b087
--- /dev/null
+++ b/src/common/tools/mssql_script26.py
@@ -0,0 +1,27 @@
+import pyodbc
+
+hil_sql = """
+    ALTER TABLE hil_ocr_result ADD fsm_sc2_ocr nvarchar(max);
+    ALTER TABLE hil_se_ocr_result ADD fsm_sc2_ocr nvarchar(max);
+"""
+
+afc_sql = """
+    ALTER TABLE afc_ocr_result ADD fsm_sc2_ocr nvarchar(max);
+    ALTER TABLE afc_se_ocr_result ADD fsm_sc2_ocr nvarchar(max);
+"""
+
+hil_cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};', autocommit=True)
+
+hil_cursor = hil_cnxn.cursor()
+hil_cursor.execute(hil_sql)
+
+hil_cursor.close()
+hil_cnxn.close()
+
+afc_cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};', autocommit=True)
+
+afc_cursor = afc_cnxn.cursor()
+afc_cursor.execute(afc_sql)
+
+afc_cursor.close()
+afc_cnxn.close()
-- 
libgit2 0.24.0