Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
b7f5124c
authored
2023-09-14 10:37:04 +0800
by
冯轩
Browse Files
Options
Browse Files
Tag
Download
Plain Diff
Merge remote-tracking branch 'origin/feature/sc'
2 parents
1a54c4a0
9f1723cc
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
90 additions
and
5 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/models.py
src/apps/doc/views.py
src/common/fsm_econtract/const.py
src/common/fsm_econtract/fsm_contract_ocr.py
src/common/tools/mssql_script26.py
src/apps/doc/consts.py
View file @
b7f5124
...
...
@@ -1066,6 +1066,9 @@ FSM_CONTRACT_MSI_CLASSIFY = 52
FSM_CONTRACT_SC_CN_NAME
=
'汽车销售合同'
FSM_CONTRACT_SC_CLASSIFY
=
53
FSM_CONTRACT_SC2_CN_NAME
=
'汽车销售补充合同'
FSM_CONTRACT_SC2_CLASSIFY
=
54
CONTRACT_SET
=
{
CONTRACT_QRS_CLASSIFY
,
CONTRACT_CLASSIFY
,
...
...
@@ -1075,6 +1078,7 @@ CONTRACT_SET = {
FSM_CONTRACT_WEP_CLASSIFY
,
FSM_CONTRACT_MSI_CLASSIFY
,
FSM_CONTRACT_SC_CLASSIFY
,
FSM_CONTRACT_SC2_CLASSIFY
,
}
CONTRACT_MAP
=
{
...
...
@@ -1086,9 +1090,10 @@ CONTRACT_MAP = {
FSM_CONTRACT_WEP_CLASSIFY
:
FSM_CONTRACT_WEP_CN_NAME
,
FSM_CONTRACT_MSI_CLASSIFY
:
FSM_CONTRACT_MSI_CN_NAME
,
FSM_CONTRACT_SC_CLASSIFY
:
FSM_CONTRACT_SC_CN_NAME
,
FSM_CONTRACT_SC2_CLASSIFY
:
FSM_CONTRACT_SC2_CN_NAME
,
}
FSM_CONTRACT_CLASSIFY_SET
=
{
FSM_CONTRACT_WEP_CLASSIFY
,
FSM_CONTRACT_MSI_CLASSIFY
,
FSM_CONTRACT_SC_CLASSIFY
}
FSM_CONTRACT_CLASSIFY_SET
=
{
FSM_CONTRACT_WEP_CLASSIFY
,
FSM_CONTRACT_MSI_CLASSIFY
,
FSM_CONTRACT_SC_CLASSIFY
,
FSM_CONTRACT_SC2_CLASSIFY
}
# 保单
INSURANCE_CN_NAME
=
'保单'
...
...
@@ -1241,6 +1246,7 @@ HIL_CONTRACT_3_FIELD = 'hil_contract_3_ocr'
FSM_CONTRACT_WEP_FIELD
=
'fsm_wep_ocr'
FSM_CONTRACT_MSI_FIELD
=
'fsm_msi_ocr'
FSM_CONTRACT_SC_FIELD
=
'fsm_sc_ocr'
FSM_CONTRACT_SC2_FIELD
=
'fsm_sc2_ocr'
BS_CLASSIFY
=
10089
...
...
@@ -1270,6 +1276,7 @@ RESULT_MAPPING = {
FSM_CONTRACT_WEP_CLASSIFY
:
FSM_CONTRACT_WEP_FIELD
,
FSM_CONTRACT_MSI_CLASSIFY
:
FSM_CONTRACT_MSI_FIELD
,
FSM_CONTRACT_SC_CLASSIFY
:
FSM_CONTRACT_SC_FIELD
,
FSM_CONTRACT_SC2_CLASSIFY
:
FSM_CONTRACT_SC2_FIELD
,
}
CA_ADD_COMPARE_FIELDS
=
(
IC_OCR_FIELD
,
BL_OCR_FIELD
,
BS_FIELD
)
...
...
@@ -1677,10 +1684,20 @@ SE_FSM_SC_MAP = {
'签单日期'
:
(
12
,
'签单日期'
),
}
# Field map for the FSM vehicle-sales supplementary contract (SC2).
# Every entry has the shape  field name -> (number, label); for SC2 the label
# is always the field name itself, so the map is built from (field, number)
# pairs instead of repeating each name twice.
# NOTE(review): the number looks like a 1-based page index of the contract
# PDF - confirm against the code that consumes SE_FSM_CON_MAP.
SE_FSM_SC2_MAP = {
    field: (number, field)
    for field, number in (
        ('姓名', 1),
        ('证件类型', 1),
        ('证件号码', 1),
        ('总价', 2),
        ('客户签名', 4),
        ('签单日期', 4),
    )
}
# Dispatch table: FSM contract classify constant -> the SE field map declared
# for that contract type (WEP, MSI, SC and SC2 each get their own map).
SE_FSM_CON_MAP
=
{
FSM_CONTRACT_WEP_CLASSIFY
:
SE_FSM_WEP_MAP
,
FSM_CONTRACT_MSI_CLASSIFY
:
SE_FSM_MSI_MAP
,
FSM_CONTRACT_SC_CLASSIFY
:
SE_FSM_SC_MAP
,
FSM_CONTRACT_SC2_CLASSIFY
:
SE_FSM_SC2_MAP
,
}
SE_AFC_CON_QRS_FIELD
=
[
'合同编号'
]
...
...
@@ -2419,11 +2436,13 @@ FSM_ECONTRACT_KEYWORDS_MAP = {
AFC_PREFIX
:
[
(
'延长保修条款与条件'
,
FSM_CONTRACT_WEP_CLASSIFY
),
(
'长悦保养套餐服务合约'
,
FSM_CONTRACT_MSI_CLASSIFY
),
(
'汽车销售合同补充合同'
,
FSM_CONTRACT_SC2_CLASSIFY
),
(
'汽车销售合同'
,
FSM_CONTRACT_SC_CLASSIFY
),
],
HIL_PREFIX
:
[
(
'延长保修条款与条件'
,
FSM_CONTRACT_WEP_CLASSIFY
),
(
'长悦保养套餐服务合同'
,
FSM_CONTRACT_MSI_CLASSIFY
),
(
'汽车销售合同补充合同'
,
FSM_CONTRACT_SC2_CLASSIFY
),
(
'汽车销售合同'
,
FSM_CONTRACT_SC_CLASSIFY
),
]
}
...
...
@@ -2438,6 +2457,7 @@ FSM_CONTRACT_TYPE_MAP = {
str
(
FSM_CONTRACT_WEP_CLASSIFY
):
0
,
str
(
FSM_CONTRACT_MSI_CLASSIFY
):
1
,
str
(
FSM_CONTRACT_SC_CLASSIFY
):
2
,
str
(
FSM_CONTRACT_SC2_CLASSIFY
):
3
,
}
RESULT_MAP
=
{
...
...
src/apps/doc/management/commands/ocr_process.py
View file @
b7f5124
...
...
@@ -1531,7 +1531,7 @@ class Command(BaseCommand, LoggerMixin):
'page_num'
:
page_num
,
'page_info'
:
page_info
}
# FSM合同 WEP MSI SC
# FSM合同 WEP MSI SC
SC2
elif
classify_1_str
in
consts
.
FSM_CONTRACT_TYPE_MAP
:
file_type
=
consts
.
FSM_CONTRACT_TYPE_MAP
.
get
(
classify_1_str
)
ocr_result
=
fsm_predict
(
pdf_handler
.
pdf_info
,
file_type
)
...
...
src/apps/doc/models.py
View file @
b7f5124
...
...
@@ -332,6 +332,7 @@ class AFCOCRResult(models.Model):
fsm_wep_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"延长保修合同"
)
fsm_msi_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"长悦保养合同"
)
fsm_sc_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"汽车销售合同"
)
fsm_sc2_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"汽车销售合同补充合同"
)
fsm_activited
=
models
.
IntegerField
(
null
=
False
,
default
=
0
,
verbose_name
=
"fsm激活状态 1:激活"
)
...
...
@@ -374,6 +375,7 @@ class HILOCRResult(models.Model):
fsm_wep_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"延长保修合同"
)
fsm_msi_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"长悦保养合同"
)
fsm_sc_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"汽车销售合同"
)
fsm_sc2_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"汽车销售合同补充合同"
)
fsm_activited
=
models
.
IntegerField
(
null
=
False
,
default
=
0
,
verbose_name
=
"fsm激活状态 1:激活"
)
update_time
=
models
.
DateTimeField
(
auto_now
=
True
,
verbose_name
=
'修改时间'
)
...
...
@@ -414,6 +416,7 @@ class AFCSEOCRResult(models.Model):
fsm_wep_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"延长保修合同"
)
fsm_msi_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"长悦保养合同"
)
fsm_sc_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"汽车销售合同"
)
fsm_sc2_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"汽车销售合同补充合同"
)
fsm_activited
=
models
.
IntegerField
(
null
=
False
,
default
=
0
,
verbose_name
=
"fsm激活状态 1:激活"
)
update_time
=
models
.
DateTimeField
(
auto_now
=
True
,
verbose_name
=
'修改时间'
)
...
...
@@ -454,6 +457,7 @@ class HILSEOCRResult(models.Model):
fsm_wep_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"延长保修合同"
)
fsm_msi_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"长悦保养合同"
)
fsm_sc_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"汽车销售合同"
)
fsm_sc2_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"汽车销售合同补充合同"
)
fsm_activited
=
models
.
IntegerField
(
null
=
False
,
default
=
0
,
verbose_name
=
"fsm激活状态 1:激活"
)
update_time
=
models
.
DateTimeField
(
auto_now
=
True
,
verbose_name
=
'修改时间'
)
...
...
src/apps/doc/views.py
View file @
b7f5124
...
...
@@ -655,7 +655,7 @@ class UploadDocView(GenericView, DocHandler):
if
keyword
in
document_name
:
classify_1
=
classify_1_tmp
break
# FSM合同:WEP/MSI/SC
# FSM合同:WEP/MSI/SC
/SC2
elif
data_source
==
consts
.
DATA_SOURCE_LIST
[
0
]
and
document_scheme
==
consts
.
DOC_SCHEME_LIST
[
0
]:
for
keyword
,
classify_1_tmp
in
consts
.
FSM_ECONTRACT_KEYWORDS_MAP
.
get
(
prefix
):
if
keyword
in
document_name
:
...
...
src/common/fsm_econtract/const.py
View file @
b7f5124
...
...
@@ -73,3 +73,37 @@ SC_FIELD = {
},
}
}
# Supplementary agreement to the vehicle sales contract (SC2).
#
# Layout of this table, as far as the literal itself shows:
#   * top-level keys are strings ("0", "1", "3"); they look like zero-based
#     page indices - NOTE(review): confirm against Retriever.
#   * 'keys'  : field name -> list of (label, regex tuple, 'top1', {}) entries
#               describing the anchor text to look for.
#   * 'value' : field name -> (kind, direction, options, default) where kind is
#               'text' or 'img', direction is 'right' or 'under', and options
#               carries an 'offset_tuple' plus, for images, 'rigorous'.
# The exact meaning of 'top1', 'offset_tuple' and 'rigorous' is defined by the
# Retriever that consumes this dict and is not visible here.
SC2_FIELD = {
    "0": {
        'keys': {
            '姓名': [
                ('姓名', (r'^姓名.?$', r'^企业名称.?$'), 'top1', {}),
            ],
            '证件类型': [
                ('证件类型', (r'^证件类型.?$',), 'top1', {}),
            ],
            '证件号码': [
                ('证件号码', (r'^证件号码.?$', r'^统一社会信用代码.?$'), 'top1', {}),
            ],
        },
        'value': {
            '姓名': ('text', 'right', {'offset_tuple': (-2, 8, 0.5, 0)}, ''),
            '证件类型': ('text', 'right', {'offset_tuple': (-2, 6, 0.5, 0)}, ''),
            '证件号码': ('text', 'right', {'offset_tuple': (-2, 6, 0.5, 0)}, ''),
        },
    },
    "1": {
        'keys': {
            '总价': [
                ('总价', (r'^调整后.?$',), 'top1', {}),
            ],
        },
        'value': {
            '总价': ('text', 'under', {'offset_tuple': (1, 1, -1, 2)}, ''),
        },
    },
    "3": {
        'keys': {
            # NOTE(review): both patterns in this tuple are identical; kept
            # as-is because the Retriever's handling of the tuple is not
            # visible from here.
            '客户签名': [
                ('客户签名/盖章', (r'^客户签名/盖章.*$', r'^客户签名/盖章.*$'), 'top1', {}),
            ],
            '签单日期': [
                ('签单日期', (r'^签单日期.*签单日期.?$',), 'top1', {}),
            ],
        },
        'value': {
            '客户签名': (
                'img',
                'under',
                {'offset_tuple': (1.5, 1, 0, 4), 'rigorous': True},
                '无',
            ),
            '签单日期': (
                'img',
                'right',
                {'offset_tuple': (0, 0, 1.1, 0), 'rigorous': True},
                '无',
            ),
        },
    },
}
...
...
src/common/fsm_econtract/fsm_contract_ocr.py
View file @
b7f5124
from
.retriever
import
Retriever
from
.const
import
WEP_FIELD
,
MSI_FIELD
,
SC_FIELD
from
.const
import
WEP_FIELD
,
MSI_FIELD
,
SC_FIELD
,
SC2_FIELD
from
.tools
import
pdf_info_rebuild
retriever_list
=
[
Retriever
(
WEP_FIELD
),
Retriever
(
MSI_FIELD
),
Retriever
(
SC_FIELD
)]
retriever_list
=
[
Retriever
(
WEP_FIELD
),
Retriever
(
MSI_FIELD
),
Retriever
(
SC_FIELD
)
,
Retriever
(
SC2_FIELD
)
]
def
predict
(
pdf_info
,
file_type
=
0
):
retriever
=
retriever_list
[
file_type
]
...
...
src/common/tools/mssql_script26.py
0 → 100644
View file @
b7f5124
"""One-off schema migration: add the ``fsm_sc2_ocr`` column to the OCR result tables.

Running this module executes the HIL batch first and then the AFC batch, each
on its own autocommit connection. The ALTER statements carry no existence
guard, so the script is meant to be run once per database.
"""
from contextlib import closing

import pyodbc

hil_sql = """
ALTER TABLE hil_ocr_result ADD fsm_sc2_ocr nvarchar(max);
ALTER TABLE hil_se_ocr_result ADD fsm_sc2_ocr nvarchar(max);
"""

afc_sql = """
ALTER TABLE afc_ocr_result ADD fsm_sc2_ocr nvarchar(max);
ALTER TABLE afc_se_ocr_result ADD fsm_sc2_ocr nvarchar(max);
"""


def _run_batch(connection_string, sql):
    """Execute *sql* on a fresh autocommit connection and always release it.

    Args:
        connection_string: ODBC connection string handed to ``pyodbc.connect``.
        sql: The SQL batch to execute.

    Raises:
        pyodbc.Error: Propagated unchanged from connect/execute. The cursor
            and the connection are closed before the exception leaves this
            function, so a failed migration does not leak a session.
    """
    # closing() is used because the goal is an explicit close() on both
    # objects, exactly as the original script did on its success path.
    with closing(pyodbc.connect(connection_string, autocommit=True)) as cnxn:
        with closing(cnxn.cursor()) as cursor:
            cursor.execute(sql)


# NOTE(review): as committed, both connection strings name only the ODBC
# driver (no SERVER / DATABASE / credentials) - presumably those are filled in
# before the script is run; confirm with the deployment procedure.
_run_batch('DRIVER={ODBC Driver 17 for SQL Server};', hil_sql)
_run_batch('DRIVER={ODBC Driver 17 for SQL Server};', afc_sql)
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment