Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
573f28d7
authored
2022-11-09 16:10:06 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
add qrs ocr
1 parent
3690e26d
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
84 additions
and
5 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/models.py
src/common/electronic_afc_contract/afc_contract_ocr.py
src/common/tools/mssql_script23.py
src/apps/doc/consts.py
View file @
573f28d
...
...
@@ -1042,6 +1042,10 @@ JYPZ_FIELD_ORDER = (("type", "标题"),
CONTRACT_CN_NAME
=
'合同'
CONTRACT_CLASSIFY
=
41
# 合同-送达地址确认书
CONTRACT_QRS_CN_NAME
=
'送达地址确认书'
CONTRACT_QRS_CLASSIFY
=
49
# 合同编号: 每页
HIL_CONTRACT_1_CN_NAME
=
'售后回租合同'
...
...
@@ -1053,13 +1057,14 @@ HIL_CONTRACT_2_CLASSIFY = 44
HIL_CONTRACT_3_CN_NAME
=
'车辆处置协议'
HIL_CONTRACT_3_CLASSIFY
=
45
CONTRACT_SET
=
{
CONTRACT_CLASSIFY
,
HIL_CONTRACT_1_CLASSIFY
,
HIL_CONTRACT_2_CLASSIFY
,
HIL_CONTRACT_3_CLASSIFY
}
CONTRACT_SET
=
{
CONTRACT_
QRS_CLASSIFY
,
CONTRACT_
CLASSIFY
,
HIL_CONTRACT_1_CLASSIFY
,
HIL_CONTRACT_2_CLASSIFY
,
HIL_CONTRACT_3_CLASSIFY
}
CONTRACT_MAP
=
{
HIL_CONTRACT_1_CLASSIFY
:
HIL_CONTRACT_1_CN_NAME
,
HIL_CONTRACT_2_CLASSIFY
:
HIL_CONTRACT_2_CN_NAME
,
HIL_CONTRACT_3_CLASSIFY
:
HIL_CONTRACT_3_CN_NAME
,
CONTRACT_CLASSIFY
:
CONTRACT_CN_NAME
,
CONTRACT_QRS_CLASSIFY
:
CONTRACT_QRS_CN_NAME
,
}
# 保单
...
...
@@ -1203,6 +1208,7 @@ DDA_OCR_FIELD = 'bs_ocr'
HMH_OCR_FIELD
=
'hmh_ocr'
JYPZ_OCR_FIELD
=
'jypz_ocr'
HT_FIELD
=
'ht_ocr'
QRS_FIELD
=
'qrs_ocr'
BD_FIELD
=
'bd_ocr'
BS_FIELD
=
'bss_ocr'
HIL_CONTRACT_1_FIELD
=
'hil_contract_1_ocr'
...
...
@@ -1226,6 +1232,7 @@ RESULT_MAPPING = {
HMH_CLASSIFY
:
HMH_OCR_FIELD
,
JYPZ_CLASSIFY
:
JYPZ_OCR_FIELD
,
CONTRACT_CLASSIFY
:
HT_FIELD
,
CONTRACT_QRS_CLASSIFY
:
QRS_FIELD
,
INSURANCE_CLASSIFY
:
BD_FIELD
,
BS_CLASSIFY
:
BS_FIELD
,
HIL_CONTRACT_1_CLASSIFY
:
HIL_CONTRACT_1_FIELD
,
...
...
@@ -1479,6 +1486,10 @@ AFC_CON_FIELD_ORDER_LTGT = (
(
'标准利率'
,
'标准利率'
),
)
SE_AFC_CON_QRS_MAP
=
{
'合同编号'
:
(
1
,
'合同编号'
),
}
SE_AFC_CON_MAP
=
{
'合同编号-每页'
:
(
None
,
None
,
'合同编号'
,
None
),
'所购车辆价格-小写-重要条款'
:
(
1
,
1
,
'所购车辆价格'
,
None
),
...
...
@@ -2308,6 +2319,7 @@ FILE_NAME_PREFIX_MAP = {
ECONTRACT_KEYWORDS_MAP
=
{
AFC_PREFIX
:
[
(
'抵押贷款合同'
,
CONTRACT_CLASSIFY
),
(
'送达地址确认书'
,
CONTRACT_QRS_CLASSIFY
),
# ('电子签署-抵押登记豁免函', HMH_CLASSIFY, 0),
],
HIL_PREFIX
:
[
...
...
src/apps/doc/management/commands/ocr_process.py
View file @
573f28d
...
...
@@ -987,6 +987,15 @@ class Command(BaseCommand, LoggerMixin):
# }
# }
license_summary
[
classify
]
=
[
res
]
elif
classify
==
consts
.
CONTRACT_QRS_CLASSIFY
:
res
=
{}
for
key
,
(
pno
,
key1
)
in
consts
.
SE_AFC_CON_QRS_MAP
.
items
():
res
[
key
]
=
page_info_dict
.
get
(
str
(
pno
),
{})
.
get
(
key1
,
''
)
res
.
setdefault
(
consts
.
IMG_PATH_KEY
,
dict
())[
key
]
=
page_info_dict
.
get
(
str
(
pno
),
{})
.
get
(
consts
.
IMG_PATH_KEY
,
''
)
res
.
setdefault
(
consts
.
ALL_POSITION_KEY
,
dict
())[
key
]
=
page_info_dict
.
get
(
str
(
pno
),
{})
.
get
(
consts
.
ALL_POSITION_KEY
,
{})
.
get
(
key1
,
[])
license_summary
[
classify
]
=
[
res
]
else
:
res
=
{}
for
key
,
(
pno1
,
pno2
,
end_idx
,
key1
,
key2
)
in
consts
.
SE_HIL_CON_MAP
[
classify
]
.
items
():
...
...
@@ -1474,6 +1483,16 @@ class Command(BaseCommand, LoggerMixin):
'page_num'
:
page_num
,
'page_info'
:
page_info
}
elif
classify_1_str
==
str
(
consts
.
CONTRACT_QRS_CLASSIFY
):
ocr_result
=
afc_predict
(
pdf_handler
.
pdf_info
,
is_qrs
=
True
)
page_num
=
'page_1'
page_res
=
{
page_num
:
{
'classify'
:
int
(
classify_1_str
),
'page_num'
:
page_num
,
'page_info'
:
ocr_result
.
pop
(
page_num
,
{})
}
}
else
:
file_type_1
=
consts
.
HIL_CONTRACT_TYPE_MAP
.
get
(
classify_1_str
)
ocr_result_1
=
hil_predict
(
pdf_handler
.
pdf_info
,
file_type_1
)
...
...
src/apps/doc/models.py
View file @
573f28d
...
...
@@ -328,6 +328,7 @@ class AFCOCRResult(models.Model):
hil_contract_1_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"HIL合同1"
)
hil_contract_2_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"HIL合同2"
)
hil_contract_3_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"HIL合同3"
)
qrs_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"AFC合同确认书"
)
update_time
=
models
.
DateTimeField
(
auto_now
=
True
,
verbose_name
=
'修改时间'
)
create_time
=
models
.
DateTimeField
(
auto_now_add
=
True
,
verbose_name
=
'创建时间'
)
...
...
@@ -363,6 +364,7 @@ class HILOCRResult(models.Model):
hil_contract_1_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"HIL合同1"
)
hil_contract_2_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"HIL合同2"
)
hil_contract_3_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"HIL合同3"
)
qrs_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"AFC合同确认书"
)
update_time
=
models
.
DateTimeField
(
auto_now
=
True
,
verbose_name
=
'修改时间'
)
create_time
=
models
.
DateTimeField
(
auto_now_add
=
True
,
verbose_name
=
'创建时间'
)
...
...
@@ -397,6 +399,7 @@ class AFCSEOCRResult(models.Model):
hil_contract_1_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"HIL合同1"
)
hil_contract_2_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"HIL合同2"
)
hil_contract_3_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"HIL合同3"
)
qrs_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"AFC合同确认书"
)
update_time
=
models
.
DateTimeField
(
auto_now
=
True
,
verbose_name
=
'修改时间'
)
create_time
=
models
.
DateTimeField
(
auto_now_add
=
True
,
verbose_name
=
'创建时间'
)
...
...
@@ -432,6 +435,7 @@ class HILSEOCRResult(models.Model):
hil_contract_1_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"HIL合同1"
)
hil_contract_2_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"HIL合同2"
)
hil_contract_3_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"HIL合同3"
)
qrs_ocr
=
models
.
TextField
(
null
=
True
,
verbose_name
=
"AFC合同确认书"
)
update_time
=
models
.
DateTimeField
(
auto_now
=
True
,
verbose_name
=
'修改时间'
)
create_time
=
models
.
DateTimeField
(
auto_now_add
=
True
,
verbose_name
=
'创建时间'
)
...
...
src/common/electronic_afc_contract/afc_contract_ocr.py
View file @
573f28d
...
...
@@ -9,7 +9,21 @@ from .get_char import Finder
import
numpy
as
np
def
predict
(
pdf_info
):
def extract_info(ocr_results):
    """Pick the contract number out of the first page's OCR entries.

    Only the page stored under key ``'0'`` is inspected. The first entry
    whose text starts with ``'CH-B'`` wins; later matches are ignored.

    Args:
        ocr_results: Mapping of page key to a mapping whose values are
            ``(bbox, text)`` pairs.

    Returns:
        ``{'page_1': {'合同编号': {'words': ..., 'position': ...}}}``.
        ``words`` and ``position`` are both ``None`` when no entry matches
        or when page ``'0'`` is absent.
    """
    contract_no = {"words": None, "position": None}

    # A missing (or None) first page is treated as "nothing recognised".
    first_page = ocr_results.get('0') or {}

    for bbox, text in first_page.values():
        # Skip entries without usable text instead of failing on
        # ``None.startswith``; one bad entry must not abort the whole scan.
        if not isinstance(text, str) or not text.startswith('CH-B'):
            continue
        contract_no['words'] = text
        # NOTE(review): presumably bbox is a flat coordinate sequence and
        # these four picks form the bounding rectangle -- confirm upstream.
        contract_no['position'] = [bbox[0], bbox[1], bbox[2], bbox[-1]]
        break

    return {'page_1': {'合同编号': contract_no}}
def
predict
(
pdf_info
,
is_qrs
=
False
):
ocr_results
=
{}
for
pno
in
pdf_info
:
ocr_results
[
pno
]
=
{}
...
...
@@ -32,9 +46,12 @@ def predict(pdf_info):
keys
=
list
(
range
(
len
(
ocr_result
)))
ocr_result
=
dict
(
zip
(
keys
,
ocr_result
))
ocr_results
[
pno
]
=
ocr_result
# 输入是整个 PDF 中的信息
f
=
Finder
(
pdf_info
,
ocr_results
=
ocr_results
)
results
=
f
.
get_info
()
if
is_qrs
:
results
=
extract_info
(
ocr_results
)
else
:
# 输入是整个 PDF 中的信息
f
=
Finder
(
pdf_info
,
ocr_results
=
ocr_results
)
results
=
f
.
get_info
()
return
results
...
...
src/common/tools/mssql_script23.py
0 → 100644
View file @
573f28d
# One-off schema migration: adds a ``qrs_ocr`` nvarchar(max) column to the
# HIL and AFC OCR result tables (regular and "se" variants).
#
# NOTE(review): the ALTER TABLE ... ADD statements are unguarded, so a second
# run against an already-migrated database is expected to fail -- confirm
# before re-running.
from contextlib import closing

import pyodbc

hil_sql = """
ALTER TABLE hil_ocr_result ADD qrs_ocr nvarchar(max);
ALTER TABLE hil_se_ocr_result ADD qrs_ocr nvarchar(max);
"""

afc_sql = """
ALTER TABLE afc_ocr_result ADD qrs_ocr nvarchar(max);
ALTER TABLE afc_se_ocr_result ADD qrs_ocr nvarchar(max);
"""

# NOTE(review): both connection strings are identical as committed and carry
# no server/database/credentials -- presumably filled in per environment.

# ``closing`` guarantees close() runs even when execute() raises; the cursor
# is closed first, then the connection, matching the original order.
with closing(pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};', autocommit=True)) as hil_cnxn:
    with closing(hil_cnxn.cursor()) as hil_cursor:
        hil_cursor.execute(hil_sql)

# The AFC migration only runs if the HIL one succeeded (an exception above
# propagates and stops the script), same as before.
with closing(pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};', autocommit=True)) as afc_cnxn:
    with closing(afc_cnxn.cursor()) as afc_cursor:
        afc_cursor.execute(afc_sql)
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment