Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
36ca3f9a
authored
2022-01-26 18:33:04 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
fix hil e-contract
1 parent
560316ec
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
11 additions
and
26 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/views.py
src/apps/doc/consts.py
View file @
36ca3f9
...
...
@@ -2209,12 +2209,13 @@ FILE_NAME_PREFIX_MAP = {
ECONTRACT_KEYWORDS_MAP
=
{
AFC_PREFIX
:
[
(
'
电子签署-汽车抵押贷款合同'
,
CONTRACT_CLASSIFY
,
0
),
(
'
抵押贷款合同'
,
CONTRACT_CLASSIFY
),
# ('电子签署-抵押登记豁免函', HMH_CLASSIFY, 0),
],
HIL_PREFIX
:
[
(
'电子签署-售后回租合同'
,
HIL_CONTRACT_1_CLASSIFY
,
HIL_CONTRACT_3_CLASSIFY
),
(
'电子签署-汽车租赁抵押合同'
,
HIL_CONTRACT_2_CLASSIFY
,
0
),
(
'售后回租合同'
,
HIL_CONTRACT_1_CLASSIFY
),
(
'租赁抵押合同'
,
HIL_CONTRACT_2_CLASSIFY
),
(
'车辆处置协议'
,
HIL_CONTRACT_3_CLASSIFY
),
# ('电子签署-抵押登记豁免函', HMH_CLASSIFY, 0),
]
}
...
...
src/apps/doc/management/commands/ocr_process.py
View file @
36ca3f9
...
...
@@ -102,10 +102,10 @@ class Command(BaseCommand, LoggerMixin):
info_tuple
=
task_str
.
split
(
consts
.
SPLIT_STR
)
if
len
(
info_tuple
)
==
2
:
business_type
,
doc_id_str
=
info_tuple
classify_1_str
=
classify_2_str
=
'0'
classify_1_str
=
'0'
rebuild_task_str
=
task_str
else
:
business_type
,
doc_id_str
,
classify_1_str
,
classify_2_str
=
info_tuple
business_type
,
doc_id_str
,
classify_1_str
=
info_tuple
rebuild_task_str
=
'{0}{1}{2}'
.
format
(
business_type
,
consts
.
SPLIT_STR
,
doc_id_str
)
doc_id
=
int
(
doc_id_str
)
doc_class
=
HILDoc
if
business_type
==
consts
.
HIL_PREFIX
else
AFCDoc
...
...
@@ -130,7 +130,7 @@ class Command(BaseCommand, LoggerMixin):
else
:
self
.
online_log
.
info
(
'{0} [get_doc_info] [db save end] [task_str={1}] [is_priority={2}]'
.
format
(
self
.
log_base
,
task_str
,
is_priority
))
return
doc
,
business_type
,
rebuild_task_str
,
classify_1_str
,
classify_2_str
return
doc
,
business_type
,
rebuild_task_str
,
classify_1_str
# def pdf_download(self, doc, pdf_path):
# if not doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX):
...
...
@@ -1014,7 +1014,7 @@ class Command(BaseCommand, LoggerMixin):
while
self
.
switch
:
try
:
# 1. 从队列获取文件信息
doc
,
business_type
,
task_str
,
classify_1_str
,
classify_2_str
=
self
.
get_doc_info
()
doc
,
business_type
,
task_str
,
classify_1_str
=
self
.
get_doc_info
()
# 队列为空时的处理
if
doc
is
None
:
time
.
sleep
(
self
.
sleep_time_doc_get
)
...
...
@@ -1188,10 +1188,8 @@ class Command(BaseCommand, LoggerMixin):
'page_num'
:
page_num
,
'page_info'
:
page_info
}
else
:
file_type_1
=
consts
.
HIL_CONTRACT_TYPE_MAP
.
get
(
classify_1_str
)
file_type_2
=
consts
.
HIL_CONTRACT_TYPE_MAP
.
get
(
classify_2_str
)
ocr_result_1
=
hil_predict
(
pdf_handler
.
pdf_info
,
file_type_1
)
rebuild_res_1
=
{}
page_res
=
{}
...
...
@@ -1205,19 +1203,6 @@ class Command(BaseCommand, LoggerMixin):
'page_num'
:
page_num
,
'page_info'
:
page_info
}
if
isinstance
(
file_type_2
,
int
):
rebuild_res_2
=
{}
ocr_result_2
=
hil_predict
(
pdf_handler
.
pdf_info
,
file_type_2
)
for
field_name
,
field_info
in
ocr_result_2
.
items
():
page_num
=
field_info
.
pop
(
'page'
,
'page_1'
)
rebuild_res_2
.
setdefault
(
page_num
,
dict
())[
field_name
]
=
field_info
for
page_num
,
page_info
in
ocr_result_2
.
items
():
if
isinstance
(
page_num
,
str
)
and
page_num
.
startswith
(
'page_'
):
page_res
[
page_num
]
=
{
'classify'
:
int
(
classify_2_str
),
'page_num'
:
page_num
,
'page_info'
:
page_info
}
contract_res
=
{}
for
img_path_tmp
,
page_key
in
pdf_handler
.
img_path_pno_list
:
...
...
src/apps/doc/views.py
View file @
36ca3f9
...
...
@@ -586,14 +586,13 @@ class UploadDocView(GenericView, DocHandler):
# 3. 选择队列进入
is_priority
=
PriorityApplication
.
objects
.
filter
(
application_id
=
application_id
,
on_off
=
True
)
.
exists
()
classify_1
=
classify_2
=
0
classify_1
=
0
if
data_source
==
consts
.
DATA_SOURCE_LIST
[
-
1
]
and
document_scheme
==
consts
.
DOC_SCHEME_LIST
[
1
]:
for
keyword
,
classify_1_tmp
,
classify_2_tmp
in
consts
.
ECONTRACT_KEYWORDS_MAP
.
get
(
prefix
):
for
keyword
,
classify_1_tmp
in
consts
.
ECONTRACT_KEYWORDS_MAP
.
get
(
prefix
):
if
keyword
in
document_name
:
classify_1
=
classify_1_tmp
classify_2
=
classify_2_tmp
break
task
=
consts
.
SPLIT_STR
.
join
([
prefix
,
str
(
doc
.
id
),
str
(
classify_1
)
,
str
(
classify_2
)
])
task
=
consts
.
SPLIT_STR
.
join
([
prefix
,
str
(
doc
.
id
),
str
(
classify_1
)])
enqueue_res
=
rh
.
enqueue
([
task
],
is_priority
)
self
.
running_log
.
info
(
'[doc upload success] [args={0}] [business_type={1}] [doc_id={2}] '
'[is_priority={3}] [enqueue_res={4}]'
.
format
(
args
,
prefix
,
doc
.
id
,
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment