Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
f4fdd56f
authored
2021-10-07 15:46:48 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
add hil contract
1 parent
8f6f6de3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
34 additions
and
13 deletions
src/apps/doc/consts.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/ocr/wb.py
src/apps/doc/consts.py
View file @
f4fdd56
...
...
@@ -1008,6 +1008,24 @@ JYPZ_FIELD_ORDER = (("type", "标题"),
# Contract document types: each has a Chinese display name and an integer
# classify code.
CONTRACT_CN_NAME = '合同'
CONTRACT_CLASSIFY = 41

HIL_CONTRACT_1_CN_NAME = '售后回租合同'
HIL_CONTRACT_1_CLASSIFY = 43

HIL_CONTRACT_2_CN_NAME = '车辆租赁抵押合同'
HIL_CONTRACT_2_CLASSIFY = 44

HIL_CONTRACT_3_CN_NAME = '车辆处置协议'
HIL_CONTRACT_3_CLASSIFY = 45

# Maps each contract classify code to its Chinese display name.
# Insertion order (HIL 1-3 first, generic contract last) is kept as-is.
CONTRACT_MAP = dict((
    (HIL_CONTRACT_1_CLASSIFY, HIL_CONTRACT_1_CN_NAME),
    (HIL_CONTRACT_2_CLASSIFY, HIL_CONTRACT_2_CN_NAME),
    (HIL_CONTRACT_3_CLASSIFY, HIL_CONTRACT_3_CN_NAME),
    (CONTRACT_CLASSIFY, CONTRACT_CN_NAME),
))

# Every classify code that counts as a contract: exactly the keys of
# CONTRACT_MAP.
CONTRACT_SET = set(CONTRACT_MAP)

# Insurance policy
INSURANCE_CN_NAME = '保单'
INSURANCE_CLASSIFY = 42
...
...
src/apps/doc/management/commands/ocr_process.py
View file @
f4fdd56
...
...
@@ -207,7 +207,7 @@ class Command(BaseCommand, LoggerMixin):
else
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_EMPTY
))
def
contract_process
(
self
,
ocr_data
,
contract_result
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
):
def
contract_process
(
self
,
classify
,
ocr_data
,
contract_result
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
):
contract_dict
=
ocr_data
.
get
(
'data'
)
if
not
contract_dict
or
contract_dict
.
get
(
'page_num'
)
is
None
or
contract_dict
.
get
(
'page_info'
)
is
None
:
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_SUCCESS_EMPTY
))
...
...
@@ -243,7 +243,8 @@ class Command(BaseCommand, LoggerMixin):
for
row_list
in
sub_value
[
text_key
]:
rebuild_page_info
.
append
(
row_list
)
contract_result
.
setdefault
(
page_num
,
[])
.
append
(
rebuild_page_info
)
# contract_result.setdefault(page_num, []).append(rebuild_page_info)
contract_result
.
setdefault
(
classify
,
dict
())
.
setdefault
(
page_num
,
[])
.
append
(
rebuild_page_info
)
def
license1_process
(
self
,
ocr_data
,
license_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
,
do_dda
,
dda_id_bc_mapping
):
...
...
@@ -745,7 +746,7 @@ class Command(BaseCommand, LoggerMixin):
text_list
=
[]
else
:
text_list
=
[]
img_queue
.
put
((
img_path
,
text_list
))
img_queue
.
put
((
business_type
,
img_path
,
text_list
))
# except EDMSException as e:
# try:
# doc.status = DocStatus.PROCESS_FAILED.value
...
...
@@ -790,7 +791,7 @@ class Command(BaseCommand, LoggerMixin):
def
img_2_ocr_1
(
self
,
img_queue
,
todo_count_dict
,
res_dict
,
finish_queue
,
lock
,
url
,
error_list
):
while
len
(
error_list
)
==
0
or
not
img_queue
.
empty
():
try
:
img_path
,
text_list
=
img_queue
.
get
(
block
=
False
)
channel
,
img_path
,
text_list
=
img_queue
.
get
(
block
=
False
)
except
Exception
as
e
:
# self.online_log.info('{0} [img_2_ocr_1] [queue empty]'.format(self.log_base))
time
.
sleep
(
self
.
sleep_time_img_get
)
...
...
@@ -806,7 +807,8 @@ class Command(BaseCommand, LoggerMixin):
# 获取解码后的base64值
file_data
=
base64_data
.
decode
()
json_data_1
=
{
"file"
:
file_data
"file"
:
file_data
,
"channel"
:
channel
,
}
if
len
(
text_list
)
>
0
:
json_data_1
[
'text_list'
]
=
text_list
...
...
@@ -984,9 +986,9 @@ class Command(BaseCommand, LoggerMixin):
res_list
.
append
((
pno
,
ino
,
part_idx
,
consts
.
RES_FAILED_2
))
self
.
online_log
.
warn
(
'{0} [ocr_2 failed] [img_path={1}]'
.
format
(
self
.
log_base
,
img_path
))
elif
classify
==
consts
.
CONTRACT_CLASSIFY
:
self
.
contract_process
(
ocr_data
,
contract_result
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
)
elif
classify
in
consts
.
CONTRACT_SET
:
self
.
contract_process
(
classify
,
ocr_data
,
contract_result
,
res_list
,
pno
,
ino
,
part_idx
,
img_path
)
else
:
# 流水处理
bs_classify_set
.
add
(
classify
)
self
.
bs_process
(
wb
,
ocr_data
,
bs_summary
,
unknown_summary
,
classify
,
res_list
,
pno
,
ino
,
part_idx
)
...
...
@@ -1018,7 +1020,7 @@ class Command(BaseCommand, LoggerMixin):
self
.
online_log
.
info
(
'{0} [task={1}] [merged_bs_summary={2}] [license_summary={3}] [contract={4}] '
'[res_list={5}]'
.
format
(
self
.
log_base
,
task_str
,
merged_bs_summary
,
contract_result
,
license_summary
,
res_list
))
license_summary
,
contract_result
,
res_list
))
except
Exception
as
e
:
...
...
src/apps/doc/ocr/wb.py
View file @
f4fdd56
...
...
@@ -702,11 +702,12 @@ class BSWorkbook(Workbook):
if
field_str
is
not
None
:
count_list
.
append
((
field_str
,
count
))
def
contract_rebuild
(
self
,
contract_result
):
def
contract_rebuild
(
self
,
contract_result_dict
):
for
classify
,
contract_result
in
contract_result_dict
.
items
():
if
len
(
contract_result
)
==
0
:
return
ws
=
self
.
create_sheet
(
consts
.
CONTRACT_SHEET_NAME
)
for
i
in
range
(
15
):
continue
ws
=
self
.
create_sheet
(
consts
.
CONTRACT_MAP
.
get
(
classify
)
)
for
i
in
range
(
30
):
if
str
(
i
)
in
contract_result
:
page_num
=
str
(
i
)
info_list
=
contract_result
.
get
(
page_num
)
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment