Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
550d86c1
authored
2020-11-12 22:23:52 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
fix bug
1 parent
6934c592
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
22 additions
and
5 deletions
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/management/commands/ocr_process.py
View file @
550d86c
...
...
@@ -71,6 +71,7 @@ class Command(BaseCommand, LoggerMixin):
self
.
cronjob_log
.
info
(
'{0} [get_doc_info] [queue empty]'
.
format
(
self
.
log_base
))
return
None
,
None
,
None
self
.
cronjob_log
.
info
(
'{0} [get_doc_info success] [task={1}] [is_priority={2}]'
.
format
(
self
.
log_base
,
task_str
,
is_priority
))
doc
,
business_type
=
self
.
get_doc_object
(
task_str
)
if
doc
is
None
:
...
...
@@ -421,6 +422,7 @@ class Command(BaseCommand, LoggerMixin):
time
.
sleep
(
self
.
sleep_time_img_get
)
continue
else
:
try
:
self
.
cronjob_log
.
info
(
'{0} [img_2_ocr_1] [get img] [img_path={1}]'
.
format
(
self
.
log_base
,
img_path
))
for
times
in
range
(
consts
.
RETRY_TIMES
):
...
...
@@ -451,6 +453,9 @@ class Command(BaseCommand, LoggerMixin):
ocr_1_res
=
{}
self
.
cronjob_log
.
warn
(
'{0} [ocr_1 failed] [img_path={1}]'
.
format
(
self
.
log_base
,
img_path
))
# continue
except
Exception
as
e
:
self
.
cronjob_log
.
error
(
'{0} [process error (ocr fetch)] [img_path={1}] [error={2}]'
.
format
(
self
.
log_base
,
img_path
,
traceback
.
format_exc
()))
try
:
del
json_data_1
...
...
@@ -470,7 +475,7 @@ class Command(BaseCommand, LoggerMixin):
else
:
todo_count_dict
[
task_str
]
=
todo_count
-
1
except
Exception
as
e
:
self
.
cronjob_log
.
error
(
'{0} [process
failed
(store ocr res)] [img_path={1}] [error={2}]'
.
format
(
self
.
cronjob_log
.
error
(
'{0} [process
error
(store ocr res)] [img_path={1}] [error={2}]'
.
format
(
self
.
log_base
,
img_path
,
traceback
.
format_exc
()))
def
res_2_wb
(
self
,
res_dict
,
finish_queue
,
lock
):
...
...
@@ -482,12 +487,12 @@ class Command(BaseCommand, LoggerMixin):
time
.
sleep
(
self
.
sleep_time_task_get
)
continue
else
:
try
:
self
.
cronjob_log
.
info
(
'{0} [res_2_wb] [get task] [task={1}]'
.
format
(
self
.
log_base
,
task_str
))
ocr_1_res
=
res_dict
.
get
(
task_str
,
{})
# self.cronjob_log.info('{0} [res_2_wb] [get task res] [task={1}]'.format(
# self.log_base, task_str))
try
:
# 4.OCR结果并且构建excel文件
bs_summary
=
{}
license_summary
=
{}
...
...
@@ -605,6 +610,7 @@ class Command(BaseCommand, LoggerMixin):
count_list
=
wb
.
rebuild
(
merged_bs_summary
,
license_summary
,
res_list
,
doc
.
document_scheme
)
wb
.
save
(
excel_path
)
except
Exception
as
e
:
try
:
with
lock
:
if
task_str
in
res_dict
:
del
res_dict
[
task_str
]
...
...
@@ -613,6 +619,10 @@ class Command(BaseCommand, LoggerMixin):
doc
.
save
()
self
.
cronjob_log
.
error
(
'{0} [process failed (res to wb)] [task={1}] [error={2}]'
.
format
(
self
.
log_base
,
task_str
,
traceback
.
format_exc
()))
except
Exception
as
e
:
self
.
cronjob_log
.
error
(
'{0} [process error (wb end)] [task={1}] [error={2}]'
.
format
(
self
.
log_base
,
task_str
,
traceback
.
format_exc
()))
try
:
doc_data_path
=
os
.
path
.
join
(
self
.
data_dir
,
business_type
,
consts
.
TMP_DIR_NAME
,
str
(
doc
.
id
))
img_save_path
=
os
.
path
.
join
(
doc_data_path
,
'img'
)
...
...
@@ -620,7 +630,7 @@ class Command(BaseCommand, LoggerMixin):
pdf_path
=
os
.
path
.
join
(
doc_data_path
,
'{0}.pdf'
.
format
(
doc
.
id
))
os
.
remove
(
pdf_path
)
except
Exception
as
e
:
self
.
cronjob_log
.
error
(
'{0} [
file remove failed
] [task={1}] [error={2}]'
.
format
(
self
.
cronjob_log
.
error
(
'{0} [
process error (file remove 1)
] [task={1}] [error={2}]'
.
format
(
self
.
log_base
,
task_str
,
traceback
.
format_exc
()))
else
:
try
:
...
...
@@ -631,7 +641,7 @@ class Command(BaseCommand, LoggerMixin):
# os.remove(pdf_path)
# os.remove(src_excel_path)
except
Exception
as
e
:
self
.
cronjob_log
.
error
(
'{0} [
file remove failed
] [task={1}] [error={2}]'
.
format
(
self
.
cronjob_log
.
error
(
'{0} [
process error (file remove 2)
] [task={1}] [error={2}]'
.
format
(
self
.
log_base
,
task_str
,
traceback
.
format_exc
()))
try
:
# 5.上传至EDMS
...
...
@@ -648,6 +658,7 @@ class Command(BaseCommand, LoggerMixin):
else
:
raise
EDMSException
(
edms_exc
)
except
Exception
as
e
:
try
:
doc
.
status
=
DocStatus
.
UPLOAD_FAILED
.
value
doc
.
end_time
=
timezone
.
now
()
doc
.
duration
=
min
((
doc
.
end_time
-
doc
.
start_time
)
.
seconds
,
32760
)
...
...
@@ -657,7 +668,11 @@ class Command(BaseCommand, LoggerMixin):
doc
.
save
()
self
.
cronjob_log
.
error
(
'{0} [process failed (edms upload)] [task={1}] [error={2}]'
.
format
(
self
.
log_base
,
task_str
,
traceback
.
format_exc
()))
except
Exception
as
e
:
self
.
cronjob_log
.
error
(
'{0} [process error (edms upload)] [task={1}] [error={2}]'
.
format
(
self
.
log_base
,
task_str
,
traceback
.
format_exc
()))
else
:
try
:
doc
.
status
=
DocStatus
.
COMPLETE
.
value
doc
.
end_time
=
timezone
.
now
()
doc
.
duration
=
min
((
doc
.
end_time
-
doc
.
start_time
)
.
seconds
,
32760
)
...
...
@@ -667,7 +682,9 @@ class Command(BaseCommand, LoggerMixin):
doc
.
save
()
self
.
cronjob_log
.
info
(
'{0} [process complete] [task={1}]'
.
format
(
self
.
log_base
,
task_str
))
# os.remove(excel_path)
except
Exception
as
e
:
self
.
cronjob_log
.
error
(
'{0} [process error (completed)] [task={1}] [error={2}]'
.
format
(
self
.
log_base
,
task_str
,
traceback
.
format_exc
()))
# TODO 细化文件状态,不同异常状态,归还队列,重试时采取不同的处理
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment