Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
4076848e
authored
2020-11-11 10:51:49 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
fix file remove
1 parent
e1670d0f
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
26 additions
and
6 deletions
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/management/commands/ocr_process.py
View file @
4076848
import
os
import
time
import
json
import
shutil
import
base64
import
signal
import
asyncio
...
...
@@ -591,21 +592,40 @@ class Command(BaseCommand, LoggerMixin):
doc
,
business_type
=
self
.
get_doc_object
(
task_str
)
doc_data_path
=
os
.
path
.
join
(
self
.
data_dir
,
business_type
,
consts
.
TMP_DIR_NAME
,
str
(
doc
.
id
))
excel_path
=
os
.
path
.
join
(
doc_data_path
,
'{0}.xlsx'
.
format
(
doc
.
id
))
img_save_path
=
os
.
path
.
join
(
doc_data_path
,
'img
'
)
#
wb.save(src_excel_path)
src_excel_path
=
os
.
path
.
join
(
doc_data_path
,
'src.xlsx
'
)
wb
.
save
(
src_excel_path
)
count_list
=
wb
.
rebuild
(
merged_bs_summary
,
license_summary
,
res_list
,
doc
.
document_scheme
)
wb
.
save
(
excel_path
)
except
Exception
as
e
:
with
lock
:
if
task_str
in
res_dict
:
del
res_dict
[
task_str
]
doc
,
_
=
self
.
get_doc_object
(
task_str
)
doc
,
business_type
=
self
.
get_doc_object
(
task_str
)
doc
.
status
=
DocStatus
.
PROCESS_FAILED
.
value
doc
.
save
()
self
.
cronjob_log
.
error
(
'{0} [process failed (res to wb)] [task={1}] [error={2}]'
.
format
(
self
.
log_base
,
task_str
,
traceback
.
format_exc
()))
try
:
doc_data_path
=
os
.
path
.
join
(
self
.
data_dir
,
business_type
,
consts
.
TMP_DIR_NAME
,
str
(
doc
.
id
))
img_save_path
=
os
.
path
.
join
(
doc_data_path
,
'img'
)
shutil
.
rmtree
(
img_save_path
,
ignore_errors
=
True
)
pdf_path
=
os
.
path
.
join
(
doc_data_path
,
'{0}.pdf'
.
format
(
doc
.
id
))
os
.
remove
(
pdf_path
)
except
Exception
as
e
:
self
.
cronjob_log
.
error
(
'{0} [file remove failed] [task={1}] [error={2}]'
.
format
(
self
.
log_base
,
task_str
,
traceback
.
format_exc
()))
else
:
try
:
img_save_path
=
os
.
path
.
join
(
doc_data_path
,
'img'
)
write_zip_file
(
img_save_path
,
os
.
path
.
join
(
doc_data_path
,
'{0}_img.zip'
.
format
(
doc
.
id
)))
shutil
.
rmtree
(
img_save_path
,
ignore_errors
=
True
)
# pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc.id))
# os.remove(pdf_path)
# os.remove(src_excel_path)
except
Exception
as
e
:
self
.
cronjob_log
.
error
(
'{0} [file remove failed] [task={1}] [error={2}]'
.
format
(
self
.
log_base
,
task_str
,
traceback
.
format_exc
()))
try
:
# 5.上传至EDMS
for
times
in
range
(
consts
.
RETRY_TIMES
):
try
:
...
...
@@ -629,8 +649,6 @@ class Command(BaseCommand, LoggerMixin):
doc
.
save
()
self
.
cronjob_log
.
error
(
'{0} [process failed (edms upload)] [task={1}] [error={2}]'
.
format
(
self
.
log_base
,
task_str
,
traceback
.
format_exc
()))
write_zip_file
(
img_save_path
,
os
.
path
.
join
(
doc_data_path
,
'{0}_img.zip'
.
format
(
doc
.
id
)))
else
:
doc
.
status
=
DocStatus
.
COMPLETE
.
value
doc
.
end_time
=
timezone
.
now
()
...
...
@@ -640,7 +658,9 @@ class Command(BaseCommand, LoggerMixin):
setattr
(
doc
,
field
,
count
)
doc
.
save
()
self
.
cronjob_log
.
info
(
'{0} [process complete] [task={1}]'
.
format
(
self
.
log_base
,
task_str
))
write_zip_file
(
img_save_path
,
os
.
path
.
join
(
doc_data_path
,
'{0}_img.zip'
.
format
(
doc
.
id
)))
# os.remove(excel_path)
# TODO 细化文件状态,不同异常状态,归还队列,重试时采取不同的处理
# TODO 异常邮件通知
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment