Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
c25ced3c
authored
2020-11-11 22:03:44 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
fix bug
1 parent
97b0b2ed
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
9 deletions
src/apps/doc/management/commands/folder_ocr_process.py
src/apps/doc/management/commands/folder_ocr_process.py
View file @
c25ced3
...
...
@@ -56,13 +56,14 @@ class Command(BaseCommand, LoggerMixin):
return
img_name
,
1
,
1
@staticmethod
def
get_path
(
name
,
img_output_dir
,
wb_output_dir
):
def
get_path
(
name
,
img_output_dir
,
wb_output_dir
,
pdf_output_dir
):
time_stamp
=
int
(
time
.
time
())
new_name
=
'{0}_{1}'
.
format
(
time_stamp
,
name
)
img_save_path
=
os
.
path
.
join
(
img_output_dir
,
new_name
)
pdf_save_path
=
os
.
path
.
join
(
pdf_output_dir
,
new_name
)
excel_name
=
'{0}.xlsx'
.
format
(
os
.
path
.
splitext
(
new_name
)[
0
])
excel_path
=
os
.
path
.
join
(
wb_output_dir
,
excel_name
)
return
img_save_path
,
excel_path
return
img_save_path
,
excel_path
,
pdf_save_path
def
res_process
(
self
,
all_res
,
classify
,
excel_path
):
try
:
...
...
@@ -131,7 +132,7 @@ class Command(BaseCommand, LoggerMixin):
def
pdf_process
(
self
,
name
,
path
,
classify
,
img_output_dir
,
wb_output_dir
,
pdf_output_dir
):
if
os
.
path
.
exists
(
path
):
try
:
img_save_path
,
excel_path
=
self
.
get_path
(
name
,
img_output_dir
,
wb
_output_dir
)
img_save_path
,
excel_path
,
pdf_save_path
=
self
.
get_path
(
name
,
img_output_dir
,
wb_output_dir
,
pdf
_output_dir
)
self
.
cronjob_log
.
info
(
'{0} [pdf to img start] [path={1}]'
.
format
(
self
.
log_base
,
path
))
pdf_handler
=
PDFHandler
(
path
,
img_save_path
)
pdf_handler
.
extract_image
()
...
...
@@ -145,15 +146,14 @@ class Command(BaseCommand, LoggerMixin):
ocr_res
=
self
.
ocr_process
(
img_path
,
classify
)
all_res
[
img_path
]
=
ocr_res
self
.
res_process
(
all_res
,
classify
,
excel_path
)
shutil
.
move
(
pdf_save_path
,
pdf_output_dir
)
shutil
.
move
(
path
,
pdf_output_dir
)
def
img_process
(
self
,
name
,
path
,
classify
,
wb_output_dir
,
img_output_dir
):
def
img_process
(
self
,
name
,
path
,
classify
,
wb_output_dir
,
img_output_dir
,
pdf_output_dir
):
ocr_res
=
self
.
ocr_process
(
path
,
classify
)
all_res
=
{
path
:
ocr_res
}
try
:
img_save_path
,
excel_path
=
self
.
get_path
(
name
,
img_output_dir
,
wb
_output_dir
)
img_save_path
,
excel_path
,
_
=
self
.
get_path
(
name
,
img_output_dir
,
wb_output_dir
,
pdf
_output_dir
)
except
Exception
as
e
:
self
.
cronjob_log
.
error
(
'{0} [get path error] [path={1}] [error={2}]'
.
format
(
self
.
log_base
,
path
,
traceback
.
format_exc
()))
...
...
@@ -174,7 +174,7 @@ class Command(BaseCommand, LoggerMixin):
# 1. 从input dir获取pdf or image
list_dir
=
os
.
listdir
(
input_dir
)
if
not
list_dir
:
self
.
cronjob_log
.
error
(
'{0} [input dir empty] [input_dir={1}]'
.
format
(
self
.
log_base
,
input_dir
))
self
.
cronjob_log
.
info
(
'{0} [input dir empty] [input_dir={1}]'
.
format
(
self
.
log_base
,
input_dir
))
time
.
sleep
(
self
.
sleep_time
)
for
name
in
list_dir
:
path
=
os
.
path
.
join
(
input_dir
,
name
)
...
...
@@ -183,7 +183,7 @@ class Command(BaseCommand, LoggerMixin):
if
name
.
endswith
(
'.pdf'
):
self
.
pdf_process
(
name
,
path
,
classify
,
img_output_dir
,
wb_output_dir
,
pdf_output_dir
)
else
:
self
.
img_process
(
name
,
path
,
classify
,
wb_output_dir
,
img_output_dir
)
self
.
img_process
(
name
,
path
,
classify
,
wb_output_dir
,
img_output_dir
,
pdf_output_dir
)
self
.
cronjob_log
.
info
(
'{0} [file end] [path={1}]'
.
format
(
self
.
log_base
,
path
))
def
handle
(
self
,
*
args
,
**
kwargs
):
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment