Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
cf8331db
authored
2021-03-12 18:29:57 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
folder add tiff
1 parent
6cf3b86d
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
47 additions
and
0 deletions
src/apps/doc/management/commands/folder_ocr_process.py
src/apps/doc/management/commands/folder_ocr_process.py
View file @
cf8331d
...
...
@@ -6,6 +6,7 @@ import base64
import
signal
import
requests
import
traceback
from
PIL
import
Image
from
datetime
import
datetime
from
django.core.management
import
BaseCommand
from
multiprocessing
import
Process
...
...
@@ -18,6 +19,27 @@ from apps.doc.exceptions import OCR1Exception, OCR4Exception
from
apps.doc.ocr.wb
import
BSWorkbook
class TIFFHandler:
    """Split a TIFF file into one image file per frame (page).

    Attributes:
        path: Path of the source TIFF file.
        img_save_path: Directory the extracted page images are written to.
        img_path_list: Paths of the page images written so far, in page order.
    """

    # Pillow picks the encoder from the target file's extension when no
    # explicit format is given; an extensionless name such as ``page_0``
    # makes ``Image.save`` raise ``ValueError: unknown file extension``.
    # PNG is lossless, so scanned text is not degraded before OCR.
    PAGE_SUFFIX = '.png'

    def __init__(self, path, img_save_path):
        self.path = path
        self.img_save_path = img_save_path
        self.img_path_list = []

    def extract_image(self):
        """Save every frame of the TIFF as ``page_<index>.png``.

        Each written file is appended to ``self.img_path_list``. Extraction
        stops early if seeking to a frame raises ``EOFError``; any other
        error raised by Pillow propagates to the caller.
        """
        # The context manager closes the underlying file handle even when a
        # page fails to save, so a broken TIFF no longer leaks descriptors.
        with Image.open(self.path) as tiff:
            # Not every image object exposes ``n_frames``; treat those as a
            # single page instead of failing with AttributeError.
            for i in range(getattr(tiff, 'n_frames', 1)):
                save_path = os.path.join(
                    self.img_save_path,
                    'page_{0}{1}'.format(i, self.PAGE_SUFFIX))
                try:
                    tiff.seek(i)
                    tiff.save(save_path)
                except EOFError:
                    # Fewer readable frames than advertised: keep the pages
                    # already extracted and stop.
                    break
                self.img_path_list.append(save_path)
class
Command
(
BaseCommand
,
LoggerMixin
):
def
__init__
(
self
):
...
...
@@ -225,6 +247,7 @@ class Command(BaseCommand, LoggerMixin):
except
Exception
as
e
:
self
.
folder_log
.
error
(
'{0} [pdf to img error] [path={1}] [error={2}]'
.
format
(
self
.
log_base
,
path
,
traceback
.
format_exc
()))
raise
e
else
:
all_res
=
{}
for
img_path
in
pdf_handler
.
img_path_list
:
...
...
@@ -233,6 +256,26 @@ class Command(BaseCommand, LoggerMixin):
self
.
res_process
(
all_res
,
classify
,
excel_path
)
shutil
.
move
(
path
,
pdf_save_path
)
def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir):
    """Split one TIFF file into page images, OCR each page, then move the file.

    Does nothing when ``path`` does not exist. Any exception raised while
    resolving the output paths or splitting the TIFF is logged with its
    traceback and re-raised. On success every extracted page is passed to
    ``self.ocr_process``, the collected results go to ``self.res_process``,
    and the source file is moved to the path returned by ``self.get_path``.
    """
    # Guard clause: the file is not there, so there is nothing to process.
    if not os.path.exists(path):
        return

    try:
        page_dir, excel_path, archive_path = self.get_path(
            name, img_output_dir, wb_output_dir, tiff_output_dir)
        start_msg = '{0} [tiff to img start] [path={1}]'.format(self.log_base, path)
        self.folder_log.info(start_msg)
        splitter = TIFFHandler(path, page_dir)
        splitter.extract_image()
        end_msg = '{0} [tiff to img end] [path={1}]'.format(self.log_base, path)
        self.folder_log.info(end_msg)
    except Exception as exc:
        error_msg = '{0} [tiff to img error] [path={1}] [error={2}]'.format(
            self.log_base, path, traceback.format_exc())
        self.folder_log.error(error_msg)
        raise exc

    # Reached only when the split succeeded, because the handler above
    # always re-raises. Results are keyed by page image path, in page order.
    all_res = {
        page_path: self.ocr_process(page_path, classify)
        for page_path in splitter.img_path_list
    }
    self.res_process(all_res, classify, excel_path)
    shutil.move(path, archive_path)
def
img_process
(
self
,
name
,
path
,
classify
,
wb_output_dir
,
img_output_dir
,
pdf_output_dir
):
ocr_res
=
self
.
ocr_process
(
path
,
classify
)
all_res
=
{
path
:
ocr_res
}
...
...
@@ -258,11 +301,13 @@ class Command(BaseCommand, LoggerMixin):
img_output_dir
=
os
.
path
.
join
(
output_dir
,
'image'
)
wb_output_dir
=
os
.
path
.
join
(
output_dir
,
'excel'
)
pdf_output_dir
=
os
.
path
.
join
(
output_dir
,
'pdf'
)
tiff_output_dir
=
os
.
path
.
join
(
output_dir
,
'tiff'
)
failed_output_dir
=
os
.
path
.
join
(
output_dir
,
'failed'
)
os
.
makedirs
(
output_dir
,
exist_ok
=
True
)
os
.
makedirs
(
img_output_dir
,
exist_ok
=
True
)
os
.
makedirs
(
wb_output_dir
,
exist_ok
=
True
)
os
.
makedirs
(
pdf_output_dir
,
exist_ok
=
True
)
os
.
makedirs
(
tiff_output_dir
,
exist_ok
=
True
)
os
.
makedirs
(
failed_output_dir
,
exist_ok
=
True
)
while
self
.
switch
:
if
not
os
.
path
.
isdir
(
input_dir
):
...
...
@@ -282,6 +327,8 @@ class Command(BaseCommand, LoggerMixin):
self
.
folder_log
.
info
(
'{0} [file start] [path={1}]'
.
format
(
self
.
log_base
,
path
))
if
name
.
endswith
(
'.pdf'
):
self
.
pdf_process
(
name
,
path
,
classify
,
img_output_dir
,
wb_output_dir
,
pdf_output_dir
)
elif
name
.
endswith
(
'.tif'
):
self
.
tif_process
(
name
,
path
,
classify
,
img_output_dir
,
wb_output_dir
,
tiff_output_dir
)
else
:
self
.
img_process
(
name
,
path
,
classify
,
wb_output_dir
,
img_output_dir
,
pdf_output_dir
)
self
.
folder_log
.
info
(
'{0} [file end] [path={1}]'
.
format
(
self
.
log_base
,
path
))
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment