Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
a02a957e
authored
2020-06-23 15:31:28 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
ocr process
1 parent
f8904dcb
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
29 additions
and
4 deletions
requirements/base.txt
src/apps/doc/management/commands/doc_process.py
src/apps/doc/mixins.py
src/common/tools/file_tools.py
requirements/base.txt
View file @
a02a957
aiohttp==3.6.2
async-timeout==3.0.1
attrs==19.3.0
certifi==2016.2.28
chardet==3.0.4
Django==2.1
# django-mysqlpool @ https://github.com/smartfile/django-mysqlpool/archive/master.zip
djangorestframework==3.9.0
djangorestframework-jwt==1.11.0
idna==2.9
idna-ssl==1.1.0
marshmallow==3.6.1
multidict==4.7.6
pdfminer3k==1.3.4
Pillow==7.1.2
ply==3.11
...
...
@@ -17,4 +24,7 @@ redis==3.4.1
# situlogger @ http://gitlab.situdata.com/zhouweiqi/situlogger/repository/archive.tar.gz?ref=master
six==1.14.0
SQLAlchemy==0.9.10
typing-extensions==3.7.4.2
webargs==6.1.0
xlwt==1.3.0
yarl==1.4.2
...
...
src/apps/doc/management/commands/doc_process.py
View file @
a02a957
This diff is collapsed.
Click to expand it.
src/apps/doc/mixins.py
View file @
a02a957
...
...
@@ -7,13 +7,12 @@ class DocHandler:
# Build the location of one of a document's generated files.
#
# doc_id: identifier of the document; it is used both as the directory name
#         and as the file-name stem.
# file:   'pdf' selects the PDF, 'img' selects the image zip, and any other
#         value falls through to the spreadsheet branch.
#
# NOTE(review): every branch below holds two consecutive `return` statements.
# Read as plain Python, only the first one (the os.path.join form built on
# data_path) can execute and the second one (the '/data/...' literal) is
# unreachable. This looks like the removed and added sides of a diff shown
# together -- confirm which version is the intended one before relying on it.
@staticmethod
def
get_link
(
doc_id
,
file
=
'pdf'
):
# Per-document directory: conf.DATA_DIR joined with the stringified doc_id.
data_path
=
os
.
path
.
join
(
conf
.
DATA_DIR
,
str
(
doc_id
))
if
file
==
'pdf'
:
return
os
.
path
.
join
(
data_path
,
'{0}.pdf'
.
format
(
str
(
doc_id
))
)
# NOTE(review): second return of the 'pdf' branch (see the note at the top).
return
'/data/{0}/{0}.pdf'
.
format
(
doc_id
)
elif
file
==
'img'
:
return
os
.
path
.
join
(
data_path
,
'{0}_img.zip'
.
format
(
str
(
doc_id
))
)
# NOTE(review): second return of the 'img' branch (see the note at the top).
return
'/data/{0}/{0}_img.zip'
.
format
(
doc_id
)
else
:
return
os
.
path
.
join
(
data_path
,
'{0}.xlsx'
.
format
(
str
(
doc_id
))
)
# NOTE(review): second return of the fallback branch; note that its extension
# is '.xls' while the return above uses '.xlsx'.
return
'/data/{0}/{0}.xls'
.
format
(
doc_id
)
def
get_doc_list
(
self
,
doc_queryset
):
for
doc_dict
in
doc_queryset
:
...
...
src/common/tools/file_tools.py
View file @
a02a957
import
os
from
zipfile
import
ZipFile
def file_write(file, file_path):
    """Save the chunked content of ``file`` to ``file_path``.

    ``file`` must expose a ``chunks()`` method that yields bytes-like
    pieces; the pieces are written to ``file_path`` in order. An existing
    file at ``file_path`` is truncated first.
    """
    with open(file_path, 'wb+') as destination:
        # writelines() consumes the iterator and writes each chunk as-is.
        destination.writelines(file.chunks())
def write_zip_file(dir_name, zipfile_path):
    """Pack every file under ``dir_name`` into a new zip archive.

    Files are stored under their path relative to ``dir_name``, so the
    archive has the same layout as the directory. Directories that contain
    no files are not stored.

    Args:
        dir_name: Directory whose files are archived. If it is not an
            existing directory the function returns without creating
            anything.
        zipfile_path: Path of the archive to create; an existing file at
            that path is overwritten.

    Returns:
        None.
    """
    if not os.path.isdir(dir_name):
        return

    # Resolved once so the archive can be recognised if the walk reaches it
    # (i.e. when zipfile_path lies inside dir_name).
    archive_abspath = os.path.abspath(zipfile_path)

    with ZipFile(zipfile_path, 'w') as z:
        for root, _dirs, files in os.walk(dir_name):
            for single_file in files:
                src_file_path = os.path.join(root, single_file)
                if os.path.abspath(src_file_path) == archive_abspath:
                    # Never add the half-written archive to itself.
                    continue
                # relpath strips only the leading dir_name prefix; a plain
                # str.replace would also remove later occurrences of the
                # same text inside nested paths.
                file_target_path = os.path.relpath(src_file_path, dir_name)
                z.write(src_file_path, file_target_path)
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment