Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
6966f069
authored
2022-09-01 16:51:52 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Plain Diff
Merge branch 'feature/zip'
2 parents
7cf03ec9
6010c32f
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
107 additions
and
12 deletions
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/views.py
src/common/redis_cache/handler.py
src/common/tools/file_tools.py
src/apps/doc/management/commands/ocr_process.py
View file @
6966f06
This diff is collapsed.
Click to expand it.
src/apps/doc/views.py
View file @
6966f06
...
...
@@ -570,15 +570,14 @@ class UploadDocView(GenericView, DocHandler):
data_source
=
self
.
fix_data_source
(
data_source
)
document_scheme
=
self
.
fix_scheme
(
document_scheme
)
if
document_name
.
endswith
(
'.zip'
):
self
.
running_log
.
info
(
'[doc upload success] [zip file skip] [args={0}]'
.
format
(
args
))
return
response
.
ok
()
#
if document_name.endswith('.zip'):
#
self.running_log.info('[doc upload success] [zip file skip] [args={0}]'.format(args))
#
return response.ok()
if
data_source
==
consts
.
DATA_SOURCE_LIST
[
1
]:
if
isinstance
(
document_name
,
str
):
if
document_name
.
endswith
(
'-证书.pdf'
)
or
document_name
.
endswith
(
'-证书'
):
self
.
running_log
.
info
(
'[doc upload success] [eapp license skip] [args={0}]'
.
format
(
args
))
return
response
.
ok
()
if
document_name
.
endswith
(
'-证书.pdf'
)
or
document_name
.
endswith
(
'-证书'
):
self
.
running_log
.
info
(
'[doc upload success] [eapp license skip] [args={0}]'
.
format
(
args
))
return
response
.
ok
()
# 2. 根据业务类型分库存储
doc_class
,
prefix
=
self
.
get_doc_class
(
business_type
)
...
...
@@ -594,17 +593,24 @@ class UploadDocView(GenericView, DocHandler):
data_source
=
data_source
,
upload_finish_time
=
document
.
get
(
'uploadFinishTime'
),
)
# 3. 选择队列进入
is_priority
=
PriorityApplication
.
objects
.
filter
(
application_id
=
application_id
,
on_off
=
True
)
.
exists
()
is_zip
=
False
classify_1
=
0
# 电子合同
if
data_source
==
consts
.
DATA_SOURCE_LIST
[
-
1
]
and
document_scheme
==
consts
.
DOC_SCHEME_LIST
[
1
]:
for
keyword
,
classify_1_tmp
in
consts
.
ECONTRACT_KEYWORDS_MAP
.
get
(
prefix
):
if
keyword
in
document_name
:
classify_1
=
classify_1_tmp
break
elif
document_name
.
endswith
(
'.zip'
)
or
document_name
.
endswith
(
'.rar'
)
or
document_name
.
endswith
(
'.ZIP'
)
\
or
document_name
.
endswith
(
'.RAR'
):
is_zip
=
True
task
=
consts
.
SPLIT_STR
.
join
([
prefix
,
str
(
doc
.
id
),
str
(
classify_1
)])
enqueue_res
=
rh
.
enqueue
([
task
],
is_priority
)
enqueue_res
=
rh
.
enqueue
([
task
],
is_priority
,
is_zip
)
self
.
running_log
.
info
(
'[doc upload success] [args={0}] [business_type={1}] [doc_id={2}] '
'[is_priority={3}] [enqueue_res={4}]'
.
format
(
args
,
prefix
,
doc
.
id
,
is_priority
,
enqueue_res
))
...
...
@@ -669,7 +675,7 @@ class PriorityDocView(GenericView, DocHandler):
self
.
running_log
.
info
(
'[priority doc success] [args={0}]'
.
format
(
args
))
else
:
enqueue_res
=
rh
.
enqueue
(
tasks_list
,
is_priority
=
True
)
enqueue_res
=
rh
.
enqueue
(
tasks_list
,
is_priority
=
True
)
# TODO 可能把压缩文件放入优先队列
self
.
running_log
.
info
(
'[priority doc success] [args={0}] [tasks_list={1}] [enqueue_res={2}]'
.
format
(
args
,
tasks_list
,
enqueue_res
))
return
response
.
ok
()
...
...
src/common/redis_cache/handler.py
View file @
6966f06
...
...
@@ -35,16 +35,27 @@ class RedisHandler:
self
.
prefix
=
'bwm_ocr'
self
.
common_queue_key
=
'{0}:common_queue'
.
format
(
self
.
prefix
)
self
.
priority_queue_key
=
'{0}:priority_queue'
.
format
(
self
.
prefix
)
self
.
zip_queue_key
=
'{0}:zip_queue'
.
format
(
self
.
prefix
)
self
.
session_id_key
=
'{0}:session_id'
.
format
(
self
.
prefix
)
self
.
cms_token_key
=
'{0}:cms_token'
.
format
(
self
.
prefix
)
self
.
ecm_token_key
=
'{0}:ecm_token'
.
format
(
self
.
prefix
)
self
.
login_limit_key
=
'{0}:login_limit'
.
format
(
self
.
prefix
)
def enqueue(self, tasks, is_priority=False, is_zip=False):
    """Push tasks onto the zip, priority or common queue.

    Args:
        tasks: Task payload, handed unchanged to ``self.redis.lpush``.
        is_priority: Select the priority queue. Ignored when ``is_zip``
            is true.
        is_zip: Select the zip queue; takes precedence over
            ``is_priority``.

    Returns:
        Whatever ``self.redis.lpush`` returns for the selected key.
    """
    # Archives always go to their own queue, even for priority applications.
    if is_zip:
        key = self.zip_queue_key
    elif is_priority:
        key = self.priority_queue_key
    else:
        key = self.common_queue_key
    return self.redis.lpush(key, tasks)
def dequeue_zip(self):
    """Pop one entry from the zip queue.

    Returns:
        The result of ``self.redis.rpop(self.zip_queue_key)`` (a task or
        ``None``).
    """
    return self.redis.rpop(self.zip_queue_key)
def
dequeue
(
self
):
# task or None
task
=
self
.
redis
.
rpop
(
self
.
priority_queue_key
)
...
...
src/common/tools/file_tools.py
View file @
6966f06
import
os
import
re
import
zipfile
import
rarfile
from
zipfile
import
ZipFile
...
...
@@ -18,3 +22,77 @@ def write_zip_file(dir_name, zipfile_path):
src_file_path
=
os
.
path
.
join
(
root
,
single_file
)
file_target_path
=
os
.
path
.
join
(
root_target_path
,
single_file
)
z
.
write
(
src_file_path
,
file_target_path
)
def get_pwd_list_from_str(doc_name):
    """Collect candidate six-digit passwords embedded in a document name.

    Args:
        doc_name: Document name to scan. Anything that is not a ``str``
            yields an empty list instead of raising.

    Returns:
        list: Non-overlapping six-digit chunks in order of appearance. A
        longer digit run contributes its leading six-digit chunks
        (``'12345678'`` gives ``['123456']``).
    """
    # re.findall with a text pattern raises TypeError for non-text input.
    # Treat that case as "no candidates" explicitly rather than hiding every
    # possible error behind a blanket except.
    if not isinstance(doc_name, str):
        return []
    return re.findall(r'\d{6}', doc_name)
def extract_zip_or_rar(file_path, extract_path, pwd_list=None):
    """Extract a ZIP or RAR archive, trying candidate passwords if given.

    Args:
        file_path: Path of the archive. Only names ending in ``.zip``,
            ``.ZIP``, ``.rar`` or ``.RAR`` are handled.
        extract_path: Directory the archive members are extracted into.
        pwd_list: Optional list of candidate passwords (``str``). When it is
            empty or ``None``, a single attempt without a password is made.

    Returns:
        bool: ``True`` as soon as one extraction attempt finishes without
        raising; ``False`` if every attempt fails or the suffix is not one
        of the handled ones.
    """
    if file_path.endswith(('.zip', '.ZIP')):
        is_zip = True
    elif file_path.endswith(('.rar', '.RAR')):
        is_zip = False
    else:
        return False

    # ``None`` stands for "no password", so the protected and unprotected
    # cases can share a single attempt loop.
    candidates = list(pwd_list) if pwd_list else [None]

    for password in candidates:
        try:
            if is_zip:
                # zipfile expects the password as bytes.
                pwd = None if password is None else bytes(password, 'utf-8')
                archive = zipfile.ZipFile(file_path)
            else:
                # rarfile is given the password as-is.
                pwd = password
                archive = rarfile.RarFile(file_path)
            with archive:
                archive.extractall(extract_path, pwd=pwd)
        except Exception:
            # Deliberately best-effort: a wrong password, a corrupt archive
            # or an unreadable file all just mean this attempt failed.
            continue
        return True
    return False
def get_file_paths(input_path, suffix_list):
    """Yield paths of files under a directory whose names end with a suffix.

    Args:
        input_path: str, directory to walk recursively.
        suffix_list: list of str, file-name suffixes to search for.

    Yields:
        str: ``os.path.join(parent, filename)`` for each matching file. The
        path is absolute only when ``input_path`` itself is absolute.
    """
    # str.endswith accepts a tuple, so one call checks every suffix.
    suffixes = tuple(suffix_list)
    for parent, _, filenames in os.walk(input_path):
        for filename in filenames:
            if filename.endswith(suffixes):
                yield os.path.join(parent, filename)
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment