Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
8f4c3205
authored
2023-04-21 15:40:02 +0800
by
冯轩
Browse Files
Options
Browse Files
Tag
Download
Plain Diff
Merge branch 'feature/CHINARPA-3529'
2 parents
9f4b3645
8fa2ba72
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
109 additions
and
20 deletions
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/views.py
src/common/mixins.py
src/apps/doc/management/commands/ocr_process.py
View file @
8f4c320
...
...
@@ -1458,15 +1458,20 @@ class Command(BaseCommand, LoggerMixin):
# pdf下载 处理 图片存储 识别
for
times
in
range
(
consts
.
RETRY_TIMES
):
try
:
self
.
edms
.
download
(
pdf_path
,
doc
.
metadata_version_id
,
doc
.
document_scheme
,
business_type
)
self
.
online_log
.
info
(
'{0} [edms download success] [task={1}] [times={2}] '
'[pdf_path={3}]'
.
format
(
self
.
e_log_base
,
task_str
,
times
,
pdf_path
))
self
.
online_log
.
info
(
'{0} [pdf to img start] [task={1}] [times={2}]'
.
format
(
self
.
e_log_base
,
task_str
,
times
))
pdf_handler
.
e_contract_process
()
self
.
online_log
.
info
(
'{0} [pdf to img end] [task={1}] [times={2}]'
.
format
(
self
.
e_log_base
,
task_str
,
times
))
if
doc
.
application_id
.
startswith
(
consts
.
FIXED_APPLICATION_ID_PREFIX
):
self
.
online_log
.
info
(
'{0} [mo ni xia dan] [task={1}] [times={2}] '
'[pdf_path={3}]'
.
format
(
self
.
log_base
,
task_str
,
times
,
pdf_path
))
else
:
self
.
edms
.
download
(
pdf_path
,
doc
.
metadata_version_id
,
doc
.
document_scheme
,
business_type
)
self
.
online_log
.
info
(
'{0} [edms download success] [task={1}] [times={2}] '
'[pdf_path={3}]'
.
format
(
self
.
e_log_base
,
task_str
,
times
,
pdf_path
))
self
.
online_log
.
info
(
'{0} [pdf to img start] [task={1}] [times={2}]'
.
format
(
self
.
e_log_base
,
task_str
,
times
))
pdf_handler
.
e_contract_process
()
self
.
online_log
.
info
(
'{0} [pdf to img end] [task={1}] [times={2}]'
.
format
(
self
.
e_log_base
,
task_str
,
times
))
except
Exception
as
e
:
self
.
online_log
.
warn
(
'{0} [download or pdf to img failed] [task={1}] [times={2}] '
'[error={3}]'
.
format
(
self
.
e_log_base
,
task_str
,
times
,
...
...
@@ -1973,7 +1978,7 @@ class Command(BaseCommand, LoggerMixin):
self
.
log_base
,
task_str
,
traceback
.
format_exc
()))
else
:
self
.
online_log
.
info
(
'{0} [process complete] [task={1}]'
.
format
(
self
.
log_base
,
task_str
))
os
.
remove
(
excel_path
)
#
os.remove(excel_path)
# report新增流水真伪
try
:
...
...
src/apps/doc/views.py
View file @
8f4c320
...
...
@@ -17,7 +17,7 @@ from webargs import fields, validate
from
webargs.djangoparser
import
use_args
,
parser
from
settings
import
conf
from
common
import
response
from
common.mixins
import
GenericView
from
common.mixins
import
GenericView
,
DocGenericView
from
common.tools.file_tools
import
file_write
from
common.redis_cache
import
redis_handler
as
rh
from
.models
import
(
...
...
@@ -360,6 +360,10 @@ compare_result_args = {
upload_pdf_args
=
{
'pdf_file'
:
fields
.
Raw
(
required
=
True
),
'business_type'
:
fields
.
Str
(
required
=
True
),
'document_scheme'
:
fields
.
Str
(
required
=
True
),
'data_source'
:
fields
.
Str
(
required
=
True
),
'document_name'
:
fields
.
Str
(
required
=
True
),
}
application_information
=
{
...
...
@@ -1035,7 +1039,7 @@ class CompareOfflineView(GenericView):
'''
class
DocView
(
GenericView
,
DocHandler
):
class
DocView
(
Doc
GenericView
,
DocHandler
):
# 文件列表页
@use_args
(
doc_list_args
,
location
=
'querystring'
)
...
...
@@ -1079,6 +1083,8 @@ class DocView(GenericView, DocHandler):
application_link
=
'{0}/showList/showList?entity={1}&scheme={2}&case_id={3}'
.
format
(
conf
.
BASE_URL
,
prefix
,
tmp_scheme
,
doc_dict
[
'application_id'
])
doc_dict
[
'target_url'
]
=
application_link
doc_id
=
doc_dict
.
get
(
'id'
)
doc_dict
[
'excel_link'
]
=
self
.
get_link
(
doc_id
,
business_type
,
file
=
'excel'
)
# total = len(doc_list)
pagination
=
{
'current'
:
page
,
'total'
:
total
,
'page_size'
:
page_size
}
...
...
@@ -1092,18 +1098,23 @@ class DocView(GenericView, DocHandler):
return
response
.
ok
(
data
=
res
)
# 上传pdf,模拟下单
@use_args
(
upload_pdf_args
,
location
=
'files'
)
def
post
(
self
,
request
,
args
):
# @use_args(upload_pdf_args
)
def
post
(
self
,
request
):
random_int
=
random
.
randint
(
0
,
consts
.
TIME_NUM
)
metadata_version_id
=
str
(
int
(
time
.
time
())
-
random_int
)
pdf_file
=
args
.
get
(
'pdf_file'
)
pdf_file
=
request
.
FILES
.
get
(
'pdf_file'
)
if
isinstance
(
pdf_file
.
name
,
str
):
if
not
pdf_file
.
name
.
endswith
(
'pdf'
)
and
not
pdf_file
.
name
.
endswith
(
'PDF'
):
self
.
invalid_params
(
msg
=
'invalid params: not a PDF file'
)
business_type
=
request
.
POST
.
get
(
'business_type'
,
''
)
document_scheme
=
request
.
POST
.
get
(
'document_scheme'
,
''
)
data_source
=
request
.
POST
.
get
(
'data_source'
,
''
)
document_name
=
request
.
POST
.
get
(
'document_name'
,
''
)
args
=
{
'business_type'
:
business_type
,
'document_scheme'
:
document_scheme
,
'data_source'
:
data_source
,
'document_name'
:
document_name
,
'pdf_file'
:
pdf_file
}
# business_type = random.choice(consts.BUSINESS_TYPE_LIST)
business_type
=
consts
.
BUSINESS_TYPE_LIST
[
0
]
#
business_type = consts.BUSINESS_TYPE_LIST[0]
tmp_save_path
=
os
.
path
.
join
(
conf
.
DATA_DIR
,
business_type
,
'{0}.pdf'
.
format
(
metadata_version_id
))
file_write
(
pdf_file
,
tmp_save_path
)
...
...
@@ -1126,8 +1137,8 @@ class DocView(GenericView, DocHandler):
application_id
=
'{0}{1}'
.
format
(
consts
.
FIXED_APPLICATION_ID_PREFIX
,
metadata_version_id
)
upload_finish_time
=
timezone
.
now
()
# document_scheme = random.choice(consts.DOC_SCHEME_LIST)
document_scheme
=
consts
.
DOC_SCHEME_LIST
[
1
]
data_source
=
random
.
choice
(
consts
.
DATA_SOURCE_LIST
)
#
document_scheme = consts.DOC_SCHEME_LIST[1]
#
data_source = random.choice(consts.DATA_SOURCE_LIST)
# UploadDocRecords.objects.create(
# metadata_version_id=metadata_version_id,
# application_id=application_id,
...
...
@@ -1166,8 +1177,25 @@ class DocView(GenericView, DocHandler):
# 4. 选择队列进入
is_priority
=
False
tasks
=
[
'{0}{1}{2}'
.
format
(
prefix
,
consts
.
SPLIT_STR
,
doc
.
id
)]
enqueue_res
=
rh
.
enqueue
(
tasks
,
is_priority
)
classify_1
=
0
# 电子合同 Econtract or OVP(FSM)
if
data_source
==
consts
.
DATA_SOURCE_LIST
[
2
]
or
data_source
==
consts
.
DATA_SOURCE_LIST
[
3
]:
if
document_scheme
==
consts
.
DOC_SCHEME_LIST
[
1
]:
for
keyword
,
classify_1_tmp
in
consts
.
ECONTRACT_KEYWORDS_MAP
.
get
(
prefix
):
if
keyword
in
document_name
:
classify_1
=
classify_1_tmp
break
# FSM合同:WEP/MSI/SC
elif
data_source
==
consts
.
DATA_SOURCE_LIST
[
0
]
and
document_scheme
==
consts
.
DOC_SCHEME_LIST
[
0
]:
for
keyword
,
classify_1_tmp
in
consts
.
FSM_ECONTRACT_KEYWORDS_MAP
.
get
(
prefix
):
if
keyword
in
document_name
:
classify_1
=
classify_1_tmp
break
# tasks = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc.id)]
task
=
consts
.
SPLIT_STR
.
join
([
prefix
,
str
(
doc
.
id
),
str
(
classify_1
)])
enqueue_res
=
rh
.
enqueue
([
task
],
is_priority
)
self
.
running_log
.
info
(
'[mock doc upload success] [args={0}] [business_type={1}] [doc_id={2}] '
'[is_priority={3}] [enqueue_res={4}]'
.
format
(
args
,
prefix
,
doc
.
id
,
...
...
src/common/mixins.py
View file @
8f4c320
...
...
@@ -104,6 +104,62 @@ class GenericView(LoggerMixin, GenericExceptionMixin, GenericAPIView):
def
get_object
(
self
):
return
None
class DocGenericView(LoggerMixin, GenericExceptionMixin, GenericAPIView):
    """Generic API view base that logs every request before handling it.

    `dispatch` mirrors the initialize / initial / handler / finalize flow of
    a REST framework view, with one addition: when `need_print_logger` is
    true, `print_logger` is called right before the handler runs.
    """

    # When true, `dispatch` calls `print_logger` before invoking the handler.
    need_print_logger = True

    def print_logger(self, request):
        """Write one info line to `running_log` describing the request.

        The line contains the view class name, the HTTP method, the user id
        (0 when there is no user or the user is anonymous) and every
        key/value pair found in `request.data` and in the URL kwargs.

        Args:
            request: the request object produced by `initialize_request`.
        """
        # Only `request.data` is consulted; a missing or empty payload is
        # treated as "no parameters".
        parameters = getattr(request, 'data', None) or {}

        # NOTE(review): `request.data` is not guaranteed to be a mapping
        # (with REST framework a JSON array body yields a list). Logging must
        # not make such a request fail, so non-mapping payloads are skipped.
        body_items = parameters.items() if hasattr(parameters, 'items') else ()

        fragments = ['[%s=%s] ' % (key, value) for key, value in body_items]
        fragments.extend(
            '[%s=%s] ' % (key, value) for key, value in self.kwargs.items())
        parameters_string = ''.join(fragments)

        if request.user and not isinstance(request.user, AnonymousUser):
            user_id = request.user.id
        else:
            user_id = 0

        self.running_log.info('[%s_%s_request] with parameters [user_id=%s] %s'
                              % (self.__class__.__name__, request.method,
                                 user_id, parameters_string))

    def dispatch(self, request, *args, **kwargs):
        """Route the request to the handler method named after its HTTP verb.

        `.dispatch()` is pretty much the same as Django's regular dispatch,
        but with extra hooks for startup, finalize, and exception handling.
        The request is additionally logged via `print_logger` before the
        handler is called when `need_print_logger` is true.

        Args:
            request: the incoming request.
            *args: positional URL arguments, forwarded to the handler.
            **kwargs: keyword URL arguments, forwarded to the handler.

        Returns:
            The response returned by `finalize_response`.
        """
        self.args = args
        self.kwargs = kwargs
        request = self.initialize_request(request, *args, **kwargs)
        self.request = request
        self.headers = self.default_response_headers  # deprecate?

        try:
            self.initial(request, *args, **kwargs)

            # Get the appropriate handler method
            method_name = request.method.lower()
            if method_name in self.http_method_names:
                handler = getattr(self, method_name,
                                  self.http_method_not_allowed)
            else:
                handler = self.http_method_not_allowed

            # Logging happens inside the try block, so any error it raises
            # is routed through handle_exception like a handler error.
            if self.need_print_logger:
                self.print_logger(request)

            response = handler(request, *args, **kwargs)

        except Exception as exc:
            response = self.handle_exception(exc)

        self.response = self.finalize_response(
            request, response, *args, **kwargs)
        return self.response

    def get_object(self):
        """Always return None."""
        return None
class
IWABaseView
:
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment