Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
160ac57d
authored
2025-08-29 14:26:52 +0800
by
冯轩
Browse Files
Options
Browse Files
Tag
Download
Plain Diff
merge
2 parents
88f01673
d619642f
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
297 additions
and
93 deletions
src/apps/doc/internal_urls.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/views.py
src/apps/doc/internal_urls.py
View file @
160ac57
...
...
@@ -10,4 +10,7 @@ urlpatterns = [
path
(
r'invoice/downloadExcel'
,
views
.
InvoiceExcelView
.
as_view
()),
path
(
r'invoice/queryInfo'
,
views
.
InvoiceQueryInfoView
.
as_view
()),
path
(
r'contract/v1'
,
views
.
SEContractView
.
as_view
()),
path
(
r'reocr'
,
views
.
DocReOcrView
.
as_view
()),
path
(
r'batch/reocr'
,
views
.
BatchReOcrView
.
as_view
()),
]
...
...
src/apps/doc/management/commands/ocr_process.py
View file @
160ac57
...
...
@@ -100,7 +100,7 @@ class Command(BaseCommand, LoggerMixin):
if
len
(
info_tuple
)
==
2
:
business_type
,
doc_id_str
=
info_tuple
else
:
business_type
,
doc_id_str
,
classify_1_str
=
info_tuple
business_type
,
doc_id_str
,
classify_1_str
,
re_ocr_flag
=
info_tuple
doc_id
=
int
(
doc_id_str
)
doc_class
=
HILDoc
if
business_type
==
consts
.
HIL_PREFIX
else
AFCDoc
zip_doc
=
doc_class
.
objects
.
filter
(
id
=
doc_id
)
.
first
()
...
...
@@ -124,7 +124,7 @@ class Command(BaseCommand, LoggerMixin):
else
:
self
.
online_log
.
info
(
'{0} [zip_2_pdfs] [db save end] [task_str={1}]'
.
format
(
self
.
log_base
,
task_str
))
return
zip_doc
,
business_type
return
zip_doc
,
business_type
,
re_ocr_flag
def
get_doc_info
(
self
,
task_str
,
is_priority
=
False
):
try
:
...
...
@@ -135,7 +135,7 @@ class Command(BaseCommand, LoggerMixin):
classify_1_str
=
'0'
rebuild_task_str
=
task_str
else
:
business_type
,
doc_id_str
,
classify_1_str
=
info_tuple
business_type
,
doc_id_str
,
classify_1_str
,
re_ocr_flag
=
info_tuple
rebuild_task_str
=
'{0}{1}{2}'
.
format
(
business_type
,
consts
.
SPLIT_STR
,
doc_id_str
)
doc_id
=
int
(
doc_id_str
)
doc_class
=
HILDoc
if
business_type
==
consts
.
HIL_PREFIX
else
AFCDoc
...
...
@@ -160,7 +160,7 @@ class Command(BaseCommand, LoggerMixin):
else
:
self
.
online_log
.
info
(
'{0} [get_doc_info] [db save end] [task_str={1}] [is_priority={2}]'
.
format
(
self
.
log_base
,
task_str
,
is_priority
))
return
doc
,
business_type
,
rebuild_task_str
,
classify_1_str
return
doc
,
business_type
,
rebuild_task_str
,
classify_1_str
,
re_ocr_flag
# def pdf_download(self, doc, pdf_path):
# if not doc.application_id.startswith(consts.FIXED_APPLICATION_ID_PREFIX):
...
...
@@ -1202,7 +1202,7 @@ class Command(BaseCommand, LoggerMixin):
self
.
online_log
.
info
(
'{0} [zip_2_pdfs] [task={1}]'
.
format
(
self
.
log_base
,
task_str
))
# 2. 修改doc状态: 识别中
zip_doc
,
business_type
=
self
.
get_zip_doc_info
(
task_str
)
zip_doc
,
business_type
,
re_ocr_flag
=
self
.
get_zip_doc_info
(
task_str
)
if
zip_doc
is
None
:
time
.
sleep
(
self
.
sleep_time_doc_get
)
continue
...
...
@@ -1339,7 +1339,7 @@ class Command(BaseCommand, LoggerMixin):
try
:
# 1. 从队列获取文件信息
doc
,
business_type
,
task_str
,
classify_1_str
=
self
.
get_doc_info
(
task_str
,
is_priority
)
doc
,
business_type
,
task_str
,
classify_1_str
,
re_ocr_flag
=
self
.
get_doc_info
(
task_str
,
is_priority
)
# 队列为空时的处理
if
doc
is
None
:
time
.
sleep
(
self
.
sleep_time_doc_get
)
...
...
@@ -1389,7 +1389,8 @@ class Command(BaseCommand, LoggerMixin):
self
.
online_log
.
info
(
'{0} [pdf to img start] [task={1}] [times={2}]'
.
format
(
self
.
log_base
,
task_str
,
times
))
start_time
=
time
.
time
()
pdf_handler
.
extract_image
(
max_img_count
)
max_img_count_or_none
=
None
if
re_ocr_flag
==
'Y'
else
max_img_count
pdf_handler
.
extract_image
(
max_img_count_or_none
)
end_time
=
time
.
time
()
speed_time
=
int
(
end_time
-
start_time
)
self
.
online_log
.
info
(
'{0} [pdf to img end] [task={1}] [times={2}] [spend_time={3}] [is_new_modify={4}]'
.
format
(
...
...
@@ -1407,7 +1408,7 @@ class Command(BaseCommand, LoggerMixin):
self
.
online_log
.
warn
(
'{0} [pdf to img failed (pdf img empty)] [task={1}]'
.
format
(
self
.
log_base
,
task_str
))
raise
Exception
(
'pdf img empty'
)
elif
pdf_handler
.
img_count
>=
max_img_count
:
elif
re_ocr_flag
==
'N'
and
pdf_handler
.
img_count
>=
max_img_count
:
self
.
online_log
.
info
(
'{0} [too many pdf image] [task={1}] [img_count={2}]'
.
format
(
self
.
log_base
,
task_str
,
pdf_handler
.
img_count
))
...
...
src/apps/doc/views.py
View file @
160ac57
...
...
@@ -593,6 +593,16 @@ invoice_download_args = {
'application_ids'
:
fields
.
Str
(
required
=
True
),
}
# Request schema for the single-document re-OCR endpoint (used by DocReOcrView):
# the id of the existing doc plus the application entity it belongs to.
doc_reocr_args = dict(
    doc_id=fields.Int(required=True),
    application_entity=fields.Str(required=True),
)

# Request schema for the batch re-OCR endpoint (used by BatchReOcrView):
# only the application entity is supplied; the docs are selected server-side.
batch_reocr_args = dict(
    application_entity=fields.Str(required=True),
)
class
UploadDocView
(
GenericView
,
DocHandler
):
# permission_classes = []
# authentication_classes = []
...
...
@@ -709,7 +719,7 @@ class UploadDocView(GenericView, DocHandler):
or
document_name
.
endswith
(
'.RAR'
):
is_zip
=
True
task
=
consts
.
SPLIT_STR
.
join
([
prefix
,
str
(
doc
.
id
),
str
(
classify_1
)])
task
=
consts
.
SPLIT_STR
.
join
([
prefix
,
str
(
doc
.
id
),
str
(
classify_1
)
,
'N'
])
enqueue_res
=
rh
.
enqueue
([
task
],
is_priority
,
is_zip
)
self
.
running_log
.
info
(
'[doc upload success] [args={0}] [business_type={1}] [doc_id={2}] '
'[is_priority={3}] [enqueue_res={4}] [is_fsm={5} [classify_1={6}]]'
.
format
(
args
,
prefix
,
doc
.
id
,
...
...
@@ -1294,7 +1304,7 @@ class DocView(DocGenericView, DocHandler):
is_zip
=
True
# tasks = ['{0}{1}{2}'.format(prefix, consts.SPLIT_STR, doc.id)]
task
=
consts
.
SPLIT_STR
.
join
([
prefix
,
str
(
doc
.
id
),
str
(
classify_1
)])
task
=
consts
.
SPLIT_STR
.
join
([
prefix
,
str
(
doc
.
id
),
str
(
classify_1
)
,
'N'
])
enqueue_res
=
rh
.
enqueue
([
task
],
is_priority
,
is_zip
)
self
.
running_log
.
info
(
'[mock doc upload success] [args={0}] [business_type={1}] [doc_id={2}] '
...
...
@@ -1983,88 +1993,88 @@ class GoView(GenericView):
else
:
return
response
.
error_msg
(
msg
=
'识别错误'
)
class
InvoiceExcelView
(
GenericView
):
#permission_classes = [IsAuthenticated]
#authentication_classes = [OAuth2AuthenticationWithUser]
# 下载发票excel
@use_args
(
invoice_download_args
,
location
=
'data'
)
def
post
(
self
,
request
,
args
):
application_ids
=
args
.
get
(
'application_ids'
)
application_entity
=
args
.
get
(
'application_entity'
)
self
.
running_log
.
info
(
'[InvoiceExcelView] [user_role={0}] '
.
format
(
'111222333'
))
# 角色权限不符,返回异常
token
=
request
.
META
.
get
(
"HTTP_AUTHORIZATION"
)
user_role
=
rh
.
get_token
(
token
[
-
11
:])
self
.
running_log
.
info
(
'[InvoiceExcelView] [user_role={0}] '
.
format
(
user_role
))
if
user_role
is
None
or
user_role
==
'-1'
or
(
user_role
==
'1'
and
application_entity
==
'2'
)
or
(
user_role
==
'2'
and
application_entity
==
'1'
):
self
.
running_log
.
info
(
'[InvoiceExcelView no permission] [user_role={0}] [application_entity={1}]'
.
format
(
user_role
,
application_entity
))
raise
NoPermissionException
(
'no permission'
)
url
=
'http://127.0.0.1:8088/napi/invoice/downloadExcelOri'
body
=
{
'applicationIds'
:
application_ids
,
'applicationEntity'
:
application_entity
}
try
:
self
.
running_log
.
info
(
"request java invoice excel api, url:{0}, body:{1}"
.
format
(
url
,
json
.
dumps
(
body
)))
headers
=
{
'Content-Type'
:
'application/json'
}
resp
=
requests
.
post
(
url
,
headers
=
headers
,
json
=
body
)
self
.
running_log
.
info
(
"java invoice excel api finish, applicationIds:{0},{1}"
.
format
(
application_ids
,
resp
.
text
))
res_json
=
json
.
loads
(
resp
.
text
)
file_path
=
res_json
.
get
(
'result'
)
self
.
running_log
.
info
(
"java invoice excel after process, filePath:{0}"
.
format
(
file_path
))
current_time
=
time
.
strftime
(
'
%
Y-
%
m-
%
d_
%
H_
%
M_
%
S'
,
time
.
localtime
())
download_file_name
=
"发票信息提取-"
+
current_time
+
".xlsx"
f
=
open
(
file_path
,
"rb"
)
response
=
HttpResponse
(
content_type
=
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
)
response
[
'Content-Disposition'
]
=
'attachment; filename={0}'
.
format
(
escape_uri_path
(
download_file_name
))
response
[
'Access-Control-Expose-Headers'
]
=
'content-disposition'
response
.
write
(
f
.
read
())
f
.
close
()
return
response
except
Exception
as
e
:
self
.
running_log
.
error
(
"invoice excel request to java error, url:{0}, param:{1}, errorMsg:{2}"
.
format
(
url
,
json
.
dumps
(
body
),
traceback
.
format_exc
()))
class
InvoiceQueryInfoView
(
GenericView
):
#permission_classes = [IsAuthenticated]
#authentication_classes = [OAuth2AuthenticationWithUser]
@use_args
(
invoice_download_args
,
location
=
'data'
)
def
post
(
self
,
request
,
args
):
application_ids
=
args
.
get
(
'application_ids'
)
application_entity
=
args
.
get
(
'application_entity'
)
self
.
running_log
.
info
(
'[InvoiceExcelView] [user_role={0}] '
.
format
(
'111222333'
))
# 角色权限不符,返回异常
token
=
request
.
META
.
get
(
"HTTP_AUTHORIZATION"
)
user_role
=
rh
.
get_token
(
token
[
-
11
:])
self
.
running_log
.
info
(
'[InvoiceQueryInfoView] [user_role={0}] '
.
format
(
user_role
))
if
user_role
is
None
or
user_role
==
'-1'
or
(
user_role
==
'1'
and
application_entity
==
'2'
)
or
(
user_role
==
'2'
and
application_entity
==
'1'
):
self
.
running_log
.
info
(
'[InvoiceExcelView no permission] [user_role={0}] [application_entity={1}]'
.
format
(
user_role
,
application_entity
))
raise
NoPermissionException
(
'no permission'
)
url
=
'http://127.0.0.1:8088/napi/invoice/queryInfoOri'
body
=
{
'applicationIds'
:
application_ids
,
'applicationEntity'
:
application_entity
}
try
:
self
.
running_log
.
info
(
"request java invoice info api, url:{0}, body:{1}"
.
format
(
url
,
json
.
dumps
(
body
)))
headers
=
{
'Content-Type'
:
'application/json'
}
resp
=
requests
.
post
(
url
,
headers
=
headers
,
json
=
body
)
self
.
running_log
.
info
(
"java invoice info api finish, applicationIds:{0},{1}"
.
format
(
application_ids
,
resp
.
text
))
res_json
=
json
.
loads
(
resp
.
text
)
java_result
=
res_json
.
get
(
'result'
)
return
response2
.
ok
(
data
=
java_result
)
except
Exception
as
e
:
self
.
running_log
.
error
(
"invoice info request to java error, url:{0}, param:{1}, errorMsg:{2}"
.
format
(
url
,
json
.
dumps
(
body
),
traceback
.
format_exc
()))
#
class InvoiceExcelView(GenericView):
#
#permission_classes = [IsAuthenticated]
#
#authentication_classes = [OAuth2AuthenticationWithUser]
#
# 下载发票excel
#
@use_args(invoice_download_args, location='data')
#
def post(self, request, args):
#
application_ids = args.get('application_ids')
#
application_entity = args.get('application_entity')
#
self.running_log.info('[InvoiceExcelView] [user_role={0}] '.format('111222333'))
#
# 角色权限不符,返回异常
#
token = request.META.get("HTTP_AUTHORIZATION")
#
user_role = rh.get_token(token[-11:])
#
self.running_log.info('[InvoiceExcelView] [user_role={0}] '.format(user_role))
#
if user_role is None or user_role == '-1' or (user_role == '1' and application_entity == '2') or (user_role == '2' and application_entity == '1'):
#
self.running_log.info('[InvoiceExcelView no permission] [user_role={0}] [application_entity={1}]'.format(user_role, application_entity))
#
raise NoPermissionException('no permission')
#
url = 'http://127.0.0.1:8088/napi/invoice/downloadExcelOri'
#
body = {
#
'applicationIds': application_ids,
#
'applicationEntity': application_entity
#
}
#
try:
#
self.running_log.info("request java invoice excel api, url:{0}, body:{1}".format(url, json.dumps(body)))
#
headers = {
#
'Content-Type': 'application/json'
#
}
#
resp = requests.post(url, headers=headers, json=body)
#
self.running_log.info("java invoice excel api finish, applicationIds:{0},{1}".format(application_ids, resp.text))
#
res_json = json.loads(resp.text)
#
file_path = res_json.get('result')
#
self.running_log.info("java invoice excel after process, filePath:{0}".format(file_path))
#
current_time = time.strftime('%Y-%m-%d_%H_%M_%S', time.localtime())
#
download_file_name = "发票信息提取-" + current_time + ".xlsx"
#
f = open(file_path,"rb")
#
response = HttpResponse(content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
#
response['Content-Disposition'] = 'attachment; filename={0}'.format(escape_uri_path(download_file_name))
#
response['Access-Control-Expose-Headers'] = 'content-disposition'
#
response.write(f.read())
#
f.close()
#
return response
#
except Exception as e:
#
self.running_log.error("invoice excel request to java error, url:{0}, param:{1}, errorMsg:{2}".format(
#
url, json.dumps(body), traceback.format_exc()))
#
class InvoiceQueryInfoView(GenericView):
#
#permission_classes = [IsAuthenticated]
#
#authentication_classes = [OAuth2AuthenticationWithUser]
#
@use_args(invoice_download_args, location='data')
#
def post(self, request, args):
#
application_ids = args.get('application_ids')
#
application_entity = args.get('application_entity')
#
self.running_log.info('[InvoiceExcelView] [user_role={0}] '.format('111222333'))
#
# 角色权限不符,返回异常
#
token = request.META.get("HTTP_AUTHORIZATION")
#
user_role = rh.get_token(token[-11:])
#
self.running_log.info('[InvoiceQueryInfoView] [user_role={0}] '.format(user_role))
#
if user_role is None or user_role == '-1' or (user_role == '1' and application_entity == '2') or (user_role == '2' and application_entity == '1'):
#
self.running_log.info('[InvoiceExcelView no permission] [user_role={0}] [application_entity={1}]'.format(user_role, application_entity))
#
raise NoPermissionException('no permission')
#
url = 'http://127.0.0.1:8088/napi/invoice/queryInfoOri'
#
body = {
#
'applicationIds': application_ids,
#
'applicationEntity': application_entity
#
}
#
try:
#
self.running_log.info("request java invoice info api, url:{0}, body:{1}".format(url, json.dumps(body)))
#
headers = {
#
'Content-Type': 'application/json'
#
}
#
resp = requests.post(url, headers=headers, json=body)
#
self.running_log.info("java invoice info api finish, applicationIds:{0},{1}".format(application_ids, resp.text))
#
res_json = json.loads(resp.text)
#
java_result = res_json.get('result')
#
return response2.ok(data=java_result)
#
except Exception as e:
#
self.running_log.error("invoice info request to java error, url:{0}, param:{1}, errorMsg:{2}".format(
#
url, json.dumps(body), traceback.format_exc()))
def
notifyCmsPass
(
self
,
request
):
args
=
request
.
data
...
...
@@ -2190,4 +2200,194 @@ class DownloadGBHistoryFileView(GenericView):
self
.
running_log
.
info
(
'[DownloadGBHistoryFileView] [args={0}] '
.
format
(
args
))
return
response
.
ok
(
data
=
True
)
except
Exception
as
e
:
return
response
.
ok
(
data
=
False
)
\ No newline at end of file
return
response
.
ok
(
data
=
False
)
class InvoiceExcelView(GenericView):
    """Ask the local Java invoice service to build an Excel export and return it as a download.

    NOTE(review): the explicit permission/authentication classes and the
    role-vs-entity permission check are commented out below -- confirm that
    leaving this endpoint without them is intentional.
    """
    # permission_classes = [IsAuthenticated]
    # authentication_classes = [OAuth2AuthenticationWithUser]

    # Download the invoice Excel export.
    @use_args(invoice_download_args, location='data')
    def post(self, request, args):
        """Return the generated .xlsx file as an attachment response.

        Args:
            request: the incoming HTTP request (not read while the permission
                check below stays disabled).
            args: parsed request arguments; ``application_ids`` and
                ``application_entity`` are forwarded to the Java service.

        Returns:
            HttpResponse whose body is the content of the file path reported
            by the Java service in the ``result`` field of its JSON reply.

        Raises:
            Exception: any failure while calling the Java service or reading
                the file is logged with its traceback and re-raised.
                Previously the error was swallowed and the view returned
                ``None``, which hid the real cause behind a framework-level
                "no response returned" failure.
        """
        application_ids = args.get('application_ids')
        application_entity = args.get('application_entity')
        # Placeholder debug line kept from the original (hard-coded value).
        self.running_log.info('[InvoiceExcelView] [user_role={0}] '.format('111222333'))
        # Role/entity permission check (currently disabled): reject when the
        # role does not match the requested entity.
        # token = request.META.get("HTTP_AUTHORIZATION")
        # user_role = rh.get_token(token[-11:])
        # self.running_log.info('[InvoiceExcelView] [user_role={0}] '.format(user_role))
        # if user_role is None or user_role == '-1' or (user_role == '1' and application_entity == '2') or (user_role == '2' and application_entity == '1'):
        #     self.running_log.info('[InvoiceExcelView no permission] [user_role={0}] [application_entity={1}]'.format(user_role, application_entity))
        #     raise NoPermissionException('no permission')

        url = 'http://127.0.0.1:8088/napi/invoice/downloadExcelOri'
        body = {
            'applicationIds': application_ids,
            'applicationEntity': application_entity
        }
        try:
            self.running_log.info("request java invoice excel api, url:{0}, body:{1}".format(url, json.dumps(body)))
            headers = {
                'Content-Type': 'application/json'
            }
            # NOTE(review): no timeout is set, so a hung Java service blocks
            # this worker indefinitely -- consider passing timeout=...
            resp = requests.post(url, headers=headers, json=body)
            self.running_log.info("java invoice excel api finish, applicationIds:{0},{1}".format(application_ids, resp.text))
            res_json = json.loads(resp.text)
            file_path = res_json.get('result')
            self.running_log.info("java invoice excel after process, filePath:{0}".format(file_path))

            current_time = time.strftime('%Y-%m-%d_%H_%M_%S', time.localtime())
            download_file_name = "发票信息提取-" + current_time + ".xlsx"

            # Named http_response so the module-level ``response`` helper is
            # not shadowed inside this method.
            http_response = HttpResponse(
                content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
            http_response['Content-Disposition'] = 'attachment; filename={0}'.format(
                escape_uri_path(download_file_name))
            http_response['Access-Control-Expose-Headers'] = 'content-disposition'
            # Context manager guarantees the handle is closed even if the
            # read or the write raises.
            with open(file_path, "rb") as excel_file:
                http_response.write(excel_file.read())
            return http_response
        except Exception:
            self.running_log.error("invoice excel request to java error, url:{0}, param:{1}, errorMsg:{2}".format(
                url, json.dumps(body), traceback.format_exc()))
            # Propagate instead of implicitly returning None.
            raise
class InvoiceQueryInfoView(GenericView):
    """Forward an invoice-info query to the local Java invoice service and relay its result.

    NOTE(review): the explicit permission/authentication classes and the
    role-vs-entity permission check are commented out below -- confirm that
    leaving this endpoint without them is intentional.
    """
    # permission_classes = [IsAuthenticated]
    # authentication_classes = [OAuth2AuthenticationWithUser]

    @use_args(invoice_download_args, location='data')
    def post(self, request, args):
        """Return the ``result`` field of the Java service reply.

        Args:
            request: the incoming HTTP request (not read while the permission
                check below stays disabled).
            args: parsed request arguments; ``application_ids`` and
                ``application_entity`` are forwarded to the Java service.

        Returns:
            ``response2.ok(data=...)`` wrapping the ``result`` value of the
            JSON returned by the Java service.

        Raises:
            Exception: any failure while calling the Java service or decoding
                its reply is logged with its traceback and re-raised.
                Previously the error was swallowed and the view returned
                ``None``.
        """
        application_ids = args.get('application_ids')
        application_entity = args.get('application_entity')
        # Placeholder debug line kept from the original (hard-coded value);
        # the tag now names this view instead of the copy-pasted InvoiceExcelView.
        self.running_log.info('[InvoiceQueryInfoView] [user_role={0}] '.format('111222333'))
        # Role/entity permission check (currently disabled): reject when the
        # role does not match the requested entity.
        # token = request.META.get("HTTP_AUTHORIZATION")
        # user_role = rh.get_token(token[-11:])
        # self.running_log.info('[InvoiceQueryInfoView] [user_role={0}] '.format(user_role))
        # if user_role is None or user_role == '-1' or (user_role == '1' and application_entity == '2') or (user_role == '2' and application_entity == '1'):
        #     self.running_log.info('[InvoiceQueryInfoView no permission] [user_role={0}] [application_entity={1}]'.format(user_role, application_entity))
        #     raise NoPermissionException('no permission')

        url = 'http://127.0.0.1:8088/napi/invoice/queryInfoOri'
        body = {
            'applicationIds': application_ids,
            'applicationEntity': application_entity
        }
        try:
            self.running_log.info("request java invoice info api, url:{0}, body:{1}".format(url, json.dumps(body)))
            headers = {
                'Content-Type': 'application/json'
            }
            # NOTE(review): no timeout is set, so a hung Java service blocks
            # this worker indefinitely -- consider passing timeout=...
            resp = requests.post(url, headers=headers, json=body)
            self.running_log.info("java invoice info api finish, applicationIds:{0},{1}".format(application_ids, resp.text))
            res_json = json.loads(resp.text)
            java_result = res_json.get('result')
            return response2.ok(data=java_result)
        except Exception:
            self.running_log.error("invoice info request to java error, url:{0}, param:{1}, errorMsg:{2}".format(
                url, json.dumps(body), traceback.format_exc()))
            # Propagate instead of implicitly returning None.
            raise
# File-name suffixes that route a document to the zip queue (same four
# spellings the original condition checked; mixed case is not matched).
_REOCR_ARCHIVE_SUFFIXES = ('.zip', '.rar', '.ZIP', '.RAR')


def _classify_for_reocr(doc, prefix):
    """Return the classify_1 code for ``doc``, or 0 when no keyword rule applies.

    Mirrors the selection used when enqueuing: e-contract sources
    (DATA_SOURCE_LIST[2] / [3]) with scheme DOC_SCHEME_LIST[1] use
    ECONTRACT_KEYWORDS_MAP; source DATA_SOURCE_LIST[0] with scheme
    DOC_SCHEME_LIST[0] uses FSM_ECONTRACT_KEYWORDS_MAP. The first keyword
    contained in the document name wins.
    """
    # E-contract: Econtract or OVP (FSM)
    if doc.data_source == consts.DATA_SOURCE_LIST[2] or doc.data_source == consts.DATA_SOURCE_LIST[3]:
        if doc.document_scheme != consts.DOC_SCHEME_LIST[1]:
            return 0
        keyword_map = consts.ECONTRACT_KEYWORDS_MAP
    # FSM contracts: WEP/MSI/SC/SC2
    elif doc.data_source == consts.DATA_SOURCE_LIST[0] and doc.document_scheme == consts.DOC_SCHEME_LIST[0]:
        keyword_map = consts.FSM_ECONTRACT_KEYWORDS_MAP
    else:
        return 0

    # ``or ()`` guards against a prefix that has no entry in the map.
    for keyword, classify_1 in keyword_map.get(prefix) or ():
        if keyword in doc.document_name:
            return classify_1
    return 0


def _enqueue_reocr_task(doc, prefix, args, log):
    """Build the re-OCR task string for ``doc``, push it to the queue and log the outcome.

    Args:
        doc: the document model instance to re-recognize.
        prefix: business-type prefix returned by ``get_doc_class``.
        args: the request arguments, only used for logging.
        log: logger exposing ``info``.

    Returns:
        Whatever ``rh.enqueue`` returns.
    """
    # Pick the queue: priority applications go to the priority queue.
    is_priority = PriorityApplication.objects.filter(
        application_id=doc.application_id, on_off=True).exists()
    classify_1 = _classify_for_reocr(doc, prefix)
    is_zip = doc.document_name.endswith(_REOCR_ARCHIVE_SUFFIXES)

    # Task looks like 'AFC_11001_0_Y' / 'AFC_11001_0_N'; the trailing flag
    # says whether this is a re-OCR (Y) or a regular upload (N).
    task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1), 'Y'])
    enqueue_res = rh.enqueue([task], is_priority, is_zip)
    log.info('[doc reocr success] [args={0}] [business_type={1}] [doc_id={2}] '
             '[is_priority={3}] [enqueue_res={4}] [classify_1={5}]'.format(
                 args, prefix, doc.id, is_priority, enqueue_res, classify_1))
    return enqueue_res


class DocReOcrView(GenericView, DocHandler):
    """Re-run OCR for one existing document by putting it back on the task queue."""
    permission_classes = [IsAuthenticated]
    authentication_classes = [OAuth2AuthenticationWithUser]
    # required_scopes = ['write']

    # Re-recognition endpoint for an existing file.
    @use_args(doc_reocr_args, location='data')
    def post(self, request, args):
        """Enqueue a re-OCR task for the document identified by ``doc_id``.

        Args:
            request: the incoming HTTP request (unused).
            args: parsed request arguments with ``doc_id`` and
                ``application_entity``.

        Returns:
            ``response.ok()`` once the task is enqueued, or
            ``response.error_msg(...)`` when no document has that id
            (previously this case raised AttributeError on ``None``).
        """
        application_entity = args.get('application_entity')
        doc_id = args.get('doc_id')

        doc_class, prefix = self.get_doc_class(application_entity)
        doc = doc_class.objects.filter(id=doc_id).first()
        if doc is None:
            self.running_log.info('[doc reocr failed (doc not found)] [args={0}]'.format(args))
            return response.error_msg(msg='doc not found')

        _enqueue_reocr_task(doc, prefix, args, self.running_log)
        return response.ok()


class BatchReOcrView(GenericView, DocHandler):
    """Re-run OCR for every document created today that has status 2."""
    permission_classes = [IsAuthenticated]
    authentication_classes = [OAuth2AuthenticationWithUser]

    # Batch re-recognition endpoint for existing files.
    @use_args(batch_reocr_args, location='data')
    def post(self, request, args):
        """Enqueue a re-OCR task for each matching document of the given entity.

        Args:
            request: the incoming HTTP request (unused).
            args: parsed request arguments with ``application_entity``.

        Returns:
            ``response.ok()`` after all matching documents were enqueued.
        """
        application_entity = args.get('application_entity')

        # Use the date in the current time zone: the bounds below are made
        # aware in that same zone, whereas ``timezone.now().date()`` yields the
        # UTC date when USE_TZ is on and selects the wrong day near midnight.
        today = timezone.localdate()
        # ``timezone.datetime`` is the datetime class as re-exported by
        # django.utils.timezone, kept as the original accessed it.
        start_of_day = timezone.make_aware(
            timezone.datetime.combine(today, timezone.datetime.min.time()))
        end_of_day = timezone.make_aware(
            timezone.datetime.combine(today, timezone.datetime.max.time()))

        doc_class, prefix = self.get_doc_class(application_entity)
        # NOTE(review): status=2 is a magic number -- confirm which doc status
        # it stands for and prefer the named constant if one exists.
        docs = doc_class.objects.filter(status=2, create_time__range=(start_of_day, end_of_day))

        # One timestamp per batch so all log lines of this run can be correlated.
        time_stamp = time.time()
        for doc in docs.iterator():
            self.running_log.info('[batch doc reocr timestamp={0}] [doc_id={1}]'.format(time_stamp, doc.id))
            _enqueue_reocr_task(doc, prefix, args, self.running_log)

        return response.ok()
\ No newline at end of file
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment