Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
cc6c63c8
authored
2021-11-02 18:54:52 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Plain Diff
Merge branch 'feature/ecm' into feature/1119
2 parents
24ed4007
e9d7ce17
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
173 additions
and
3 deletions
src/apps/doc/exceptions.py
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/ocr/ecm.py
src/common/redis_cache/handler.py
src/apps/doc/exceptions.py
View file @
cc6c63c
...
...
@@ -24,3 +24,7 @@ class GCAPException(Exception):
class CMSException(Exception):
    """Error type for CMS-related failures (a plain ``Exception`` subclass)."""
class ECMException(Exception):
    """Error type raised by the ECM client when an ECM call fails."""
...
...
src/apps/doc/management/commands/ocr_process.py
View file @
cc6c63c
...
...
@@ -19,7 +19,8 @@ from common.mixins import LoggerMixin
from
common.tools.file_tools
import
write_zip_file
from
common.tools.pdf_to_img
import
PDFHandler
from
apps.doc
import
consts
from
apps.doc.ocr.edms
import
EDMS
,
rh
# from apps.doc.ocr.edms import EDMS, rh
from
apps.doc.ocr.ecm
import
ECM
,
rh
from
apps.doc.named_enum
import
KeywordsType
,
FailureReason
,
WorkflowName
,
ProcessName
,
RequestTeam
,
RequestTrigger
from
apps.doc.exceptions
import
EDMSException
,
OCR1Exception
,
OCR2Exception
,
OCR4Exception
from
apps.doc.ocr.wb
import
BSWorkbook
...
...
@@ -68,7 +69,8 @@ class Command(BaseCommand, LoggerMixin):
self
.
ocr_url_3
=
conf
.
BC_URL
self
.
ocr_url_4
=
conf
.
IC_URL
# EDMS web_service_api
self
.
edms
=
EDMS
()
# self.edms = EDMS()
self
.
edms
=
ECM
()
# 优雅退出信号:15
signal
.
signal
(
signal
.
SIGTERM
,
self
.
signal_handler
)
...
...
@@ -945,7 +947,8 @@ class Command(BaseCommand, LoggerMixin):
for
times
in
range
(
consts
.
RETRY_TIMES
):
try
:
if
not
doc
.
application_id
.
startswith
(
consts
.
FIXED_APPLICATION_ID_PREFIX
):
self
.
edms
.
download
(
pdf_path
,
doc
.
metadata_version_id
)
# self.edms.download(pdf_path, doc.metadata_version_id)
self
.
edms
.
download
(
pdf_path
,
doc
.
metadata_version_id
,
doc
.
document_scheme
,
business_type
)
self
.
online_log
.
info
(
'{0} [edms download success] [task={1}] [times={2}] '
'[pdf_path={3}]'
.
format
(
self
.
log_base
,
task_str
,
times
,
pdf_path
))
...
...
src/apps/doc/ocr/ecm.py
0 → 100644
View file @
cc6c63c
import base64
import os

import requests

from common.redis_cache import redis_handler as rh
from settings import conf
from apps.doc.exceptions import ECMException
class ECM:
    """HTTP client for the ECM document service.

    Provides OAuth (client-credentials) token handling plus ``search``,
    ``download`` and ``upload`` calls. Every failure detected by this class
    is reported as ``ECMException``.

    NOTE(review): every request is sent with ``verify=False`` (TLS
    certificate verification disabled) and without a timeout, exactly as
    before. Confirm whether the ECM endpoints can be verified against a CA
    bundle and whether a timeout should be configured.
    """

    def __init__(self):
        # Bearer token cached on the instance; filled lazily by
        # get_oauth_token() and replaced by update_oauth_token().
        self.oauth_token = None
        self.username = conf.ECM_USER
        self.pwd = conf.ECM_PWD
        self.oauth_url = conf.ECM_OAUTH_URL
        self.download_url = conf.ECM_DOWNLOAD_URL
        self.upload_url = conf.ECM_UPLOAD_URL
        self.search_url = conf.ECM_SEARCH_URL
        self.oauth_headers = {
            'Content-Type': 'application/x-www-form-urlencoded'
        }
        self.oauth_payload = {
            'grant_type': 'client_credentials',
            'client_id': conf.ECM_OAUTH_ID,
            'client_secret': conf.ECM_OAUTH_SECRET,
        }
        # Keys looked up in the OAuth JSON response.
        self.token_key = 'access_token'
        self.token_type = 'Bearer'
        self.token_type_key = 'token_type'
        self.expires_key = 'expires_in'
        self.settlement_type = 'settlement'
        # document_scheme -> (ECM document type, AFC folder, HIL folder)
        self.doc_type_map = {
            'ACCEPTANCE': ('acceptance', conf.ECM_FOLDER_CA, conf.ECM_FOLDER_CA_HIL),
            'SETTLEMENT': (self.settlement_type, conf.ECM_FOLDER_SE, conf.ECM_FOLDER_SE_HIL),
            'CONTRACTMANAGEMENT': ('contract_management', conf.ECM_FOLDER_CA, conf.ECM_FOLDER_CA_HIL),
        }
        # business_type -> value sent as "docbase"
        self.doc_base_map = {
            'AFC': 'SF5_CN',
            'HIL': 'SF5_CL',
        }
        # Prefix prepended to uploaded object names.
        self.prefix = 'OCR'
        # Metadata fields that upload() always sends as empty strings.
        self.upload_fields = [
            "b_region", "b_region_name", "r_object_type", "r_creation_date",
            "r_creator_name", "r_modify_date", "r_modifier", "owner",
            "b_short_application_no", "b_short_contract_no", "b_customer_id",
            "b_customer_name", "b_customer_mobile", "b_coborrower_id",
            "b_coborrower_name", "b_guarantor_id", "b_guarantor_name",
            "b_frontend_partner", "b_dealer_code", "b_dealer_name",
            "b_input_date", "b_comment", "b_contract_no", "b_location",
        ]
        # object_name prefix used in the LIKE clause of search().
        self.contract_prefix = '电子'

    @staticmethod
    def _json_body(response, action):
        """Parse ``response`` as a JSON object exactly once.

        Raises:
            ECMException: the body is not JSON, or is JSON but not an object.
        """
        try:
            body = response.json()
        except ValueError as err:
            raise ECMException(
                'ECM {0} returned a non-JSON body'.format(action)) from err
        if not isinstance(body, dict):
            raise ECMException(
                'ECM {0} returned unexpected JSON: {1}'.format(action, body))
        return body

    @staticmethod
    def _dig(data, *keys, default=None):
        """Walk nested dicts along ``keys``.

        Returns ``default`` when a step is missing, is not a dict, or the
        final value is ``None``; never raises on unexpected shapes.
        """
        current = data
        for key in keys:
            if not isinstance(current, dict):
                return default
            current = current.get(key)
        return default if current is None else current

    @staticmethod
    def _as_list(value):
        """Return ``value`` as a list.

        NOTE(review): JSON derived from XML often collapses a one-element
        list into a bare object; a dict is therefore treated as a single
        item. Confirm against real ECM search responses.
        """
        if isinstance(value, list):
            return value
        if isinstance(value, dict):
            return [value]
        return []

    def _doc_type_entry(self, document_scheme):
        """Return the ``doc_type_map`` tuple for ``document_scheme``.

        Raises:
            ECMException: the scheme is not configured (previously this
                surfaced as an opaque tuple-unpacking ``TypeError``).
        """
        entry = self.doc_type_map.get(document_scheme)
        if entry is None:
            raise ECMException(
                'ECM unsupported document scheme: {0}'.format(document_scheme))
        return entry

    def _post(self, url, payload):
        """POST ``payload`` as JSON with the bearer header and return the response.

        The token is cached on the instance indefinitely, so a long-running
        process would otherwise keep sending an expired token. On HTTP 401
        the token is refreshed and the request is repeated once.
        NOTE(review): assumes ECM signals an expired/invalid token with
        401 — confirm; any other status is returned unchanged.
        """
        response = requests.post(url, headers=self.get_headers(), json=payload, verify=False)
        if response.status_code == 401:
            self.update_oauth_token()
            response = requests.post(url, headers=self.get_headers(), json=payload, verify=False)
        return response

    def update_oauth_token(self):
        """Request a new OAuth token, cache it on the instance and in Redis.

        Raises:
            ECMException: non-200 status, unparsable body, or no string token.
        """
        response = requests.post(self.oauth_url, headers=self.oauth_headers,
                                 data=self.oauth_payload, verify=False)
        if response.status_code != 200:
            raise ECMException('ECM Oauth response with code: {0}'.format(response.status_code))
        body = self._json_body(response, 'Oauth')
        token = body.get(self.token_key)
        if not isinstance(token, str):
            raise ECMException('ECM Oauth can not get token: {0}'.format(body))
        self.oauth_token = token
        self.token_type = body.get(self.token_type_key, self.token_type)
        # Falls back to 3600 when the response carries no expiry.
        expires = body.get(self.expires_key, 3600)
        rh.set_ecm_token(self.oauth_token, expires)

    def get_oauth_token(self):
        """Return the cached token, loading it from Redis or ECM if absent."""
        if self.oauth_token is None:
            # Try the token shared through Redis first.
            cached = rh.get_ecm_token()
            # NOTE(review): a Redis client without response decoding returns
            # bytes, which would format as "b'...'" in the header.
            if isinstance(cached, bytes):
                cached = cached.decode()
            self.oauth_token = cached
            if self.oauth_token is None:
                self.update_oauth_token()
        return self.oauth_token

    def get_headers(self):
        """Build the ``Authorization`` header for authenticated calls."""
        return {
            'Authorization': '{0} {1}'.format(self.token_type, self.get_oauth_token())
        }

    def search(self, application_id, business_type):
        """Search settlement objects of an application whose name starts with ``contract_prefix``.

        Args:
            application_id: value matched against ``b_application_no``.
            business_type: key into ``doc_base_map`` selecting the docbase.

        Returns:
            list of ``(object_name, object_id)`` tuples; entries lacking
            either value are dropped.

        Raises:
            ECMException: non-200 status or unparsable body.
        """
        # Double single quotes so the value cannot terminate the DQL string
        # literal it is interpolated into.
        safe_application_id = str(application_id).replace("'", "''")
        sql = "select * from {0} where b_application_no='{1}' and object_name like '{2}%'".format(
            self.settlement_type, safe_application_id, self.contract_prefix)
        search_args = {
            "userName": self.username,
            "password": self.pwd,
            "docbase": self.doc_base_map.get(business_type),
            "documentType": self.settlement_type,
            "dql": sql
        }
        response = self._post(self.search_url, search_args)
        if response.status_code != 200:
            raise ECMException('ECM search failed with code: {0} , with headers: {1}'.format(
                response.status_code, response.headers))
        body = self._json_body(response, 'search')
        data_objects = self._dig(
            body, 'Envelope', 'Body', 'executeResponse', 'return', 'dataPackage', 'DataObjects',
            default=[])
        result = []
        for object_dict in self._as_list(data_objects):
            if not isinstance(object_dict, dict):
                continue
            object_id = self._dig(object_dict, 'Identity', 'ObjectId', '@id', default='')
            object_name = ''
            properties = self._dig(object_dict, 'Properties', 'Properties', default=[])
            for attr_dict in self._as_list(properties):
                if isinstance(attr_dict, dict) and attr_dict.get('@name', '') == 'object_name':
                    object_name = attr_dict.get('Value', '') or ''
                    break
            if object_id and object_name:
                result.append((object_name, object_id))
        return result

    def download(self, save_path, object_id, document_scheme, business_type):
        """Download one ECM object and write its decoded content to ``save_path``.

        Args:
            save_path: destination file path (written in binary mode).
            object_id: sent as "objectId".
            document_scheme: key into ``doc_type_map``.
            business_type: key into ``doc_base_map`` selecting the docbase.

        Raises:
            ECMException: unknown scheme, non-200 status, unparsable body,
                missing content, or content that is not valid base64.
        """
        doc_type, _, _ = self._doc_type_entry(document_scheme)
        download_json = {
            "userName": self.username,
            "password": self.pwd,
            "docbase": self.doc_base_map.get(business_type),
            "documentType": doc_type,
            "objectId": object_id,
        }
        response = self._post(self.download_url, download_json)
        if response.status_code != 200:
            raise ECMException('ECM download failed with code: {0}'.format(response.status_code))
        body = self._json_body(response, 'download')
        base64_data = self._dig(
            body, 'Envelope', 'Body', 'getResponse', 'return', 'DataObjects', 'Contents', 'Value')
        if not isinstance(base64_data, str):
            raise ECMException('ECM download failed: {0}'.format(body))
        # Decode before opening the target so a bad payload does not leave a
        # truncated/empty file behind at save_path.
        try:
            content = base64.b64decode(base64_data.encode())
        except ValueError as err:
            raise ECMException('ECM download failed: content is not valid base64') from err
        with open(save_path, "wb") as fh:
            fh.write(content)

    def get_doc_file_name(self, doc_name):
        """Return the ECM object name for ``doc_name``.

        A trailing ``.pdf`` extension (any letter case) is stripped and
        ``self.prefix`` is prepended. A non-string ``doc_name`` yields the
        bare prefix.
        """
        if not isinstance(doc_name, str):
            return self.prefix
        if doc_name.lower().endswith('.pdf'):
            doc_name, _ = os.path.splitext(doc_name)
        return '{0}{1}'.format(self.prefix, doc_name)

    def upload(self, file_path, doc, business_type):
        """Upload the file at ``file_path`` to ECM as an ``excel12book`` object.

        Args:
            file_path: local file whose bytes are sent base64-encoded.
            doc: object providing ``document_scheme``, ``document_name`` and
                ``application_id``.
            business_type: 'AFC' selects the AFC folder, anything else the
                HIL folder; also the key into ``doc_base_map``.

        Raises:
            ECMException: unknown scheme, non-200 status, unparsable body,
                or a response without ``ns6:createResponse``.
        """
        doc_type, folder_afc, folder_hil = self._doc_type_entry(doc.document_scheme)
        folder = folder_afc if business_type == 'AFC' else folder_hil
        # NOTE(review): the credential key is "username" here but "userName"
        # in search()/download(); kept as-is — confirm against the ECM API.
        args = {
            "username": self.username,
            "password": self.pwd,
            "docbase": self.doc_base_map.get(business_type),
            "documentType": doc_type,
            "object_name": self.get_doc_file_name(doc.document_name),
            "folder": folder,
            "format": "excel12book",
            "r_content_type": "excel12book",
            "b_application_no": doc.application_id,
        }
        for key in self.upload_fields:
            args[key] = ''
        with open(file_path, 'rb') as f:
            # base64 text form of the file content
            args['file_base64_content'] = base64.b64encode(f.read()).decode()
        response = self._post(self.upload_url, args)
        if response.status_code != 200:
            raise ECMException('ECM upload failed with code: {0} , with headers: {1}'.format(
                response.status_code, response.headers))
        body = self._json_body(response, 'upload')
        soap_body = self._dig(body, 'S:Envelope', 'S:Body', default={})
        if not isinstance(soap_body, dict) or 'ns6:createResponse' not in soap_body:
            raise ECMException('ECM upload failed: {0} , with headers: {1}'.format(
                body, response.headers))
src/common/redis_cache/handler.py
View file @
cc6c63c
...
...
@@ -37,6 +37,7 @@ class RedisHandler:
self
.
priority_queue_key
=
'{0}:priority_queue'
.
format
(
self
.
prefix
)
self
.
session_id_key
=
'{0}:session_id'
.
format
(
self
.
prefix
)
self
.
cms_token_key
=
'{0}:cms_token'
.
format
(
self
.
prefix
)
self
.
ecm_token_key
=
'{0}:ecm_token'
.
format
(
self
.
prefix
)
def
enqueue
(
self
,
tasks
,
is_priority
=
False
):
# 1
...
...
@@ -64,3 +65,9 @@ class RedisHandler:
def set_cms_token(self, token, expires=None):
    """Store ``token`` under ``cms_token_key`` and return the client's result.

    ``expires`` is forwarded as the third positional argument of
    ``self.redis.set``.
    """
    store = self.redis.set
    return store(self.cms_token_key, token, expires)
def get_ecm_token(self):
    """Return whatever ``self.redis.get`` yields for ``ecm_token_key``."""
    fetch = self.redis.get
    return fetch(self.ecm_token_key)
def set_ecm_token(self, token, expires=None):
    """Store ``token`` under ``ecm_token_key`` and return the client's result.

    ``expires`` is forwarded as the third positional argument of
    ``self.redis.set``.
    """
    store = self.redis.set
    return store(self.ecm_token_key, token, expires)
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment