Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
4de905f7
authored
2021-10-26 15:15:35 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
add ecm
1 parent
50b15762
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
64 additions
and
22 deletions
src/apps/doc/management/commands/ocr_process.py
src/apps/doc/ocr/ecm.py
src/apps/doc/management/commands/ocr_process.py
View file @
4de905f
...
...
@@ -19,7 +19,8 @@ from common.mixins import LoggerMixin
from
common.tools.file_tools
import
write_zip_file
from
common.tools.pdf_to_img
import
PDFHandler
from
apps.doc
import
consts
from
apps.doc.ocr.edms
import
EDMS
,
rh
# from apps.doc.ocr.edms import EDMS, rh
from
apps.doc.ocr.ecm
import
ECM
,
rh
from
apps.doc.named_enum
import
KeywordsType
,
FailureReason
,
WorkflowName
,
ProcessName
,
RequestTeam
,
RequestTrigger
from
apps.doc.exceptions
import
EDMSException
,
OCR1Exception
,
OCR2Exception
,
OCR4Exception
from
apps.doc.ocr.wb
import
BSWorkbook
...
...
@@ -67,7 +68,8 @@ class Command(BaseCommand, LoggerMixin):
self
.
ocr_url_3
=
conf
.
BC_URL
self
.
ocr_url_4
=
conf
.
IC_URL
# EDMS web_service_api
self
.
edms
=
EDMS
()
# self.edms = EDMS()
self
.
edms
=
ECM
()
# Graceful-shutdown signal: 15 (SIGTERM)
signal
.
signal
(
signal
.
SIGTERM
,
self
.
signal_handler
)
...
...
@@ -717,7 +719,8 @@ class Command(BaseCommand, LoggerMixin):
for
times
in
range
(
consts
.
RETRY_TIMES
):
try
:
if
not
doc
.
application_id
.
startswith
(
consts
.
FIXED_APPLICATION_ID_PREFIX
):
self
.
edms
.
download
(
pdf_path
,
doc
.
metadata_version_id
)
# self.edms.download(pdf_path, doc.metadata_version_id)
self
.
edms
.
download
(
pdf_path
,
doc
.
metadata_version_id
,
doc
.
document_scheme
,
business_type
)
self
.
online_log
.
info
(
'{0} [edms download success] [task={1}] [times={2}] '
'[pdf_path={3}]'
.
format
(
self
.
log_base
,
task_str
,
times
,
pdf_path
))
...
...
src/apps/doc/ocr/ecm.py
View file @
4de905f
...
...
@@ -13,6 +13,7 @@ class ECM:
self
.
pwd
=
conf
.
ECM_PWD
self
.
oauth_url
=
conf
.
ECM_OAUTH_URL
self
.
download_url
=
conf
.
ECM_DOWNLOAD_URL
self
.
upload_url
=
conf
.
ECM_UPLOAD_URL
self
.
oauth_headers
=
{
'Content-Type'
:
'application/x-www-form-urlencoded'
}
...
...
@@ -25,6 +26,22 @@ class ECM:
self
.
token_type
=
'Bearer'
self
.
token_type_key
=
'token_type'
self
.
expires_key
=
'expires_in'
self
.
doc_type_map
=
{
'ACCEPTANCE'
:
(
'acceptance'
,
conf
.
ECM_FOLDER_CA
),
'SETTLEMENT'
:
(
'settlement'
,
conf
.
ECM_FOLDER_SE
),
'CONTRACTMANAGEMENT'
:
(
'contract_management'
,
conf
.
ECM_FOLDER_CA
),
}
self
.
doc_base_map
=
{
'AFC'
:
'SF5_CN'
,
'HIL'
:
'SF5_CL'
,
}
self
.
prefix
=
'OCR'
self
.
upload_fields
=
[
"b_region"
,
"b_region_name"
,
"r_object_type"
,
"r_content_type"
,
"r_creation_date"
,
"r_creator_name"
,
"r_modify_date"
,
"r_modifier"
,
"owner"
,
"b_short_application_no"
,
"b_short_contract_no"
,
"b_customer_id"
,
"b_customer_name"
,
"b_customer_mobile"
,
"b_coborrower_id"
,
"b_coborrower_name"
,
"b_guarantor_id"
,
"b_guarantor_name"
,
"b_frontend_partner"
,
"b_dealer_code"
,
"b_dealer_name"
,
"b_input_date"
,
"b_comment"
,
"b_contract_no"
,
"b_location"
]
def
update_oauth_token
(
self
):
response
=
requests
.
post
(
self
.
oauth_url
,
headers
=
self
.
oauth_headers
,
data
=
self
.
oauth_payload
,
verify
=
False
)
...
...
@@ -46,20 +63,21 @@ class ECM:
self
.
update_oauth_token
()
return
self
.
oauth_token
def
download
(
self
,
save_path
,
doc_base
,
doc_type
,
object_id
):
download_headers
=
{
'Authorization'
:
'{0} {1}'
.
format
(
self
.
token_type
,
self
.
get_oauth_token
())
}
def get_headers(self):
    """Return the request headers carrying the current OAuth credentials.

    The ``Authorization`` value is ``self.token_type`` and the token
    returned by ``self.get_oauth_token()``, separated by a single space.
    """
    # Read the scheme before fetching the token, matching the original
    # argument evaluation order.
    scheme = self.token_type
    token = self.get_oauth_token()
    authorization = '{0} {1}'.format(scheme, token)
    return {'Authorization': authorization}
def
download
(
self
,
save_path
,
object_id
,
document_scheme
,
business_type
):
doc_type
,
_
=
self
.
doc_type_map
.
get
(
document_scheme
)
download_json
=
{
"userName"
:
self
.
username
,
"password"
:
self
.
pwd
,
"docbase"
:
doc_base
,
"docbase"
:
self
.
doc_base_map
.
get
(
business_type
)
,
"documentType"
:
doc_type
,
"objectId"
:
object_id
,
}
response
=
requests
.
post
(
self
.
download_url
,
headers
=
download_headers
,
json
=
download_json
,
verify
=
False
)
response
=
requests
.
post
(
self
.
download_url
,
headers
=
self
.
get_headers
()
,
json
=
download_json
,
verify
=
False
)
if
response
.
status_code
!=
200
:
raise
ECMException
(
'ECM download
response
with code: {0}'
.
format
(
response
.
status_code
))
raise
ECMException
(
'ECM download
failed
with code: {0}'
.
format
(
response
.
status_code
))
base64_data
=
response
.
json
()
.
get
(
'Envelope'
,
{})
.
get
(
'Body'
,
{})
.
get
(
'getResponse'
,
{})
.
get
(
'return'
,
{})
.
get
(
'DataObjects'
,
{})
.
get
(
'Contents'
,
{})
.
get
(
'Value'
)
if
not
isinstance
(
base64_data
,
str
):
...
...
@@ -67,17 +85,38 @@ class ECM:
with
open
(
save_path
,
"wb"
)
as
fh
:
fh
.
write
(
base64
.
b64decode
(
base64_data
.
encode
()))
def
upload
(
self
):
{
"username"
:
"fanliubing"
,
"password"
:
"Bmwecm123456&api1"
,
"docbase"
:
"SF5_CN"
,
"documentType"
:
"acceptance"
,
"object_name"
:
"JoeMyPDF202109061529.pdf"
,
"folder"
:
"/Wholesale/Operations/Audits"
,
"format"
:
"pdf"
,
"b_application_no"
:
""
,
def get_doc_file_name(self, doc_name):
    """Build the ECM object name for a document.

    The result is ``self.prefix`` followed by ``doc_name``. When the name
    ends in ``.pdf`` (in any letter case) that extension is removed first.

    Args:
        doc_name: Original document name. A value that is not a ``str``
            is ignored and the bare prefix is returned.

    Returns:
        The prefixed object name.
    """
    if not isinstance(doc_name, str):
        return self.prefix
    # A single case-insensitive suffix check replaces the eight hand-written
    # spellings ('.pdf', '.PDF', '.pdF', ...) and accepts the same names.
    if doc_name.lower().endswith('.pdf'):
        doc_name, _ = os.path.splitext(doc_name)
    return '{0}{1}'.format(self.prefix, doc_name)
def upload(self, file_path, doc, business_type):
    """Upload a local file to ECM as a new document.

    Args:
        file_path: Path of the file to send; its bytes are base64-encoded
            into the ``file_base64_content`` field of the request.
        doc: Document record. ``document_scheme``, ``document_name`` and
            ``application_id`` are read from it.
        business_type: Key looked up in ``self.doc_base_map`` to choose the
            ``docbase`` value.

    Raises:
        ECMException: If ``doc.document_scheme`` has no entry in
            ``self.doc_type_map``, if the HTTP status is not 200, or if the
            response body does not contain ``ns6:createResponse``.
    """
    type_and_folder = self.doc_type_map.get(doc.document_scheme)
    if type_and_folder is None:
        # Previously this surfaced as an opaque "cannot unpack NoneType"
        # TypeError; report the unsupported scheme explicitly instead.
        raise ECMException(
            'ECM upload failed: unknown document scheme: {0}'.format(doc.document_scheme))
    doc_type, folder = type_and_folder

    args = {
        "username": self.username,
        "password": self.pwd,
        "docbase": self.doc_base_map.get(business_type),
        "documentType": doc_type,
        "object_name": self.get_doc_file_name(doc.document_name),
        "folder": folder,
        # NOTE(review): the format is fixed to xlsx regardless of file_path;
        # confirm that only workbooks are uploaded through this method.
        "format": "xlsx",
        "b_application_no": doc.application_id,
        "file_base64_content": "",
    }
    # Every optional metadata field is sent, but left blank.
    for key in self.upload_fields:
        args[key] = ''

    with open(file_path, 'rb') as f:
        base64_data = base64.b64encode(f.read())
    # b64encode returns bytes; decode to str so the payload is JSON-serialisable.
    args['file_base64_content'] = base64_data.decode()

    # NOTE(review): verify=False disables TLS certificate verification;
    # kept as in the original, confirm it is intentional.
    response = requests.post(self.upload_url, headers=self.get_headers(), json=args, verify=False)
    if response.status_code != 200:
        raise ECMException('ECM upload failed with code: {0}'.format(response.status_code))

    # Parse the body once and reuse it for both the check and the message.
    response_json = response.json()
    if 'ns6:createResponse' not in response_json.get('S:Envelope', {}).get('S:Body', {}):
        raise ECMException('ECM upload failed: {0}'.format(response_json))
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment