Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
周伟奇
/
bmw-ocr
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
eb2d5a51
authored
2020-07-15 19:04:11 +0800
by
周伟奇
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
add eDMS
1 parent
639ea2eb
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
67 additions
and
5 deletions
src/apps/doc/consts.py
src/apps/doc/edms.py
src/apps/doc/management/commands/doc_process.py
src/settings/conf/_default_config.py
src/settings/conf/prd.ini
src/settings/conf/sit.ini
src/settings/conf/uat.ini
src/apps/doc/consts.py
View file @
eb2d5a5
...
...
@@ -8,4 +8,8 @@ HIL_SET = {'HIL', 'HIl', 'HiL', 'Hil', 'hIL', 'hIl', 'hiL', 'hil', 'CO00002'}
HIL_PREFIX
=
'HIL'
AFC_PREFIX
=
'AFC'
SESSION_PREFIX
=
'FHLSID'
CUSTOM_CLIENT
=
'CustomClient'
FIXED_TOKEN
=
'00000000-0000-0000-0000-000000000000'
FIXED_FILE_SIZE
=
0
DOWNLOAD_ACTION_TYPE
=
'Downloaded'
...
...
src/apps/doc/edms.py
0 → 100644
View file @
eb2d5a5
import
requests
from
zeep
import
Client
from
settings
import
conf
from
.
import
consts
class EDMS:
    """Thin client for the EDMS SOAP web services and its download endpoint.

    Three zeep SOAP clients are built from the WSDL locations configured in
    ``conf`` (``SM_WSDL``, ``DM_WSDL``, ``DF_WSDL``); the binary download
    itself goes through a plain HTTP GET against ``conf.EDMS_DOWNLOAD_URL``.
    """

    # Number of bytes requested from the response stream per iteration.
    DOWNLOAD_CHUNK_SIZE = 512

    def __init__(self, user_name, pwd):
        """Build the SOAP clients and remember the login credentials.

        Args:
            user_name: Login passed to ``StartSession``.
            pwd: Password passed to ``StartSession``.
        """
        self.sm_client = Client(wsdl=conf.SM_WSDL)
        self.dm_client = Client(wsdl=conf.DM_WSDL)
        self.df_client = Client(wsdl=conf.DF_WSDL)
        self.download_url = conf.EDMS_DOWNLOAD_URL
        self.user_name = user_name
        self.pwd = pwd
        self.session_id = None

    def get_session_id(self):
        """Start a new session and return its id.

        The id returned by ``StartSession`` is also stored on
        ``self.session_id``. Every call starts a fresh session.
        """
        self.session_id = self.sm_client.service.StartSession(
            login=self.user_name,
            password=self.pwd,
            clientType=consts.CUSTOM_CLIENT,
        )
        return self.session_id

    def get_download_token(self, headers, metadata_version_id):
        """Ask the document manager service for a download token.

        Args:
            headers: Extra HTTP headers sent with the SOAP call (the caller
                in this class passes the session cookie here).
            metadata_version_id: Value forwarded as ``metadataVersionId``.

        Returns:
            The ``token`` attribute of the ``PrepareSingleDocumentToDownload``
            response.
        """
        # The headers apply only to calls made inside this settings context.
        with self.dm_client.settings(extra_http_headers=headers):
            res = self.dm_client.service.PrepareSingleDocumentToDownload(
                metadataVersionId=metadata_version_id,
                token=consts.FIXED_TOKEN,
                fileSize=consts.FIXED_FILE_SIZE,
                actionType=consts.DOWNLOAD_ACTION_TYPE,
            )
        return res.token

    def download(self, save_path, metadata_version_id):
        """Download one document and write it to ``save_path``.

        Starts a session, obtains a download token for
        ``metadata_version_id`` and streams the response body to disk.

        Args:
            save_path: Path of the file to create (opened in ``"wb"`` mode).
            metadata_version_id: Identifier of the document version to fetch.

        Raises:
            requests.HTTPError: If the download endpoint answers with a
                4xx/5xx status. Nothing is written to ``save_path`` then.
        """
        session_id = self.get_session_id()
        headers = {'Cookie': '{0}={1}'.format(consts.SESSION_PREFIX, session_id)}
        token = self.get_download_token(headers, metadata_version_id)
        params = {'token': token}
        # Use the response as a context manager so the streamed connection is
        # always released, even when writing the file fails part-way.
        with requests.get(self.download_url, params=params,
                          headers=headers, stream=True) as r:
            # Fail before touching the file system: otherwise an HTTP error
            # page would be saved as if it were the requested document.
            r.raise_for_status()
            with open(save_path, "wb") as f:
                # chunk_size sets how many bytes are read (and therefore
                # written) per iteration.
                for chunk in r.iter_content(chunk_size=self.DOWNLOAD_CHUNK_SIZE):
                    if chunk:
                        # No per-chunk flush: closing the file flushes it.
                        f.write(chunk)
src/apps/doc/management/commands/doc_process.py
View file @
eb2d5a5
...
...
@@ -17,6 +17,7 @@ from common.tools.file_tools import write_zip_file
from
apps.doc.models
import
DocStatus
,
HILDoc
,
AFCDoc
from
apps.doc
import
consts
from
settings
import
conf
from
apps.doc.edms
import
EDMS
class
Command
(
BaseCommand
,
LoggerMixin
):
...
...
@@ -39,7 +40,7 @@ class Command(BaseCommand, LoggerMixin):
'Content-Type'
:
'application/json'
}
# EDMS web_service_api
self
.
sm_client
=
Client
(
wsdl
=
conf
.
SM_WSDL
)
self
.
edms
=
EDMS
(
conf
.
EDMS_USER
,
conf
.
EDMS_PWD
)
# 优雅退出信号:15
signal
.
signal
(
signal
.
SIGTERM
,
self
.
signal_handler
)
...
...
@@ -70,9 +71,6 @@ class Command(BaseCommand, LoggerMixin):
if
doc_info
is
None
:
return
None
,
None
,
None
# TODO EDMS下载pdf
# session_id = self.sm_client.service.StartSession(login=conf.EDMS_USER,
# password=conf.EDMS_PWD,
# clientType=consts.CUSTOM_CLIENT)
doc_data_path
=
os
.
path
.
join
(
self
.
data_dir
,
business_type
,
str
(
doc_id
))
pdf_path
=
os
.
path
.
join
(
doc_data_path
,
'{0}.pdf'
.
format
(
doc_id
))
...
...
@@ -200,16 +198,20 @@ class Command(BaseCommand, LoggerMixin):
def
handle
(
self
,
*
args
,
**
kwargs
):
sleep_second
=
int
(
conf
.
SLEEP_SECOND
)
max_sleep_second
=
int
(
conf
.
MAX_SLEEP_SECOND
)
while
self
.
switch
:
# 1. 从队列获取文件信息
doc_info
,
doc_class
,
doc_id
,
business_type
=
self
.
get_doc_info
()
# 2. 从EDMS获取PDF文件
doc_data_path
,
excel_path
,
pdf_path
=
self
.
pdf_download
(
doc_id
,
doc_info
,
business_type
)
# 队列为空时的处理
if
pdf_path
is
None
:
time
.
sleep
(
sleep_second
)
sleep_second
=
min
(
max_sleep_second
,
sleep_second
+
5
)
continue
sleep_second
=
int
(
conf
.
SLEEP_SECOND
)
try
:
# 3.PDF文件提取图片
...
...
@@ -280,6 +282,7 @@ class Command(BaseCommand, LoggerMixin):
self
.
cronjob_log
.
info
(
'{0} [pdf to img success] [doc_id={1}]'
.
format
(
self
.
log_base
,
doc_id
))
write_zip_file
(
img_save_path
,
os
.
path
.
join
(
doc_data_path
,
'{0}_img.zip'
.
format
(
doc_id
)))
# 4.图片调用算法判断是否为银行流水, 图片调用算法OCR为excel文件
wb
=
xlwt
.
Workbook
()
loop
=
asyncio
.
get_event_loop
()
...
...
@@ -288,7 +291,9 @@ class Command(BaseCommand, LoggerMixin):
# loop.close()
wb
.
save
(
excel_path
)
# TODO no sheet (res always [])
# 整合excel文件
# 5.上传至EDMS
except
Exception
as
e
:
doc_class
.
objects
.
filter
(
id
=
doc_id
)
.
update
(
status
=
DocStatus
.
PROCESS_FAILED
.
value
)
self
.
cronjob_log
.
error
(
'{0} [process failed] [doc_id={1}] [err={2}]'
.
format
(
self
.
log_base
,
doc_id
,
e
))
...
...
src/settings/conf/_default_config.py
View file @
eb2d5a5
...
...
@@ -9,6 +9,8 @@ WSDL_DIR = os.path.join(os.path.dirname(BASE_DIR), 'wsdl')
SECRET_CONF_FILE
=
os
.
path
.
join
(
SECRET_CONF_DIR
,
'secret.ini'
)
LOGGING_CONFIG_FILE
=
os
.
path
.
join
(
COMMON_CONF_DIR
,
'logging.conf'
)
SM_WSDL
=
os
.
path
.
join
(
WSDL_DIR
,
'SessionManager.wsdl'
)
DM_WSDL
=
os
.
path
.
join
(
WSDL_DIR
,
'DocumentManager.wsdl'
)
DF_WSDL
=
os
.
path
.
join
(
WSDL_DIR
,
'DocumentFinder.wsdl'
)
# 文件存放根目录
LOG_DIR
=
os
.
path
.
join
(
os
.
path
.
dirname
(
BASE_DIR
),
'logs'
)
...
...
src/settings/conf/prd.ini
View file @
eb2d5a5
[settings]
DEBUG
=
False
SLEEP_SECOND
=
5
MAX_SLEEP_SECOND
=
60
\ No newline at end of file
MAX_SLEEP_SECOND
=
60
EDMS_DOWNLOAD_URL
=
https://edms-test.bmw.com/FH/FileHold/DocumentRepository/DownloadHandler.ashx
\ No newline at end of file
...
...
src/settings/conf/sit.ini
View file @
eb2d5a5
...
...
@@ -2,3 +2,5 @@
DEBUG
=
True
SLEEP_SECOND
=
5
MAX_SLEEP_SECOND
=
60
EDMS_DOWNLOAD_URL
=
https://edms-test.bmw.com/FH/FileHold/DocumentRepository/DownloadHandler.ashx
\ No newline at end of file
...
...
src/settings/conf/uat.ini
View file @
eb2d5a5
...
...
@@ -2,3 +2,5 @@
DEBUG
=
False
SLEEP_SECOND
=
5
MAX_SLEEP_SECOND
=
60
EDMS_DOWNLOAD_URL
=
https://edms-test.bmw.com/FH/FileHold/DocumentRepository/DownloadHandler.ashx
...
...
Write
Preview
Styling with
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment