eb2d5a51 by 周伟奇

add eDMS

1 parent 639ea2eb
......@@ -8,4 +8,8 @@ HIL_SET = {'HIL', 'HIl', 'HiL', 'Hil', 'hIL', 'hIl', 'hiL', 'hil', 'CO00002'}
HIL_PREFIX = 'HIL'
AFC_PREFIX = 'AFC'
SESSION_PREFIX = 'FHLSID'
CUSTOM_CLIENT = 'CustomClient'
FIXED_TOKEN = '00000000-0000-0000-0000-000000000000'
FIXED_FILE_SIZE = 0
DOWNLOAD_ACTION_TYPE = 'Downloaded'
......
import requests
from zeep import Client
from settings import conf
from . import consts
class EDMS:
    """Client for the EDMS SOAP web services and its HTTP download handler.

    Three zeep SOAP clients are built from the WSDL paths configured in
    ``conf`` (``SM_WSDL``, ``DM_WSDL``, ``DF_WSDL``). A document is fetched
    in three steps: start a session, ask the document-manager service for a
    one-off download token, then stream the file from
    ``conf.EDMS_DOWNLOAD_URL``.
    """

    # (connect, read) timeouts in seconds for the HTTP download. Without a
    # timeout, requests waits indefinitely on a stalled server, which would
    # block the calling worker loop forever.
    DOWNLOAD_TIMEOUT = (10, 60)

    # Number of bytes requested from the response stream per iteration.
    DOWNLOAD_CHUNK_SIZE = 8192

    def __init__(self, user_name, pwd):
        """Create the SOAP clients and remember the login credentials.

        Args:
            user_name: Login name passed to ``StartSession``.
            pwd: Password passed to ``StartSession``.
        """
        self.sm_client = Client(wsdl=conf.SM_WSDL)
        self.dm_client = Client(wsdl=conf.DM_WSDL)
        self.df_client = Client(wsdl=conf.DF_WSDL)
        self.download_url = conf.EDMS_DOWNLOAD_URL
        self.user_name = user_name
        self.pwd = pwd
        # Filled in by get_session_id(); None until the first login.
        self.session_id = None

    def get_session_id(self):
        """Start a new EDMS session and return its id.

        The id is also stored on ``self.session_id``. Every call performs a
        fresh ``StartSession`` request; an existing id is not reused.

        Returns:
            The value returned by the ``StartSession`` operation.
        """
        self.session_id = self.sm_client.service.StartSession(
            login=self.user_name,
            password=self.pwd,
            clientType=consts.CUSTOM_CLIENT,
        )
        return self.session_id

    def get_download_token(self, headers, metadata_version_id):
        """Request a download token for one document version.

        Args:
            headers: Extra HTTP headers sent with the SOAP request (the
                caller passes the session cookie here).
            metadata_version_id: Identifier forwarded as
                ``metadataVersionId``.

        Returns:
            The ``token`` attribute of the
            ``PrepareSingleDocumentToDownload`` response.
        """
        with self.dm_client.settings(extra_http_headers=headers):
            res = self.dm_client.service.PrepareSingleDocumentToDownload(
                metadataVersionId=metadata_version_id,
                token=consts.FIXED_TOKEN,
                fileSize=consts.FIXED_FILE_SIZE,
                actionType=consts.DOWNLOAD_ACTION_TYPE,
            )
        return res.token

    def download(self, save_path, metadata_version_id):
        """Download one document version and write it to ``save_path``.

        Args:
            save_path: Destination file path; opened in binary write mode.
            metadata_version_id: Identifier of the document version to fetch.

        Raises:
            requests.HTTPError: If the download handler answers with a 4xx
                or 5xx status. Nothing is written to ``save_path`` then.
            requests.Timeout: If connecting or reading exceeds
                ``DOWNLOAD_TIMEOUT``.
        """
        session_id = self.get_session_id()
        # The session id is sent as a cookie named after SESSION_PREFIX, for
        # both the SOAP token request and the HTTP download.
        headers = {'Cookie': '{0}={1}'.format(consts.SESSION_PREFIX, session_id)}
        token = self.get_download_token(headers, metadata_version_id)
        params = {'token': token}
        r = requests.get(self.download_url, params=params, headers=headers,
                         stream=True, timeout=self.DOWNLOAD_TIMEOUT)
        try:
            # Fail before touching the file, so that an error page is never
            # saved as if it were the requested document.
            r.raise_for_status()
            with open(save_path, "wb") as f:
                # chunk_size is the number of bytes written per iteration.
                for chunk in r.iter_content(chunk_size=self.DOWNLOAD_CHUNK_SIZE):
                    if chunk:
                        f.write(chunk)
        finally:
            # A streamed response keeps its connection checked out until it
            # is fully consumed or closed, so release it explicitly.
            r.close()
......@@ -17,6 +17,7 @@ from common.tools.file_tools import write_zip_file
from apps.doc.models import DocStatus, HILDoc, AFCDoc
from apps.doc import consts
from settings import conf
from apps.doc.edms import EDMS
class Command(BaseCommand, LoggerMixin):
......@@ -39,7 +40,7 @@ class Command(BaseCommand, LoggerMixin):
'Content-Type': 'application/json'
}
# EDMS web_service_api
self.sm_client = Client(wsdl=conf.SM_WSDL)
self.edms = EDMS(conf.EDMS_USER, conf.EDMS_PWD)
# 优雅退出信号:15
signal.signal(signal.SIGTERM, self.signal_handler)
......@@ -70,9 +71,6 @@ class Command(BaseCommand, LoggerMixin):
if doc_info is None:
return None, None, None
# TODO EDMS下载pdf
# session_id = self.sm_client.service.StartSession(login=conf.EDMS_USER,
# password=conf.EDMS_PWD,
# clientType=consts.CUSTOM_CLIENT)
doc_data_path = os.path.join(self.data_dir, business_type, str(doc_id))
pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc_id))
......@@ -200,16 +198,20 @@ class Command(BaseCommand, LoggerMixin):
def handle(self, *args, **kwargs):
sleep_second = int(conf.SLEEP_SECOND)
max_sleep_second = int(conf.MAX_SLEEP_SECOND)
while self.switch:
# 1. 从队列获取文件信息
doc_info, doc_class, doc_id, business_type = self.get_doc_info()
# 2. 从EDMS获取PDF文件
doc_data_path, excel_path, pdf_path = self.pdf_download(doc_id, doc_info, business_type)
# 队列为空时的处理
if pdf_path is None:
time.sleep(sleep_second)
sleep_second = min(max_sleep_second, sleep_second+5)
continue
sleep_second = int(conf.SLEEP_SECOND)
try:
# 3.PDF文件提取图片
......@@ -280,6 +282,7 @@ class Command(BaseCommand, LoggerMixin):
self.cronjob_log.info('{0} [pdf to img success] [doc_id={1}]'.format(self.log_base, doc_id))
write_zip_file(img_save_path, os.path.join(doc_data_path, '{0}_img.zip'.format(doc_id)))
# 4.图片调用算法判断是否为银行流水, 图片调用算法OCR为excel文件
wb = xlwt.Workbook()
loop = asyncio.get_event_loop()
......@@ -288,7 +291,9 @@ class Command(BaseCommand, LoggerMixin):
# loop.close()
wb.save(excel_path) # TODO no sheet (res always [])
# 整合excel文件
# 5.上传至EDMS
except Exception as e:
doc_class.objects.filter(id=doc_id).update(status=DocStatus.PROCESS_FAILED.value)
self.cronjob_log.error('{0} [process failed] [doc_id={1}] [err={2}]'.format(self.log_base, doc_id, e))
......
......@@ -9,6 +9,8 @@ WSDL_DIR = os.path.join(os.path.dirname(BASE_DIR), 'wsdl')
SECRET_CONF_FILE = os.path.join(SECRET_CONF_DIR, 'secret.ini')
LOGGING_CONFIG_FILE = os.path.join(COMMON_CONF_DIR, 'logging.conf')
SM_WSDL = os.path.join(WSDL_DIR, 'SessionManager.wsdl')
DM_WSDL = os.path.join(WSDL_DIR, 'DocumentManager.wsdl')
DF_WSDL = os.path.join(WSDL_DIR, 'DocumentFinder.wsdl')
# 文件存放根目录
LOG_DIR = os.path.join(os.path.dirname(BASE_DIR), 'logs')
......
[settings]
DEBUG = False
SLEEP_SECOND = 5
MAX_SLEEP_SECOND = 60
\ No newline at end of file
MAX_SLEEP_SECOND = 60
EDMS_DOWNLOAD_URL = https://edms-test.bmw.com/FH/FileHold/DocumentRepository/DownloadHandler.ashx
\ No newline at end of file
......
......@@ -2,3 +2,5 @@
DEBUG = True
SLEEP_SECOND = 5
MAX_SLEEP_SECOND = 60
EDMS_DOWNLOAD_URL = https://edms-test.bmw.com/FH/FileHold/DocumentRepository/DownloadHandler.ashx
\ No newline at end of file
......
......@@ -2,3 +2,5 @@
DEBUG = False
SLEEP_SECOND = 5
MAX_SLEEP_SECOND = 60
EDMS_DOWNLOAD_URL = https://edms-test.bmw.com/FH/FileHold/DocumentRepository/DownloadHandler.ashx
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!