6966f069 by 周伟奇

Merge branch 'feature/zip'

2 parents 7cf03ec9 6010c32f
......@@ -570,15 +570,14 @@ class UploadDocView(GenericView, DocHandler):
data_source = self.fix_data_source(data_source)
document_scheme = self.fix_scheme(document_scheme)
if document_name.endswith('.zip'):
self.running_log.info('[doc upload success] [zip file skip] [args={0}]'.format(args))
return response.ok()
# if document_name.endswith('.zip'):
# self.running_log.info('[doc upload success] [zip file skip] [args={0}]'.format(args))
# return response.ok()
if data_source == consts.DATA_SOURCE_LIST[1]:
if isinstance(document_name, str):
if document_name.endswith('-证书.pdf') or document_name.endswith('-证书'):
self.running_log.info('[doc upload success] [eapp license skip] [args={0}]'.format(args))
return response.ok()
if document_name.endswith('-证书.pdf') or document_name.endswith('-证书'):
self.running_log.info('[doc upload success] [eapp license skip] [args={0}]'.format(args))
return response.ok()
# 2. 根据业务类型分库存储
doc_class, prefix = self.get_doc_class(business_type)
......@@ -594,17 +593,24 @@ class UploadDocView(GenericView, DocHandler):
data_source=data_source,
upload_finish_time=document.get('uploadFinishTime'),
)
# 3. 选择队列进入
is_priority = PriorityApplication.objects.filter(application_id=application_id, on_off=True).exists()
is_zip = False
classify_1 = 0
# 电子合同
if data_source == consts.DATA_SOURCE_LIST[-1] and document_scheme == consts.DOC_SCHEME_LIST[1]:
for keyword, classify_1_tmp in consts.ECONTRACT_KEYWORDS_MAP.get(prefix):
if keyword in document_name:
classify_1 = classify_1_tmp
break
elif document_name.endswith('.zip') or document_name.endswith('.rar') or document_name.endswith('.ZIP') \
or document_name.endswith('.RAR'):
is_zip = True
task = consts.SPLIT_STR.join([prefix, str(doc.id), str(classify_1)])
enqueue_res = rh.enqueue([task], is_priority)
enqueue_res = rh.enqueue([task], is_priority, is_zip)
self.running_log.info('[doc upload success] [args={0}] [business_type={1}] [doc_id={2}] '
'[is_priority={3}] [enqueue_res={4}]'.format(args, prefix, doc.id,
is_priority, enqueue_res))
......@@ -669,7 +675,7 @@ class PriorityDocView(GenericView, DocHandler):
self.running_log.info(
'[priority doc success] [args={0}]'.format(args))
else:
enqueue_res = rh.enqueue(tasks_list, is_priority=True)
enqueue_res = rh.enqueue(tasks_list, is_priority=True) # TODO 可能把压缩文件放入优先队列
self.running_log.info('[priority doc success] [args={0}] [tasks_list={1}] [enqueue_res={2}]'.format(
args, tasks_list, enqueue_res))
return response.ok()
......
......@@ -35,16 +35,27 @@ class RedisHandler:
self.prefix = 'bwm_ocr'
self.common_queue_key = '{0}:common_queue'.format(self.prefix)
self.priority_queue_key = '{0}:priority_queue'.format(self.prefix)
self.zip_queue_key = '{0}:zip_queue'.format(self.prefix)
self.session_id_key = '{0}:session_id'.format(self.prefix)
self.cms_token_key = '{0}:cms_token'.format(self.prefix)
self.ecm_token_key = '{0}:ecm_token'.format(self.prefix)
self.login_limit_key = '{0}:login_limit'.format(self.prefix)
def enqueue(self, tasks, is_priority=False, is_zip=False):
    """Push tasks onto one of the Redis queues.

    Queue selection, in order of precedence:
      1. ``is_zip``      -> ``self.zip_queue_key`` (wins even when
                            ``is_priority`` is also set)
      2. ``is_priority`` -> ``self.priority_queue_key``
      3. otherwise       -> ``self.common_queue_key``

    Args:
        tasks: Tasks to enqueue; handed to ``self.redis.lpush`` unchanged.
        is_priority: Route to the priority queue.
        is_zip: Route to the zip queue.

    Returns:
        Whatever ``self.redis.lpush`` returns.
        NOTE(review): the original inline comment here was just "1" --
        presumably the value returned on success; confirm against the
        redis wrapper in use.
    """
    if is_zip:
        key = self.zip_queue_key
    elif is_priority:
        key = self.priority_queue_key
    else:
        key = self.common_queue_key
    return self.redis.lpush(key, tasks)
def dequeue_zip(self):
    """Pop one task from the zip queue.

    Returns:
        The result of ``self.redis.rpop(self.zip_queue_key)``: a task, or
        None (per the original inline note "task or None").
    """
    return self.redis.rpop(self.zip_queue_key)
def dequeue(self):
# task or None
task = self.redis.rpop(self.priority_queue_key)
......
import os
import re
import zipfile
import rarfile
from zipfile import ZipFile
......@@ -18,3 +22,77 @@ def write_zip_file(dir_name, zipfile_path):
src_file_path = os.path.join(root, single_file)
file_target_path = os.path.join(root_target_path, single_file)
z.write(src_file_path, file_target_path)
def get_pwd_list_from_str(doc_name):
    """Collect six-digit password candidates embedded in a document name.

    The name is scanned left to right and every non-overlapping sequence of
    six consecutive digits is returned, in order of appearance. A longer
    digit run therefore contributes its leading six digits first (e.g. a
    seven-digit run yields only its first six digits).

    Args:
        doc_name: Document name to scan.

    Returns:
        A list of six-character digit strings; empty when nothing matches
        or when ``doc_name`` is not a string.
    """
    try:
        return re.findall(r'\d{6}', doc_name)
    except TypeError:
        # re.findall rejects non-string input such as None.
        return []
def _try_extract(archive_cls, file_path, extract_path, password=None, to_pwd=None):
    """Attempt a single extraction; return True on success, False on any error.

    When ``to_pwd`` is None the archive is extracted without a password;
    otherwise ``to_pwd(password)`` is passed as the ``pwd`` argument.
    """
    try:
        with archive_cls(file_path) as archive:
            if to_pwd is None:
                archive.extractall(extract_path)
            else:
                archive.extractall(extract_path, pwd=to_pwd(password))
    except Exception:
        # Deliberately broad: any failure (unreadable archive, rejected
        # password, password of the wrong type, ...) just counts as a
        # failed attempt so the caller can move on.
        return False
    return True


def _zip_password(password):
    """Encode a password to UTF-8 bytes for ``ZipFile.extractall``."""
    return bytes(password, 'utf-8')


def _rar_password(password):
    """Return the password unchanged for ``RarFile.extractall``."""
    return password


def extract_zip_or_rar(file_path, extract_path, pwd_list=None):
    """Extract a zip or rar archive, optionally trying a list of passwords.

    The archive type is chosen from the file suffix: ``.zip`` / ``.ZIP`` is
    opened with ``zipfile.ZipFile`` and ``.rar`` / ``.RAR`` with
    ``rarfile.RarFile``. Any other suffix is rejected.

    Args:
        file_path: Path of the archive.
        extract_path: Directory to extract into.
        pwd_list: Candidate passwords, tried in order until one extraction
            succeeds. When empty or None, a single extraction without a
            password is attempted.

    Returns:
        True if an extraction attempt completed without raising; False if
        every attempt failed or the suffix is not supported.
    """
    if file_path.endswith(('.zip', '.ZIP')):
        archive_cls, to_pwd = zipfile.ZipFile, _zip_password
    elif file_path.endswith(('.rar', '.RAR')):
        archive_cls, to_pwd = rarfile.RarFile, _rar_password
    else:
        return False

    if not pwd_list:
        return _try_extract(archive_cls, file_path, extract_path)

    for password in pwd_list:
        if _try_extract(archive_cls, file_path, extract_path, password, to_pwd):
            return True
    return False
def get_file_paths(input_path, suffix_list):
    """Lazily yield paths of files under a directory that match a suffix.

    Walks ``input_path`` recursively with ``os.walk`` and yields each file
    whose name ends with at least one of the given suffixes. Each file is
    yielded at most once.

    Args:
        input_path: Directory to search.
        suffix_list: Iterable of filename suffixes to match
            (e.g. ``['.pdf', '.jpg']``).

    Yields:
        str: ``os.path.join(parent, filename)`` for every match, so paths
        are absolute only when ``input_path`` itself is absolute.
    """
    # Materialise once: str.endswith accepts a tuple, and a one-shot
    # iterator would otherwise be exhausted after the first file.
    suffixes = tuple(suffix_list)
    for parent, _, filenames in os.walk(input_path):
        for filename in filenames:
            if filename.endswith(suffixes):
                yield os.path.join(parent, filename)
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!