f8904dcb by 周伟奇

fix doc list

1 parent a1a92499
......@@ -8,7 +8,7 @@ from io import BytesIO
from django.core.management import BaseCommand
from common.mixins import LoggerMixin
from common.redis_cache import redis_handler as rh
from apps.doc.models import UploadDocRecords
from apps.doc.models import UploadDocRecords, DocStatus
from settings import conf
......@@ -31,26 +31,32 @@ class Command(BaseCommand, LoggerMixin):
def signal_handler(self, sig, frame):
# Clears the loop flag that handle() polls (`while self.switch`), so the
# command stops picking up new files. `sig` and `frame` are unused here;
# presumably this is registered via signal.signal() -- confirm at the call site.
self.switch = False # stop processing files
def get_doc_info(self):  # TODO: priority queue
    """Dequeue the next document id and load the matching upload record.

    The id comes from ``rh.dequeue()`` (the project's ``redis_handler``).

    Returns:
        A dict with the keys ``'id'``, ``'metadata_version_id'`` and
        ``'document_name'`` for the dequeued document, or ``None`` when the
        queue is empty or no ``UploadDocRecords`` row has the dequeued id.

    Side effects:
        When a record is found, its ``status`` column is updated to
        ``DocStatus.PROCESSING.value`` before the record is returned.
    """
    doc_id = rh.dequeue()
    if doc_id is None:
        self.cronjob_log.info('{0} [get_doc_info] [queue empty]'.format(self.log_base))
        return None

    doc_info = UploadDocRecords.objects.filter(id=doc_id).values(
        'id', 'metadata_version_id', 'document_name').first()
    if doc_info is None:
        # NOTE(review): if cronjob_log is a stdlib logging.Logger, `warn` is a
        # deprecated alias of `warning` -- confirm and switch when convenient.
        self.cronjob_log.warn('{0} [get_doc_info] [doc not found] [doc_id={1}]'.format(self.log_base, doc_id))
        return None

    # Mark the record as being processed only once we know it exists.
    UploadDocRecords.objects.filter(id=doc_id).update(status=DocStatus.PROCESSING.value)
    # Log tag matches the method name (it previously still said get_task_info).
    self.cronjob_log.info('{0} [get_doc_info success] [doc_info={1}]'.format(self.log_base, doc_info))
    return doc_info
def pdf_download(self, doc_info):
    """Resolve the local PDF path and data directory for a document.

    Args:
        doc_info: dict describing the document (only ``doc_info['id']`` is
            read here), or ``None`` when there is nothing to process.

    Returns:
        A ``(pdf_path, doc_data_path)`` tuple. Both elements are ``None``
        when ``doc_info`` is ``None``, so callers can always unpack the
        result and then test ``pdf_path is None``.
    """
    if doc_info is None:
        # Always return a 2-tuple: the caller unpacks the result before it
        # checks for None, so a bare `return` would raise TypeError there.
        return None, None

    # TODO: download the PDF from EDMS. For now only the expected local
    # paths under self.data_dir are computed; nothing is fetched.
    doc_id = doc_info['id']
    doc_data_path = os.path.join(self.data_dir, str(doc_id))
    pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc_id))
    self.cronjob_log.info('{0} [pdf download success] [doc_info={1}] [pdf_path={2}]'.format(
        self.log_base, doc_info, pdf_path))
    return pdf_path, doc_data_path
@staticmethod
def getimage(pix):
......@@ -135,15 +141,15 @@ class Command(BaseCommand, LoggerMixin):
def handle(self, *args, **kwargs):
while self.switch:
# 从队列获取文件信息
task_info = self.get_task_info()
doc_info = self.get_doc_info()
# 从EDMS获取PDF文件
pdf_path = self.pdf_download(task_info)
pdf_path, doc_data_path = self.pdf_download(doc_info)
# 队列为空时的处理
if pdf_path is None:
time.sleep(10)
continue
# PDF文件提取图片
img_save_path = os.path.join(os.path.dirname(pdf_path), 'img')
img_save_path = os.path.join(doc_data_path, 'img')
os.makedirs(img_save_path, exist_ok=True)
with fitz.Document(pdf_path) as pdf:
self.cronjob_log.info('{0} [pdf_path={1}] [pdf_metadata={2}]'.format(
......@@ -159,8 +165,7 @@ class Command(BaseCommand, LoggerMixin):
page = pdf.loadPage(pno)
pm = page.getPixmap(matrix=self.trans, alpha=False)
save_path = os.path.join(img_save_path, 'page_{0}_img_0.png'.format(page.number))
# pm.writePNG(save_path)
pm.writeImage(save_path)
pm.writePNG(save_path)
else: # 提取图片
for img_count, img_il in enumerate(img_il_list):
if len(img_il) == 1: # 当只有一张图片时, 简化处理
......
......@@ -61,8 +61,10 @@ doc_list_args = {
'application_id': fields.Str(required=False, validate=validate.Length(max=64)),
'data_source': fields.Str(required=False, validate=validate.Length(max=64)),
'business_type': fields.Str(required=False, validate=validate.Length(max=64)),
'upload_finish_time': fields.Date(required=False),
'create_time': fields.Date(required=False),
'upload_time_start': fields.Date(required=False),
'upload_time_end': fields.Date(required=False),
'create_time_start': fields.Date(required=False),
'create_time_end': fields.Date(required=False),
}
upload_pdf_args = {
......@@ -133,18 +135,23 @@ class DocView(GenericView, DocHandler):
application_id = args.get('application_id')
data_source = args.get('data_source')
business_type = args.get('business_type')
upload_finish_time = args.get('upload_finish_time')
create_time = args.get('create_time')
upload_time_start = args.get('upload_time_start')
upload_time_end = args.get('upload_time_end')
create_time_start = args.get('create_time_start')
create_time_end = args.get('create_time_end')
status_query = Q(status=status) if status else Q()
application_id_query = Q(application_id=application_id) if application_id else Q()
data_source_query = Q(data_source=data_source) if data_source else Q()
business_type_query = Q(business_type=business_type) if business_type else Q()
upload_finish_time_query = Q(upload_finish_time=upload_finish_time) if upload_finish_time else Q()
create_time_query = Q(create_time=create_time) if create_time else Q()
upload_finish_time_query = Q(upload_finish_time__gte=upload_time_start, upload_finish_time__lte=upload_time_end)\
if upload_time_start and upload_time_end else Q()
create_time_query = Q(create_time__gte=create_time_start, create_time__lte=create_time_end)\
if create_time_start and create_time_end else Q()
query = status_query & application_id_query & data_source_query & business_type_query\
& upload_finish_time_query & create_time_query
doc_queryset = UploadDocRecords.objects.filter(query).values(
'id', 'application_id', 'upload_finish_time', 'create_time', 'business_type', 'data_source', 'status')
val_tuple = ('id', 'application_id', 'upload_finish_time', 'create_time',
'business_type', 'data_source', 'status')
doc_queryset = UploadDocRecords.objects.filter(query).values(*val_tuple).order_by('-upload_finish_time')
doc_list = self.get_doc_list(doc_queryset)
total = len(doc_list)
......
......@@ -41,6 +41,7 @@ INSTALLED_APPS = [
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
# 'corsheaders',
'rest_framework',
'common',
'apps.account',
......@@ -48,6 +49,7 @@ INSTALLED_APPS = [
]
MIDDLEWARE = [
# 'corsheaders.middleware.CorsMiddleware',
'django.middleware.security.SecurityMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
......@@ -166,3 +168,7 @@ JWT_AUTH = {
'JWT_VERIFY_EXPIRATION': True,
'JWT_ALLOW_REFRESH': True,
}
# 跨域设置
# CORS_ORIGIN_ALLOW_ALL = True
# CORS_ALLOW_CREDENTIALS = True
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!