f8904dcb by 周伟奇

fix doc list

1 parent a1a92499
...@@ -8,7 +8,7 @@ from io import BytesIO ...@@ -8,7 +8,7 @@ from io import BytesIO
8 from django.core.management import BaseCommand 8 from django.core.management import BaseCommand
9 from common.mixins import LoggerMixin 9 from common.mixins import LoggerMixin
10 from common.redis_cache import redis_handler as rh 10 from common.redis_cache import redis_handler as rh
11 from apps.doc.models import UploadDocRecords 11 from apps.doc.models import UploadDocRecords, DocStatus
12 from settings import conf 12 from settings import conf
13 13
14 14
...@@ -31,26 +31,32 @@ class Command(BaseCommand, LoggerMixin): ...@@ -31,26 +31,32 @@ class Command(BaseCommand, LoggerMixin):
31 def signal_handler(self, sig, frame): 31 def signal_handler(self, sig, frame):
32 self.switch = False # 停止处理文件 32 self.switch = False # 停止处理文件
33 33
34 def get_task_info(self): # TODO 优先队列 & status modify 34 def get_doc_info(self): # TODO 优先队列
35 task_id = rh.dequeue() 35 doc_id = rh.dequeue()
36 if task_id is None: 36 if doc_id is None:
37 self.cronjob_log.info('{0} [get_task_info] [queue empty]'.format(self.log_base)) 37 self.cronjob_log.info('{0} [get_doc_info] [queue empty]'.format(self.log_base))
38 return 38 return
39 task_info = UploadDocRecords.objects.filter(id=task_id).values( 39 doc_info = UploadDocRecords.objects.filter(id=doc_id).values(
40 'id', 'metadata_version_id', 'document_name').first() 40 'id', 'metadata_version_id', 'document_name').first()
41 if task_info is None: 41 if doc_info is None:
42 self.cronjob_log.warn('{0} [get_task_info] [task not found] [task_id={1}]'.format(self.log_base, task_id)) 42 self.cronjob_log.warn('{0} [get_doc_info] [doc not found] [doc_id={1}]'.format(self.log_base, doc_id))
43 self.cronjob_log.info('{0} [get_task_info success] [task_info={1}]'.format(self.log_base, task_info)) 43 return
44 return task_info 44 UploadDocRecords.objects.filter(id=doc_id).update(status=DocStatus.PROCESSING.value)
45 self.cronjob_log.info('{0} [get_task_info success] [doc_info={1}]'.format(self.log_base, doc_info))
46 return doc_info
45 47
46 def pdf_download(self, task_info): 48 def pdf_download(self, doc_info):
47 if task_info is None: 49 if doc_info is None:
48 return 50 return
49 # TODO EDMS下载pdf 51 # TODO EDMS下载pdf
50 pdf_path = '/Users/clay/Desktop/biz/biz_logic/data/2/横版-表格-工商银行CH-B008802400.pdf' 52 # pdf_path = '/Users/clay/Desktop/biz/biz_logic/data/2/横版-表格-工商银行CH-B008802400.pdf'
51 self.cronjob_log.info('{0} [pdf download success] [task_info={1}] [pdf_path={2}]'.format( 53 # doc_data_path = os.path.dirname(pdf_path)
52 self.log_base, task_info, pdf_path)) 54 doc_id = doc_info['id']
53 return pdf_path 55 doc_data_path = os.path.join(self.data_dir, str(doc_id))
56 pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc_id))
57 self.cronjob_log.info('{0} [pdf download success] [doc_info={1}] [pdf_path={2}]'.format(
58 self.log_base, doc_info, pdf_path))
59 return pdf_path, doc_data_path
54 60
55 @staticmethod 61 @staticmethod
56 def getimage(pix): 62 def getimage(pix):
...@@ -135,15 +141,15 @@ class Command(BaseCommand, LoggerMixin): ...@@ -135,15 +141,15 @@ class Command(BaseCommand, LoggerMixin):
135 def handle(self, *args, **kwargs): 141 def handle(self, *args, **kwargs):
136 while self.switch: 142 while self.switch:
137 # 从队列获取文件信息 143 # 从队列获取文件信息
138 task_info = self.get_task_info() 144 doc_info = self.get_doc_info()
139 # 从EDMS获取PDF文件 145 # 从EDMS获取PDF文件
140 pdf_path = self.pdf_download(task_info) 146 pdf_path, doc_data_path = self.pdf_download(doc_info)
141 # 队列为空时的处理 147 # 队列为空时的处理
142 if pdf_path is None: 148 if pdf_path is None:
143 time.sleep(10) 149 time.sleep(10)
144 continue 150 continue
145 # PDF文件提取图片 151 # PDF文件提取图片
146 img_save_path = os.path.join(os.path.dirname(pdf_path), 'img') 152 img_save_path = os.path.join(doc_data_path, 'img')
147 os.makedirs(img_save_path, exist_ok=True) 153 os.makedirs(img_save_path, exist_ok=True)
148 with fitz.Document(pdf_path) as pdf: 154 with fitz.Document(pdf_path) as pdf:
149 self.cronjob_log.info('{0} [pdf_path={1}] [pdf_metadata={2}]'.format( 155 self.cronjob_log.info('{0} [pdf_path={1}] [pdf_metadata={2}]'.format(
...@@ -159,8 +165,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -159,8 +165,7 @@ class Command(BaseCommand, LoggerMixin):
159 page = pdf.loadPage(pno) 165 page = pdf.loadPage(pno)
160 pm = page.getPixmap(matrix=self.trans, alpha=False) 166 pm = page.getPixmap(matrix=self.trans, alpha=False)
161 save_path = os.path.join(img_save_path, 'page_{0}_img_0.png'.format(page.number)) 167 save_path = os.path.join(img_save_path, 'page_{0}_img_0.png'.format(page.number))
162 # pm.writePNG(save_path) 168 pm.writePNG(save_path)
163 pm.writeImage(save_path)
164 else: # 提取图片 169 else: # 提取图片
165 for img_count, img_il in enumerate(img_il_list): 170 for img_count, img_il in enumerate(img_il_list):
166 if len(img_il) == 1: # 当只有一张图片时, 简化处理 171 if len(img_il) == 1: # 当只有一张图片时, 简化处理
......
...@@ -61,8 +61,10 @@ doc_list_args = { ...@@ -61,8 +61,10 @@ doc_list_args = {
61 'application_id': fields.Str(required=False, validate=validate.Length(max=64)), 61 'application_id': fields.Str(required=False, validate=validate.Length(max=64)),
62 'data_source': fields.Str(required=False, validate=validate.Length(max=64)), 62 'data_source': fields.Str(required=False, validate=validate.Length(max=64)),
63 'business_type': fields.Str(required=False, validate=validate.Length(max=64)), 63 'business_type': fields.Str(required=False, validate=validate.Length(max=64)),
64 'upload_finish_time': fields.Date(required=False), 64 'upload_time_start': fields.Date(required=False),
65 'create_time': fields.Date(required=False), 65 'upload_time_end': fields.Date(required=False),
66 'create_time_start': fields.Date(required=False),
67 'create_time_end': fields.Date(required=False),
66 } 68 }
67 69
68 upload_pdf_args = { 70 upload_pdf_args = {
...@@ -133,18 +135,23 @@ class DocView(GenericView, DocHandler): ...@@ -133,18 +135,23 @@ class DocView(GenericView, DocHandler):
133 application_id = args.get('application_id') 135 application_id = args.get('application_id')
134 data_source = args.get('data_source') 136 data_source = args.get('data_source')
135 business_type = args.get('business_type') 137 business_type = args.get('business_type')
136 upload_finish_time = args.get('upload_finish_time') 138 upload_time_start = args.get('upload_time_start')
137 create_time = args.get('create_time') 139 upload_time_end = args.get('upload_time_end')
140 create_time_start = args.get('create_time_start')
141 create_time_end = args.get('create_time_end')
138 status_query = Q(status=status) if status else Q() 142 status_query = Q(status=status) if status else Q()
139 application_id_query = Q(application_id=application_id) if application_id else Q() 143 application_id_query = Q(application_id=application_id) if application_id else Q()
140 data_source_query = Q(data_source=data_source) if data_source else Q() 144 data_source_query = Q(data_source=data_source) if data_source else Q()
141 business_type_query = Q(business_type=business_type) if business_type else Q() 145 business_type_query = Q(business_type=business_type) if business_type else Q()
142 upload_finish_time_query = Q(upload_finish_time=upload_finish_time) if upload_finish_time else Q() 146 upload_finish_time_query = Q(upload_finish_time__gte=upload_time_start, upload_finish_time__lte=upload_time_end)\
143 create_time_query = Q(create_time=create_time) if create_time else Q() 147 if upload_time_start and upload_time_end else Q()
148 create_time_query = Q(create_time__gte=create_time_start, create_time__lte=create_time_end)\
149 if create_time_start and create_time_end else Q()
144 query = status_query & application_id_query & data_source_query & business_type_query\ 150 query = status_query & application_id_query & data_source_query & business_type_query\
145 & upload_finish_time_query & create_time_query 151 & upload_finish_time_query & create_time_query
146 doc_queryset = UploadDocRecords.objects.filter(query).values( 152 val_tuple = ('id', 'application_id', 'upload_finish_time', 'create_time',
147 'id', 'application_id', 'upload_finish_time', 'create_time', 'business_type', 'data_source', 'status') 153 'business_type', 'data_source', 'status')
154 doc_queryset = UploadDocRecords.objects.filter(query).values(*val_tuple).order_by('-upload_finish_time')
148 doc_list = self.get_doc_list(doc_queryset) 155 doc_list = self.get_doc_list(doc_queryset)
149 156
150 total = len(doc_list) 157 total = len(doc_list)
......
...@@ -41,6 +41,7 @@ INSTALLED_APPS = [ ...@@ -41,6 +41,7 @@ INSTALLED_APPS = [
41 'django.contrib.sessions', 41 'django.contrib.sessions',
42 'django.contrib.messages', 42 'django.contrib.messages',
43 'django.contrib.staticfiles', 43 'django.contrib.staticfiles',
44 # 'corsheaders',
44 'rest_framework', 45 'rest_framework',
45 'common', 46 'common',
46 'apps.account', 47 'apps.account',
...@@ -48,6 +49,7 @@ INSTALLED_APPS = [ ...@@ -48,6 +49,7 @@ INSTALLED_APPS = [
48 ] 49 ]
49 50
50 MIDDLEWARE = [ 51 MIDDLEWARE = [
52 # 'corsheaders.middleware.CorsMiddleware',
51 'django.middleware.security.SecurityMiddleware', 53 'django.middleware.security.SecurityMiddleware',
52 'django.contrib.sessions.middleware.SessionMiddleware', 54 'django.contrib.sessions.middleware.SessionMiddleware',
53 'django.middleware.common.CommonMiddleware', 55 'django.middleware.common.CommonMiddleware',
...@@ -166,3 +168,7 @@ JWT_AUTH = { ...@@ -166,3 +168,7 @@ JWT_AUTH = {
166 'JWT_VERIFY_EXPIRATION': True, 168 'JWT_VERIFY_EXPIRATION': True,
167 'JWT_ALLOW_REFRESH': True, 169 'JWT_ALLOW_REFRESH': True,
168 } 170 }
171
172 # 跨域设置
173 # CORS_ORIGIN_ALLOW_ALL = True
174 # CORS_ALLOW_CREDENTIALS = True
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!