fix doc list
Showing 3 changed files with 47 additions and 29 deletions
... | @@ -8,7 +8,7 @@ from io import BytesIO | ... | @@ -8,7 +8,7 @@ from io import BytesIO |
8 | from django.core.management import BaseCommand | 8 | from django.core.management import BaseCommand |
9 | from common.mixins import LoggerMixin | 9 | from common.mixins import LoggerMixin |
10 | from common.redis_cache import redis_handler as rh | 10 | from common.redis_cache import redis_handler as rh |
11 | from apps.doc.models import UploadDocRecords | 11 | from apps.doc.models import UploadDocRecords, DocStatus |
12 | from settings import conf | 12 | from settings import conf |
13 | 13 | ||
14 | 14 | ||
... | @@ -31,26 +31,32 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -31,26 +31,32 @@ class Command(BaseCommand, LoggerMixin): |
31 | def signal_handler(self, sig, frame): | 31 | def signal_handler(self, sig, frame): |
32 | self.switch = False # 停止处理文件 | 32 | self.switch = False # 停止处理文件 |
33 | 33 | ||
34 | def get_task_info(self): # TODO 优先队列 & status modify | 34 | def get_doc_info(self): # TODO 优先队列 |
35 | task_id = rh.dequeue() | 35 | doc_id = rh.dequeue() |
36 | if task_id is None: | 36 | if doc_id is None: |
37 | self.cronjob_log.info('{0} [get_task_info] [queue empty]'.format(self.log_base)) | 37 | self.cronjob_log.info('{0} [get_doc_info] [queue empty]'.format(self.log_base)) |
38 | return | 38 | return |
39 | task_info = UploadDocRecords.objects.filter(id=task_id).values( | 39 | doc_info = UploadDocRecords.objects.filter(id=doc_id).values( |
40 | 'id', 'metadata_version_id', 'document_name').first() | 40 | 'id', 'metadata_version_id', 'document_name').first() |
41 | if task_info is None: | 41 | if doc_info is None: |
42 | self.cronjob_log.warn('{0} [get_task_info] [task not found] [task_id={1}]'.format(self.log_base, task_id)) | 42 | self.cronjob_log.warn('{0} [get_doc_info] [doc not found] [doc_id={1}]'.format(self.log_base, doc_id)) |
43 | self.cronjob_log.info('{0} [get_task_info success] [task_info={1}]'.format(self.log_base, task_info)) | 43 | return |
44 | return task_info | 44 | UploadDocRecords.objects.filter(id=doc_id).update(status=DocStatus.PROCESSING.value) |
45 | self.cronjob_log.info('{0} [get_task_info success] [doc_info={1}]'.format(self.log_base, doc_info)) | ||
46 | return doc_info | ||
45 | 47 | ||
46 | def pdf_download(self, task_info): | 48 | def pdf_download(self, doc_info): |
47 | if task_info is None: | 49 | if doc_info is None: |
48 | return | 50 | return |
49 | # TODO EDMS下载pdf | 51 | # TODO EDMS下载pdf |
50 | pdf_path = '/Users/clay/Desktop/biz/biz_logic/data/2/横版-表格-工商银行CH-B008802400.pdf' | 52 | # pdf_path = '/Users/clay/Desktop/biz/biz_logic/data/2/横版-表格-工商银行CH-B008802400.pdf' |
51 | self.cronjob_log.info('{0} [pdf download success] [task_info={1}] [pdf_path={2}]'.format( | 53 | # doc_data_path = os.path.dirname(pdf_path) |
52 | self.log_base, task_info, pdf_path)) | 54 | doc_id = doc_info['id'] |
53 | return pdf_path | 55 | doc_data_path = os.path.join(self.data_dir, str(doc_id)) |
56 | pdf_path = os.path.join(doc_data_path, '{0}.pdf'.format(doc_id)) | ||
57 | self.cronjob_log.info('{0} [pdf download success] [doc_info={1}] [pdf_path={2}]'.format( | ||
58 | self.log_base, doc_info, pdf_path)) | ||
59 | return pdf_path, doc_data_path | ||
54 | 60 | ||
55 | @staticmethod | 61 | @staticmethod |
56 | def getimage(pix): | 62 | def getimage(pix): |
... | @@ -135,15 +141,15 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -135,15 +141,15 @@ class Command(BaseCommand, LoggerMixin): |
135 | def handle(self, *args, **kwargs): | 141 | def handle(self, *args, **kwargs): |
136 | while self.switch: | 142 | while self.switch: |
137 | # 从队列获取文件信息 | 143 | # 从队列获取文件信息 |
138 | task_info = self.get_task_info() | 144 | doc_info = self.get_doc_info() |
139 | # 从EDMS获取PDF文件 | 145 | # 从EDMS获取PDF文件 |
140 | pdf_path = self.pdf_download(task_info) | 146 | pdf_path, doc_data_path = self.pdf_download(doc_info) |
141 | # 队列为空时的处理 | 147 | # 队列为空时的处理 |
142 | if pdf_path is None: | 148 | if pdf_path is None: |
143 | time.sleep(10) | 149 | time.sleep(10) |
144 | continue | 150 | continue |
145 | # PDF文件提取图片 | 151 | # PDF文件提取图片 |
146 | img_save_path = os.path.join(os.path.dirname(pdf_path), 'img') | 152 | img_save_path = os.path.join(doc_data_path, 'img') |
147 | os.makedirs(img_save_path, exist_ok=True) | 153 | os.makedirs(img_save_path, exist_ok=True) |
148 | with fitz.Document(pdf_path) as pdf: | 154 | with fitz.Document(pdf_path) as pdf: |
149 | self.cronjob_log.info('{0} [pdf_path={1}] [pdf_metadata={2}]'.format( | 155 | self.cronjob_log.info('{0} [pdf_path={1}] [pdf_metadata={2}]'.format( |
... | @@ -159,8 +165,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -159,8 +165,7 @@ class Command(BaseCommand, LoggerMixin): |
159 | page = pdf.loadPage(pno) | 165 | page = pdf.loadPage(pno) |
160 | pm = page.getPixmap(matrix=self.trans, alpha=False) | 166 | pm = page.getPixmap(matrix=self.trans, alpha=False) |
161 | save_path = os.path.join(img_save_path, 'page_{0}_img_0.png'.format(page.number)) | 167 | save_path = os.path.join(img_save_path, 'page_{0}_img_0.png'.format(page.number)) |
162 | # pm.writePNG(save_path) | 168 | pm.writePNG(save_path) |
163 | pm.writeImage(save_path) | ||
164 | else: # 提取图片 | 169 | else: # 提取图片 |
165 | for img_count, img_il in enumerate(img_il_list): | 170 | for img_count, img_il in enumerate(img_il_list): |
166 | if len(img_il) == 1: # 当只有一张图片时, 简化处理 | 171 | if len(img_il) == 1: # 当只有一张图片时, 简化处理 | ... | ... |
... | @@ -61,8 +61,10 @@ doc_list_args = { | ... | @@ -61,8 +61,10 @@ doc_list_args = { |
61 | 'application_id': fields.Str(required=False, validate=validate.Length(max=64)), | 61 | 'application_id': fields.Str(required=False, validate=validate.Length(max=64)), |
62 | 'data_source': fields.Str(required=False, validate=validate.Length(max=64)), | 62 | 'data_source': fields.Str(required=False, validate=validate.Length(max=64)), |
63 | 'business_type': fields.Str(required=False, validate=validate.Length(max=64)), | 63 | 'business_type': fields.Str(required=False, validate=validate.Length(max=64)), |
64 | 'upload_finish_time': fields.Date(required=False), | 64 | 'upload_time_start': fields.Date(required=False), |
65 | 'create_time': fields.Date(required=False), | 65 | 'upload_time_end': fields.Date(required=False), |
66 | 'create_time_start': fields.Date(required=False), | ||
67 | 'create_time_end': fields.Date(required=False), | ||
66 | } | 68 | } |
67 | 69 | ||
68 | upload_pdf_args = { | 70 | upload_pdf_args = { |
... | @@ -133,18 +135,23 @@ class DocView(GenericView, DocHandler): | ... | @@ -133,18 +135,23 @@ class DocView(GenericView, DocHandler): |
133 | application_id = args.get('application_id') | 135 | application_id = args.get('application_id') |
134 | data_source = args.get('data_source') | 136 | data_source = args.get('data_source') |
135 | business_type = args.get('business_type') | 137 | business_type = args.get('business_type') |
136 | upload_finish_time = args.get('upload_finish_time') | 138 | upload_time_start = args.get('upload_time_start') |
137 | create_time = args.get('create_time') | 139 | upload_time_end = args.get('upload_time_end') |
140 | create_time_start = args.get('create_time_start') | ||
141 | create_time_end = args.get('create_time_end') | ||
138 | status_query = Q(status=status) if status else Q() | 142 | status_query = Q(status=status) if status else Q() |
139 | application_id_query = Q(application_id=application_id) if application_id else Q() | 143 | application_id_query = Q(application_id=application_id) if application_id else Q() |
140 | data_source_query = Q(data_source=data_source) if data_source else Q() | 144 | data_source_query = Q(data_source=data_source) if data_source else Q() |
141 | business_type_query = Q(business_type=business_type) if business_type else Q() | 145 | business_type_query = Q(business_type=business_type) if business_type else Q() |
142 | upload_finish_time_query = Q(upload_finish_time=upload_finish_time) if upload_finish_time else Q() | 146 | upload_finish_time_query = Q(upload_finish_time__gte=upload_time_start, upload_finish_time__lte=upload_time_end)\ |
143 | create_time_query = Q(create_time=create_time) if create_time else Q() | 147 | if upload_time_start and upload_time_end else Q() |
148 | create_time_query = Q(create_time__gte=create_time_start, create_time__lte=create_time_end)\ | ||
149 | if create_time_start and create_time_end else Q() | ||
144 | query = status_query & application_id_query & data_source_query & business_type_query\ | 150 | query = status_query & application_id_query & data_source_query & business_type_query\ |
145 | & upload_finish_time_query & create_time_query | 151 | & upload_finish_time_query & create_time_query |
146 | doc_queryset = UploadDocRecords.objects.filter(query).values( | 152 | val_tuple = ('id', 'application_id', 'upload_finish_time', 'create_time', |
147 | 'id', 'application_id', 'upload_finish_time', 'create_time', 'business_type', 'data_source', 'status') | 153 | 'business_type', 'data_source', 'status') |
154 | doc_queryset = UploadDocRecords.objects.filter(query).values(*val_tuple).order_by('-upload_finish_time') | ||
148 | doc_list = self.get_doc_list(doc_queryset) | 155 | doc_list = self.get_doc_list(doc_queryset) |
149 | 156 | ||
150 | total = len(doc_list) | 157 | total = len(doc_list) | ... | ... |
... | @@ -41,6 +41,7 @@ INSTALLED_APPS = [ | ... | @@ -41,6 +41,7 @@ INSTALLED_APPS = [ |
41 | 'django.contrib.sessions', | 41 | 'django.contrib.sessions', |
42 | 'django.contrib.messages', | 42 | 'django.contrib.messages', |
43 | 'django.contrib.staticfiles', | 43 | 'django.contrib.staticfiles', |
44 | # 'corsheaders', | ||
44 | 'rest_framework', | 45 | 'rest_framework', |
45 | 'common', | 46 | 'common', |
46 | 'apps.account', | 47 | 'apps.account', |
... | @@ -48,6 +49,7 @@ INSTALLED_APPS = [ | ... | @@ -48,6 +49,7 @@ INSTALLED_APPS = [ |
48 | ] | 49 | ] |
49 | 50 | ||
50 | MIDDLEWARE = [ | 51 | MIDDLEWARE = [ |
52 | # 'corsheaders.middleware.CorsMiddleware', | ||
51 | 'django.middleware.security.SecurityMiddleware', | 53 | 'django.middleware.security.SecurityMiddleware', |
52 | 'django.contrib.sessions.middleware.SessionMiddleware', | 54 | 'django.contrib.sessions.middleware.SessionMiddleware', |
53 | 'django.middleware.common.CommonMiddleware', | 55 | 'django.middleware.common.CommonMiddleware', |
... | @@ -166,3 +168,7 @@ JWT_AUTH = { | ... | @@ -166,3 +168,7 @@ JWT_AUTH = { |
166 | 'JWT_VERIFY_EXPIRATION': True, | 168 | 'JWT_VERIFY_EXPIRATION': True, |
167 | 'JWT_ALLOW_REFRESH': True, | 169 | 'JWT_ALLOW_REFRESH': True, |
168 | } | 170 | } |
171 | |||
172 | # 跨域设置 | ||
173 | # CORS_ORIGIN_ALLOW_ALL = True | ||
174 | # CORS_ALLOW_CREDENTIALS = True | ... | ... |
-
Please register or sign in to post a comment