modify max sleep time
Showing 1 changed file with 14 additions and 12 deletions
... | @@ -184,9 +184,9 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -184,9 +184,9 @@ class Command(BaseCommand, LoggerMixin): |
184 | start = i + 1 | 184 | start = i + 1 |
185 | return img_il_list | 185 | return img_il_list |
186 | 186 | ||
187 | def handle(self, *args, **kwargs): | 187 | def handle(self, *args, **kwargs): # TODO 调用接口重试 |
188 | sleep_second = 5 | 188 | sleep_second = 5 |
189 | max_sleep_second = 300 | 189 | max_sleep_second = 60 |
190 | while self.switch: | 190 | while self.switch: |
191 | # 从队列获取文件信息 | 191 | # 从队列获取文件信息 |
192 | doc_info = self.get_doc_info() | 192 | doc_info = self.get_doc_info() |
... | @@ -206,7 +206,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -206,7 +206,7 @@ class Command(BaseCommand, LoggerMixin): |
206 | with fitz.Document(pdf_path) as pdf: | 206 | with fitz.Document(pdf_path) as pdf: |
207 | self.cronjob_log.info('{0} [pdf_path={1}] [metadata={2}]'.format( | 207 | self.cronjob_log.info('{0} [pdf_path={1}] [metadata={2}]'.format( |
208 | self.log_base, pdf_path, pdf.metadata)) | 208 | self.log_base, pdf_path, pdf.metadata)) |
209 | # xref_list = [] # TODO 图片去重 | 209 | # xref_list = [] # TODO 图片去重 特殊pdf:如电子发票 |
210 | for pno in range(pdf.pageCount): | 210 | for pno in range(pdf.pageCount): |
211 | il = pdf.getPageImageList(pno) | 211 | il = pdf.getPageImageList(pno) |
212 | il.sort(key=lambda x: x[0]) | 212 | il.sort(key=lambda x: x[0]) |
... | @@ -219,8 +219,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -219,8 +219,8 @@ class Command(BaseCommand, LoggerMixin): |
219 | save_path = os.path.join(img_save_path, 'page_{0}_img_0.png'.format(page.number)) | 219 | save_path = os.path.join(img_save_path, 'page_{0}_img_0.png'.format(page.number)) |
220 | pm.writePNG(save_path) | 220 | pm.writePNG(save_path) |
221 | img_path_list.append(save_path) | 221 | img_path_list.append(save_path) |
222 | self.cronjob_log.info('{0} [page to img success] [pdf_path={1}] [page={2}]'.format( | 222 | self.cronjob_log.info('{0} [page to img success] [doc_id={1}] [pdf_path={2}] ' |
223 | self.log_base, pdf_path, page.number)) | 223 | '[page={3}]'.format(self.log_base, doc_id, pdf_path, page.number)) |
224 | else: # 提取图片 | 224 | else: # 提取图片 |
225 | for img_index, img_il in enumerate(img_il_list): | 225 | for img_index, img_il in enumerate(img_il_list): |
226 | if len(img_il) == 1: # 当只有一张图片时, 简化处理 | 226 | if len(img_il) == 1: # 当只有一张图片时, 简化处理 |
... | @@ -232,8 +232,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -232,8 +232,8 @@ class Command(BaseCommand, LoggerMixin): |
232 | f.write(img_data) | 232 | f.write(img_data) |
233 | img_path_list.append(save_path) | 233 | img_path_list.append(save_path) |
234 | self.cronjob_log.info( | 234 | self.cronjob_log.info( |
235 | '{0} [extract img success] [pdf_path={1}] [page={2}] [img_index={3}]'.format( | 235 | '{0} [extract img success] [doc_id={1}] [pdf_path={2}] [page={3}] ' |
236 | self.log_base, pdf_path, pno, img_index)) | 236 | '[img_index={4}]'.format(self.log_base, doc_id, pdf_path, pno, img_index)) |
237 | else: # 多张图片,竖向拼接 | 237 | else: # 多张图片,竖向拼接 |
238 | height_sum = 0 | 238 | height_sum = 0 |
239 | im_list = [] | 239 | im_list = [] |
... | @@ -262,9 +262,9 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -262,9 +262,9 @@ class Command(BaseCommand, LoggerMixin): |
262 | res.save(save_path) | 262 | res.save(save_path) |
263 | img_path_list.append(save_path) | 263 | img_path_list.append(save_path) |
264 | self.cronjob_log.info( | 264 | self.cronjob_log.info( |
265 | '{0} [extract img success] [pdf_path={1}] [page={2}] [img_index={3}]'.format( | 265 | '{0} [extract img success] [doc_id={1}] [pdf_path={2}] [page={3}] ' |
266 | self.log_base, pdf_path, pno, img_index)) | 266 | '[img_index={4}]'.format(self.log_base, doc_id, pdf_path, pno, img_index)) |
267 | self.cronjob_log.info('{0} [pdf to img success]'.format(self.log_base)) | 267 | self.cronjob_log.info('{0} [pdf to img success] [doc_id={1}]'.format(self.log_base, doc_id)) |
268 | 268 | ||
269 | write_zip_file(img_save_path, os.path.join(doc_data_path, '{0}_img.zip'.format(doc_id))) | 269 | write_zip_file(img_save_path, os.path.join(doc_data_path, '{0}_img.zip'.format(doc_id))) |
270 | # 图片调用算法判断是否为银行流水, 图片调用算法OCR为excel文件 | 270 | # 图片调用算法判断是否为银行流水, 图片调用算法OCR为excel文件 |
... | @@ -273,11 +273,13 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -273,11 +273,13 @@ class Command(BaseCommand, LoggerMixin): |
273 | tasks = [self.img_ocr_excel(wb, img_path) for img_path in img_path_list] | 273 | tasks = [self.img_ocr_excel(wb, img_path) for img_path in img_path_list] |
274 | loop.run_until_complete(asyncio.wait(tasks)) | 274 | loop.run_until_complete(asyncio.wait(tasks)) |
275 | # loop.close() | 275 | # loop.close() |
276 | wb.save(excel_path) | 276 | wb.save(excel_path) # TODO no sheet (res always []) |
277 | # 整合excel文件上传至EDMS | 277 | # 整合excel文件上传至EDMS |
278 | except Exception as e: | 278 | except Exception as e: |
279 | UploadDocRecords.objects.filter(id=doc_id).update(status=DocStatus.PROCESS_FAILED.value) | 279 | UploadDocRecords.objects.filter(id=doc_id).update(status=DocStatus.PROCESS_FAILED.value) |
280 | self.cronjob_log.error('{0} [process failed] [err={1}]'.format(self.log_base, e)) | 280 | self.cronjob_log.error('{0} [process failed] [doc_id={1}] [err={2}]'.format(self.log_base, doc_id, e)) |
281 | else: | 281 | else: |
282 | UploadDocRecords.objects.filter(id=doc_id).update(status=DocStatus.COMPLETE.value) | 282 | UploadDocRecords.objects.filter(id=doc_id).update(status=DocStatus.COMPLETE.value) |
283 | self.cronjob_log.info('{0} [doc process complete] [doc_id={1}]'.format(self.log_base, doc_id)) | 283 | self.cronjob_log.info('{0} [doc process complete] [doc_id={1}]'.format(self.log_base, doc_id)) |
284 | |||
285 | self.cronjob_log.info('{0} [stop safely]') | ... | ... |
-
Please register or sign in to post a comment