e325cfc3 by 周伟奇

modify max sleep time

1 parent d024de62
...@@ -184,9 +184,9 @@ class Command(BaseCommand, LoggerMixin): ...@@ -184,9 +184,9 @@ class Command(BaseCommand, LoggerMixin):
184 start = i + 1 184 start = i + 1
185 return img_il_list 185 return img_il_list
186 186
187 def handle(self, *args, **kwargs): 187 def handle(self, *args, **kwargs): # TODO 调用接口重试
188 sleep_second = 5 188 sleep_second = 5
189 max_sleep_second = 300 189 max_sleep_second = 60
190 while self.switch: 190 while self.switch:
191 # 从队列获取文件信息 191 # 从队列获取文件信息
192 doc_info = self.get_doc_info() 192 doc_info = self.get_doc_info()
...@@ -206,7 +206,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -206,7 +206,7 @@ class Command(BaseCommand, LoggerMixin):
206 with fitz.Document(pdf_path) as pdf: 206 with fitz.Document(pdf_path) as pdf:
207 self.cronjob_log.info('{0} [pdf_path={1}] [metadata={2}]'.format( 207 self.cronjob_log.info('{0} [pdf_path={1}] [metadata={2}]'.format(
208 self.log_base, pdf_path, pdf.metadata)) 208 self.log_base, pdf_path, pdf.metadata))
209 # xref_list = [] # TODO 图片去重 209 # xref_list = [] # TODO 图片去重 特殊pdf:如电子发票
210 for pno in range(pdf.pageCount): 210 for pno in range(pdf.pageCount):
211 il = pdf.getPageImageList(pno) 211 il = pdf.getPageImageList(pno)
212 il.sort(key=lambda x: x[0]) 212 il.sort(key=lambda x: x[0])
...@@ -219,8 +219,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -219,8 +219,8 @@ class Command(BaseCommand, LoggerMixin):
219 save_path = os.path.join(img_save_path, 'page_{0}_img_0.png'.format(page.number)) 219 save_path = os.path.join(img_save_path, 'page_{0}_img_0.png'.format(page.number))
220 pm.writePNG(save_path) 220 pm.writePNG(save_path)
221 img_path_list.append(save_path) 221 img_path_list.append(save_path)
222 self.cronjob_log.info('{0} [page to img success] [pdf_path={1}] [page={2}]'.format( 222 self.cronjob_log.info('{0} [page to img success] [doc_id={1}] [pdf_path={2}] '
223 self.log_base, pdf_path, page.number)) 223 '[page={3}]'.format(self.log_base, doc_id, pdf_path, page.number))
224 else: # 提取图片 224 else: # 提取图片
225 for img_index, img_il in enumerate(img_il_list): 225 for img_index, img_il in enumerate(img_il_list):
226 if len(img_il) == 1: # 当只有一张图片时, 简化处理 226 if len(img_il) == 1: # 当只有一张图片时, 简化处理
...@@ -232,8 +232,8 @@ class Command(BaseCommand, LoggerMixin): ...@@ -232,8 +232,8 @@ class Command(BaseCommand, LoggerMixin):
232 f.write(img_data) 232 f.write(img_data)
233 img_path_list.append(save_path) 233 img_path_list.append(save_path)
234 self.cronjob_log.info( 234 self.cronjob_log.info(
235 '{0} [extract img success] [pdf_path={1}] [page={2}] [img_index={3}]'.format( 235 '{0} [extract img success] [doc_id={1}] [pdf_path={2}] [page={3}] '
236 self.log_base, pdf_path, pno, img_index)) 236 '[img_index={4}]'.format(self.log_base, doc_id, pdf_path, pno, img_index))
237 else: # 多张图片,竖向拼接 237 else: # 多张图片,竖向拼接
238 height_sum = 0 238 height_sum = 0
239 im_list = [] 239 im_list = []
...@@ -262,9 +262,9 @@ class Command(BaseCommand, LoggerMixin): ...@@ -262,9 +262,9 @@ class Command(BaseCommand, LoggerMixin):
262 res.save(save_path) 262 res.save(save_path)
263 img_path_list.append(save_path) 263 img_path_list.append(save_path)
264 self.cronjob_log.info( 264 self.cronjob_log.info(
265 '{0} [extract img success] [pdf_path={1}] [page={2}] [img_index={3}]'.format( 265 '{0} [extract img success] [doc_id={1}] [pdf_path={2}] [page={3}] '
266 self.log_base, pdf_path, pno, img_index)) 266 '[img_index={4}]'.format(self.log_base, doc_id, pdf_path, pno, img_index))
267 self.cronjob_log.info('{0} [pdf to img success]'.format(self.log_base)) 267 self.cronjob_log.info('{0} [pdf to img success] [doc_id={1}]'.format(self.log_base, doc_id))
268 268
269 write_zip_file(img_save_path, os.path.join(doc_data_path, '{0}_img.zip'.format(doc_id))) 269 write_zip_file(img_save_path, os.path.join(doc_data_path, '{0}_img.zip'.format(doc_id)))
270 # 图片调用算法判断是否为银行流水, 图片调用算法OCR为excel文件 270 # 图片调用算法判断是否为银行流水, 图片调用算法OCR为excel文件
...@@ -273,11 +273,13 @@ class Command(BaseCommand, LoggerMixin): ...@@ -273,11 +273,13 @@ class Command(BaseCommand, LoggerMixin):
273 tasks = [self.img_ocr_excel(wb, img_path) for img_path in img_path_list] 273 tasks = [self.img_ocr_excel(wb, img_path) for img_path in img_path_list]
274 loop.run_until_complete(asyncio.wait(tasks)) 274 loop.run_until_complete(asyncio.wait(tasks))
275 # loop.close() 275 # loop.close()
276 wb.save(excel_path) 276 wb.save(excel_path) # TODO no sheet (res always [])
277 # 整合excel文件上传至EDMS 277 # 整合excel文件上传至EDMS
278 except Exception as e: 278 except Exception as e:
279 UploadDocRecords.objects.filter(id=doc_id).update(status=DocStatus.PROCESS_FAILED.value) 279 UploadDocRecords.objects.filter(id=doc_id).update(status=DocStatus.PROCESS_FAILED.value)
280 self.cronjob_log.error('{0} [process failed] [err={1}]'.format(self.log_base, e)) 280 self.cronjob_log.error('{0} [process failed] [doc_id={1}] [err={2}]'.format(self.log_base, doc_id, e))
281 else: 281 else:
282 UploadDocRecords.objects.filter(id=doc_id).update(status=DocStatus.COMPLETE.value) 282 UploadDocRecords.objects.filter(id=doc_id).update(status=DocStatus.COMPLETE.value)
283 self.cronjob_log.info('{0} [doc process complete] [doc_id={1}]'.format(self.log_base, doc_id)) 283 self.cronjob_log.info('{0} [doc process complete] [doc_id={1}]'.format(self.log_base, doc_id))
284
285 self.cronjob_log.info('{0} [stop safely]')
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!