e325cfc3 by 周伟奇

Reduce max sleep time from 300 to 60 seconds and add doc_id to log messages

1 parent d024de62
......@@ -184,9 +184,9 @@ class Command(BaseCommand, LoggerMixin):
start = i + 1
return img_il_list
def handle(self, *args, **kwargs):
def handle(self, *args, **kwargs): # TODO 调用接口重试
sleep_second = 5
max_sleep_second = 300
max_sleep_second = 60
while self.switch:
# 从队列获取文件信息
doc_info = self.get_doc_info()
......@@ -206,7 +206,7 @@ class Command(BaseCommand, LoggerMixin):
with fitz.Document(pdf_path) as pdf:
self.cronjob_log.info('{0} [pdf_path={1}] [metadata={2}]'.format(
self.log_base, pdf_path, pdf.metadata))
# xref_list = [] # TODO 图片去重
# xref_list = [] # TODO 图片去重 特殊pdf:如电子发票
for pno in range(pdf.pageCount):
il = pdf.getPageImageList(pno)
il.sort(key=lambda x: x[0])
......@@ -219,8 +219,8 @@ class Command(BaseCommand, LoggerMixin):
save_path = os.path.join(img_save_path, 'page_{0}_img_0.png'.format(page.number))
pm.writePNG(save_path)
img_path_list.append(save_path)
self.cronjob_log.info('{0} [page to img success] [pdf_path={1}] [page={2}]'.format(
self.log_base, pdf_path, page.number))
self.cronjob_log.info('{0} [page to img success] [doc_id={1}] [pdf_path={2}] '
'[page={3}]'.format(self.log_base, doc_id, pdf_path, page.number))
else: # 提取图片
for img_index, img_il in enumerate(img_il_list):
if len(img_il) == 1: # 当只有一张图片时, 简化处理
......@@ -232,8 +232,8 @@ class Command(BaseCommand, LoggerMixin):
f.write(img_data)
img_path_list.append(save_path)
self.cronjob_log.info(
'{0} [extract img success] [pdf_path={1}] [page={2}] [img_index={3}]'.format(
self.log_base, pdf_path, pno, img_index))
'{0} [extract img success] [doc_id={1}] [pdf_path={2}] [page={3}] '
'[img_index={4}]'.format(self.log_base, doc_id, pdf_path, pno, img_index))
else: # 多张图片,竖向拼接
height_sum = 0
im_list = []
......@@ -262,9 +262,9 @@ class Command(BaseCommand, LoggerMixin):
res.save(save_path)
img_path_list.append(save_path)
self.cronjob_log.info(
'{0} [extract img success] [pdf_path={1}] [page={2}] [img_index={3}]'.format(
self.log_base, pdf_path, pno, img_index))
self.cronjob_log.info('{0} [pdf to img success]'.format(self.log_base))
'{0} [extract img success] [doc_id={1}] [pdf_path={2}] [page={3}] '
'[img_index={4}]'.format(self.log_base, doc_id, pdf_path, pno, img_index))
self.cronjob_log.info('{0} [pdf to img success] [doc_id={1}]'.format(self.log_base, doc_id))
write_zip_file(img_save_path, os.path.join(doc_data_path, '{0}_img.zip'.format(doc_id)))
# 图片调用算法判断是否为银行流水, 图片调用算法OCR为excel文件
......@@ -273,11 +273,13 @@ class Command(BaseCommand, LoggerMixin):
tasks = [self.img_ocr_excel(wb, img_path) for img_path in img_path_list]
loop.run_until_complete(asyncio.wait(tasks))
# loop.close()
wb.save(excel_path)
wb.save(excel_path) # TODO no sheet (res always [])
# 整合excel文件上传至EDMS
except Exception as e:
UploadDocRecords.objects.filter(id=doc_id).update(status=DocStatus.PROCESS_FAILED.value)
self.cronjob_log.error('{0} [process failed] [err={1}]'.format(self.log_base, e))
self.cronjob_log.error('{0} [process failed] [doc_id={1}] [err={2}]'.format(self.log_base, doc_id, e))
else:
UploadDocRecords.objects.filter(id=doc_id).update(status=DocStatus.COMPLETE.value)
self.cronjob_log.info('{0} [doc process complete] [doc_id={1}]'.format(self.log_base, doc_id))
# Emit the shutdown message with the same log prefix as every other log line;
# without .format() the literal "{0}" placeholder would be written to the log.
self.cronjob_log.info('{0} [stop safely]'.format(self.log_base))
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!