c25ced3c by 周伟奇

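fix bug: pass pdf_output_dir through get_path and img_process, return pdf_save_path from get_path, and log an empty input dir at info level instead of error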

1 parent 97b0b2ed
......@@ -56,13 +56,14 @@ class Command(BaseCommand, LoggerMixin):
return img_name, 1, 1
@staticmethod
def get_path(name, img_output_dir, wb_output_dir):
def get_path(name, img_output_dir, wb_output_dir, pdf_output_dir):
time_stamp = int(time.time())
new_name = '{0}_{1}'.format(time_stamp, name)
img_save_path = os.path.join(img_output_dir, new_name)
pdf_save_path = os.path.join(pdf_output_dir, new_name)
excel_name = '{0}.xlsx'.format(os.path.splitext(new_name)[0])
excel_path = os.path.join(wb_output_dir, excel_name)
return img_save_path, excel_path
return img_save_path, excel_path, pdf_save_path
def res_process(self, all_res, classify, excel_path):
try:
......@@ -131,7 +132,7 @@ class Command(BaseCommand, LoggerMixin):
def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir):
if os.path.exists(path):
try:
img_save_path, excel_path= self.get_path(name, img_output_dir, wb_output_dir)
img_save_path, excel_path, pdf_save_path = self.get_path(name, img_output_dir, wb_output_dir, pdf_output_dir)
self.cronjob_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path))
pdf_handler = PDFHandler(path, img_save_path)
pdf_handler.extract_image()
......@@ -145,15 +146,14 @@ class Command(BaseCommand, LoggerMixin):
ocr_res = self.ocr_process(img_path, classify)
all_res[img_path] = ocr_res
self.res_process(all_res, classify, excel_path)
shutil.move(pdf_save_path, pdf_output_dir)
shutil.move(path, pdf_output_dir)
def img_process(self, name, path, classify, wb_output_dir, img_output_dir):
def img_process(self, name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir):
ocr_res = self.ocr_process(path, classify)
all_res = {path: ocr_res}
try:
img_save_path, excel_path = self.get_path(name, img_output_dir, wb_output_dir)
img_save_path, excel_path, _ = self.get_path(name, img_output_dir, wb_output_dir, pdf_output_dir)
except Exception as e:
self.cronjob_log.error('{0} [get path error] [path={1}] [error={2}]'.format(
self.log_base, path, traceback.format_exc()))
......@@ -174,7 +174,7 @@ class Command(BaseCommand, LoggerMixin):
# 1. Fetch PDF or image files from the input dir
list_dir = os.listdir(input_dir)
if not list_dir:
self.cronjob_log.error('{0} [input dir empty] [input_dir={1}]'.format(self.log_base, input_dir))
self.cronjob_log.info('{0} [input dir empty] [input_dir={1}]'.format(self.log_base, input_dir))
time.sleep(self.sleep_time)
for name in list_dir:
path = os.path.join(input_dir, name)
......@@ -183,7 +183,7 @@ class Command(BaseCommand, LoggerMixin):
if name.endswith('.pdf'):
self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir)
else:
self.img_process(name, path, classify, wb_output_dir, img_output_dir)
self.img_process(name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir)
self.cronjob_log.info('{0} [file end] [path={1}]'.format(self.log_base, path))
def handle(self, *args, **kwargs):
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!