From c25ced3cbd5472c78c6074f8f6e572df308095f1 Mon Sep 17 00:00:00 2001 From: 周伟奇 <zhouweiqi@situdata.com> Date: Wed, 11 Nov 2020 22:03:44 +0800 Subject: [PATCH] fix bug --- src/apps/doc/management/commands/folder_ocr_process.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/apps/doc/management/commands/folder_ocr_process.py b/src/apps/doc/management/commands/folder_ocr_process.py index 4a28f96..f1d952c 100644 --- a/src/apps/doc/management/commands/folder_ocr_process.py +++ b/src/apps/doc/management/commands/folder_ocr_process.py @@ -56,13 +56,14 @@ class Command(BaseCommand, LoggerMixin): return img_name, 1, 1 @staticmethod - def get_path(name, img_output_dir, wb_output_dir): + def get_path(name, img_output_dir, wb_output_dir, pdf_output_dir): time_stamp = int(time.time()) new_name = '{0}_{1}'.format(time_stamp, name) img_save_path = os.path.join(img_output_dir, new_name) + pdf_save_path = os.path.join(pdf_output_dir, new_name) excel_name = '{0}.xlsx'.format(os.path.splitext(new_name)[0]) excel_path = os.path.join(wb_output_dir, excel_name) - return img_save_path, excel_path + return img_save_path, excel_path, pdf_save_path def res_process(self, all_res, classify, excel_path): try: @@ -131,7 +132,7 @@ class Command(BaseCommand, LoggerMixin): def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir): if os.path.exists(path): try: - img_save_path, excel_path= self.get_path(name, img_output_dir, wb_output_dir) + img_save_path, excel_path, pdf_save_path = self.get_path(name, img_output_dir, wb_output_dir, pdf_output_dir) self.cronjob_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path)) pdf_handler = PDFHandler(path, img_save_path) pdf_handler.extract_image() @@ -145,15 +146,14 @@ class Command(BaseCommand, LoggerMixin): ocr_res = self.ocr_process(img_path, classify) all_res[img_path] = ocr_res self.res_process(all_res, classify, excel_path) + shutil.move(pdf_save_path, pdf_output_dir) - shutil.move(path, pdf_output_dir) - - def img_process(self, name, path, classify, wb_output_dir, img_output_dir): + def img_process(self, name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir): ocr_res = self.ocr_process(path, classify) all_res = {path: ocr_res} try: - img_save_path, excel_path = self.get_path(name, img_output_dir, wb_output_dir) + img_save_path, excel_path, _ = self.get_path(name, img_output_dir, wb_output_dir, pdf_output_dir) except Exception as e: self.cronjob_log.error('{0} [get path error] [path={1}] [error={2}]'.format( self.log_base, path, traceback.format_exc())) @@ -174,7 +174,7 @@ class Command(BaseCommand, LoggerMixin): # 1. 从input dir获取pdf or image list_dir = os.listdir(input_dir) if not list_dir: - self.cronjob_log.error('{0} [input dir empty] [input_dir={1}]'.format(self.log_base, input_dir)) + self.cronjob_log.info('{0} [input dir empty] [input_dir={1}]'.format(self.log_base, input_dir)) time.sleep(self.sleep_time) for name in list_dir: path = os.path.join(input_dir, name) @@ -183,7 +183,7 @@ class Command(BaseCommand, LoggerMixin): if name.endswith('.pdf'): self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir) else: - self.img_process(name, path, classify, wb_output_dir, img_output_dir) + self.img_process(name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir) self.cronjob_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) def handle(self, *args, **kwargs): -- libgit2 0.24.0