From c25ced3cbd5472c78c6074f8f6e572df308095f1 Mon Sep 17 00:00:00 2001
From: 周伟奇 <zhouweiqi@situdata.com>
Date: Wed, 11 Nov 2020 22:03:44 +0800
Subject: [PATCH] fix bug: pass pdf_output_dir to get_path; log empty input dir as info

---
 src/apps/doc/management/commands/folder_ocr_process.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/apps/doc/management/commands/folder_ocr_process.py b/src/apps/doc/management/commands/folder_ocr_process.py
index 4a28f96..f1d952c 100644
--- a/src/apps/doc/management/commands/folder_ocr_process.py
+++ b/src/apps/doc/management/commands/folder_ocr_process.py
@@ -56,13 +56,14 @@ class Command(BaseCommand, LoggerMixin):
             return img_name, 1, 1
 
     @staticmethod
-    def get_path(name, img_output_dir, wb_output_dir):
+    def get_path(name, img_output_dir, wb_output_dir, pdf_output_dir):
         time_stamp = int(time.time())
         new_name = '{0}_{1}'.format(time_stamp, name)
         img_save_path = os.path.join(img_output_dir, new_name)
+        pdf_save_path = os.path.join(pdf_output_dir, new_name)
         excel_name = '{0}.xlsx'.format(os.path.splitext(new_name)[0])
         excel_path = os.path.join(wb_output_dir, excel_name)
-        return img_save_path, excel_path
+        return img_save_path, excel_path, pdf_save_path
 
     def res_process(self, all_res, classify, excel_path):
         try:
@@ -131,7 +132,7 @@ class Command(BaseCommand, LoggerMixin):
     def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir):
         if os.path.exists(path):
             try:
-                img_save_path, excel_path= self.get_path(name, img_output_dir, wb_output_dir)
+                img_save_path, excel_path, pdf_save_path = self.get_path(name, img_output_dir, wb_output_dir, pdf_output_dir)
                 self.cronjob_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path))
                 pdf_handler = PDFHandler(path, img_save_path)
                 pdf_handler.extract_image()
@@ -145,15 +146,14 @@ class Command(BaseCommand, LoggerMixin):
                     ocr_res = self.ocr_process(img_path, classify)
                     all_res[img_path] = ocr_res
                 self.res_process(all_res, classify, excel_path)
+                shutil.move(path, pdf_save_path)  # src is the input file; dst is its timestamped name under pdf_output_dir
 
-            shutil.move(path, pdf_output_dir)
-
-    def img_process(self, name, path, classify, wb_output_dir, img_output_dir):
+    def img_process(self, name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir):
         ocr_res = self.ocr_process(path, classify)
         all_res = {path: ocr_res}
 
         try:
-            img_save_path, excel_path = self.get_path(name, img_output_dir, wb_output_dir)
+            img_save_path, excel_path, _ = self.get_path(name, img_output_dir, wb_output_dir, pdf_output_dir)
         except Exception as e:
             self.cronjob_log.error('{0} [get path error] [path={1}] [error={2}]'.format(
                 self.log_base, path, traceback.format_exc()))
@@ -174,7 +174,7 @@ class Command(BaseCommand, LoggerMixin):
             # 1. 从input dir获取pdf or image
             list_dir = os.listdir(input_dir)
             if not list_dir:
-                self.cronjob_log.error('{0} [input dir empty] [input_dir={1}]'.format(self.log_base, input_dir))
+                self.cronjob_log.info('{0} [input dir empty] [input_dir={1}]'.format(self.log_base, input_dir))
                 time.sleep(self.sleep_time)
             for name in list_dir:
                 path = os.path.join(input_dir, name)
@@ -183,7 +183,7 @@ class Command(BaseCommand, LoggerMixin):
                     if name.endswith('.pdf'):
                         self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir)
                     else:
-                        self.img_process(name, path, classify, wb_output_dir, img_output_dir)
+                        self.img_process(name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir)
                     self.cronjob_log.info('{0} [file end] [path={1}]'.format(self.log_base, path))
 
     def handle(self, *args, **kwargs):
--
libgit2 0.24.0