Fix bug: add a timestamped PDF save path to get_path; log empty input dir as info

周伟奇
Showing 1 changed file with 9 additions and 9 deletions
src/apps/doc/management/commands/folder_ocr_process.py
--- a/src/apps/doc/management/commands/folder_ocr_process.py
View file @c25ced3
+++ b/src/apps/doc/management/commands/folder_ocr_process.py
View file @c25ced3
@@ -56,13 +56,14 @@ class Command(BaseCommand, LoggerMixin):
            return img_name, 1, 1
    @staticmethod
-    def get_path(name, img_output_dir, wb_output_dir):
+    def get_path(name, img_output_dir, wb_output_dir, pdf_output_dir):
        time_stamp = int(time.time())
        new_name = '{0}_{1}'.format(time_stamp, name)
        img_save_path = os.path.join(img_output_dir, new_name)
+        pdf_save_path = os.path.join(pdf_output_dir, new_name)
        excel_name = '{0}.xlsx'.format(os.path.splitext(new_name)[0])
        excel_path = os.path.join(wb_output_dir, excel_name)
-        return img_save_path, excel_path
+        return img_save_path, excel_path, pdf_save_path
    def res_process(self, all_res, classify, excel_path):
        try:
@@ -131,7 +132,7 @@ class Command(BaseCommand, LoggerMixin):
    def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir):
        if os.path.exists(path):
            try:
-                img_save_path, excel_path= self.get_path(name, img_output_dir, wb_output_dir)
+                img_save_path, excel_path, pdf_save_path = self.get_path(name, img_output_dir, wb_output_dir, pdf_output_dir)
                self.cronjob_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path))
                pdf_handler = PDFHandler(path, img_save_path)
                pdf_handler.extract_image()
@@ -145,15 +146,14 @@ class Command(BaseCommand, LoggerMixin):
                    ocr_res = self.ocr_process(img_path, classify)
                    all_res[img_path] = ocr_res
                self.res_process(all_res, classify, excel_path)
+                shutil.move(pdf_save_path, pdf_output_dir)
-            shutil.move(path, pdf_output_dir)
+    def img_process(self, name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir):
-    def img_process(self, name, path, classify, wb_output_dir, img_output_dir):
        ocr_res = self.ocr_process(path, classify)
        all_res = {path: ocr_res}
        try:
-            img_save_path, excel_path = self.get_path(name, img_output_dir, wb_output_dir)
+            img_save_path, excel_path, _ = self.get_path(name, img_output_dir, wb_output_dir, pdf_output_dir)
        except Exception as e:
            self.cronjob_log.error('{0} [get path error] [path={1}] [error={2}]'.format(
                self.log_base, path, traceback.format_exc()))
@@ -174,7 +174,7 @@ class Command(BaseCommand, LoggerMixin):
            # 1. 从input dir获取pdf or image
            list_dir = os.listdir(input_dir)
            if not list_dir:
-                self.cronjob_log.error('{0} [input dir empty] [input_dir={1}]'.format(self.log_base, input_dir))
+                self.cronjob_log.info('{0} [input dir empty] [input_dir={1}]'.format(self.log_base, input_dir))
                time.sleep(self.sleep_time)
            for name in list_dir:
                path = os.path.join(input_dir, name)
@@ -183,7 +183,7 @@ class Command(BaseCommand, LoggerMixin):
                    if name.endswith('.pdf'):
                        self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir)
                    else:
-                        self.img_process(name, path, classify, wb_output_dir, img_output_dir)
+                        self.img_process(name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir)
                    self.cronjob_log.info('{0} [file end] [path={1}]'.format(self.log_base, path))
    def handle(self, *args, **kwargs):