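Fix bug: get_path now takes pdf_output_dir and also returns pdf_save_path; an empty input dir is logged at info level instead of error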
Showing
1 changed file
with
9 additions
and
9 deletions
| ... | @@ -56,13 +56,14 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -56,13 +56,14 @@ class Command(BaseCommand, LoggerMixin): |
| 56 | return img_name, 1, 1 | 56 | return img_name, 1, 1 |
| 57 | 57 | ||
| 58 | @staticmethod | 58 | @staticmethod |
| 59 | def get_path(name, img_output_dir, wb_output_dir): | 59 | def get_path(name, img_output_dir, wb_output_dir, pdf_output_dir): |
| 60 | time_stamp = int(time.time()) | 60 | time_stamp = int(time.time()) |
| 61 | new_name = '{0}_{1}'.format(time_stamp, name) | 61 | new_name = '{0}_{1}'.format(time_stamp, name) |
| 62 | img_save_path = os.path.join(img_output_dir, new_name) | 62 | img_save_path = os.path.join(img_output_dir, new_name) |
| 63 | pdf_save_path = os.path.join(pdf_output_dir, new_name) | ||
| 63 | excel_name = '{0}.xlsx'.format(os.path.splitext(new_name)[0]) | 64 | excel_name = '{0}.xlsx'.format(os.path.splitext(new_name)[0]) |
| 64 | excel_path = os.path.join(wb_output_dir, excel_name) | 65 | excel_path = os.path.join(wb_output_dir, excel_name) |
| 65 | return img_save_path, excel_path | 66 | return img_save_path, excel_path, pdf_save_path |
| 66 | 67 | ||
| 67 | def res_process(self, all_res, classify, excel_path): | 68 | def res_process(self, all_res, classify, excel_path): |
| 68 | try: | 69 | try: |
| ... | @@ -131,7 +132,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -131,7 +132,7 @@ class Command(BaseCommand, LoggerMixin): |
| 131 | def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir): | 132 | def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir): |
| 132 | if os.path.exists(path): | 133 | if os.path.exists(path): |
| 133 | try: | 134 | try: |
| 134 | img_save_path, excel_path= self.get_path(name, img_output_dir, wb_output_dir) | 135 | img_save_path, excel_path, pdf_save_path = self.get_path(name, img_output_dir, wb_output_dir, pdf_output_dir) |
| 135 | self.cronjob_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path)) | 136 | self.cronjob_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path)) |
| 136 | pdf_handler = PDFHandler(path, img_save_path) | 137 | pdf_handler = PDFHandler(path, img_save_path) |
| 137 | pdf_handler.extract_image() | 138 | pdf_handler.extract_image() |
| ... | @@ -145,15 +146,14 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -145,15 +146,14 @@ class Command(BaseCommand, LoggerMixin): |
| 145 | ocr_res = self.ocr_process(img_path, classify) | 146 | ocr_res = self.ocr_process(img_path, classify) |
| 146 | all_res[img_path] = ocr_res | 147 | all_res[img_path] = ocr_res |
| 147 | self.res_process(all_res, classify, excel_path) | 148 | self.res_process(all_res, classify, excel_path) |
| 149 | shutil.move(pdf_save_path, pdf_output_dir) | ||
| 148 | 150 | ||
| 149 | shutil.move(path, pdf_output_dir) | 151 | def img_process(self, name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir): |
| 150 | |||
| 151 | def img_process(self, name, path, classify, wb_output_dir, img_output_dir): | ||
| 152 | ocr_res = self.ocr_process(path, classify) | 152 | ocr_res = self.ocr_process(path, classify) |
| 153 | all_res = {path: ocr_res} | 153 | all_res = {path: ocr_res} |
| 154 | 154 | ||
| 155 | try: | 155 | try: |
| 156 | img_save_path, excel_path = self.get_path(name, img_output_dir, wb_output_dir) | 156 | img_save_path, excel_path, _ = self.get_path(name, img_output_dir, wb_output_dir, pdf_output_dir) |
| 157 | except Exception as e: | 157 | except Exception as e: |
| 158 | self.cronjob_log.error('{0} [get path error] [path={1}] [error={2}]'.format( | 158 | self.cronjob_log.error('{0} [get path error] [path={1}] [error={2}]'.format( |
| 159 | self.log_base, path, traceback.format_exc())) | 159 | self.log_base, path, traceback.format_exc())) |
| ... | @@ -174,7 +174,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -174,7 +174,7 @@ class Command(BaseCommand, LoggerMixin): |
| 174 | # 1. 从input dir获取pdf or image | 174 | # 1. 从input dir获取pdf or image |
| 175 | list_dir = os.listdir(input_dir) | 175 | list_dir = os.listdir(input_dir) |
| 176 | if not list_dir: | 176 | if not list_dir: |
| 177 | self.cronjob_log.error('{0} [input dir empty] [input_dir={1}]'.format(self.log_base, input_dir)) | 177 | self.cronjob_log.info('{0} [input dir empty] [input_dir={1}]'.format(self.log_base, input_dir)) |
| 178 | time.sleep(self.sleep_time) | 178 | time.sleep(self.sleep_time) |
| 179 | for name in list_dir: | 179 | for name in list_dir: |
| 180 | path = os.path.join(input_dir, name) | 180 | path = os.path.join(input_dir, name) |
| ... | @@ -183,7 +183,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -183,7 +183,7 @@ class Command(BaseCommand, LoggerMixin): |
| 183 | if name.endswith('.pdf'): | 183 | if name.endswith('.pdf'): |
| 184 | self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir) | 184 | self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir) |
| 185 | else: | 185 | else: |
| 186 | self.img_process(name, path, classify, wb_output_dir, img_output_dir) | 186 | self.img_process(name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir) |
| 187 | self.cronjob_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) | 187 | self.cronjob_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) |
| 188 | 188 | ||
| 189 | def handle(self, *args, **kwargs): | 189 | def handle(self, *args, **kwargs): | ... | ... |
-
Please register or sign in to post a comment