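Fix bug: pass pdf_output_dir through get_path and img_process, return a PDF save path from get_path, and log an empty input dir at info level instead of error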
Showing 1 changed file with 9 additions and 9 deletions
| ... | @@ -56,13 +56,14 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -56,13 +56,14 @@ class Command(BaseCommand, LoggerMixin): | 
| 56 | return img_name, 1, 1 | 56 | return img_name, 1, 1 | 
| 57 | 57 | ||
| 58 | @staticmethod | 58 | @staticmethod | 
| 59 | def get_path(name, img_output_dir, wb_output_dir): | 59 | def get_path(name, img_output_dir, wb_output_dir, pdf_output_dir): | 
| 60 | time_stamp = int(time.time()) | 60 | time_stamp = int(time.time()) | 
| 61 | new_name = '{0}_{1}'.format(time_stamp, name) | 61 | new_name = '{0}_{1}'.format(time_stamp, name) | 
| 62 | img_save_path = os.path.join(img_output_dir, new_name) | 62 | img_save_path = os.path.join(img_output_dir, new_name) | 
| 63 | pdf_save_path = os.path.join(pdf_output_dir, new_name) | ||
| 63 | excel_name = '{0}.xlsx'.format(os.path.splitext(new_name)[0]) | 64 | excel_name = '{0}.xlsx'.format(os.path.splitext(new_name)[0]) | 
| 64 | excel_path = os.path.join(wb_output_dir, excel_name) | 65 | excel_path = os.path.join(wb_output_dir, excel_name) | 
| 65 | return img_save_path, excel_path | 66 | return img_save_path, excel_path, pdf_save_path | 
| 66 | 67 | ||
| 67 | def res_process(self, all_res, classify, excel_path): | 68 | def res_process(self, all_res, classify, excel_path): | 
| 68 | try: | 69 | try: | 
| ... | @@ -131,7 +132,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -131,7 +132,7 @@ class Command(BaseCommand, LoggerMixin): | 
| 131 | def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir): | 132 | def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir): | 
| 132 | if os.path.exists(path): | 133 | if os.path.exists(path): | 
| 133 | try: | 134 | try: | 
| 134 | img_save_path, excel_path= self.get_path(name, img_output_dir, wb_output_dir) | 135 | img_save_path, excel_path, pdf_save_path = self.get_path(name, img_output_dir, wb_output_dir, pdf_output_dir) | 
| 135 | self.cronjob_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path)) | 136 | self.cronjob_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path)) | 
| 136 | pdf_handler = PDFHandler(path, img_save_path) | 137 | pdf_handler = PDFHandler(path, img_save_path) | 
| 137 | pdf_handler.extract_image() | 138 | pdf_handler.extract_image() | 
| ... | @@ -145,15 +146,14 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -145,15 +146,14 @@ class Command(BaseCommand, LoggerMixin): | 
| 145 | ocr_res = self.ocr_process(img_path, classify) | 146 | ocr_res = self.ocr_process(img_path, classify) | 
| 146 | all_res[img_path] = ocr_res | 147 | all_res[img_path] = ocr_res | 
| 147 | self.res_process(all_res, classify, excel_path) | 148 | self.res_process(all_res, classify, excel_path) | 
| 149 | shutil.move(pdf_save_path, pdf_output_dir) | ||
| 148 | 150 | ||
| 149 | shutil.move(path, pdf_output_dir) | 151 | def img_process(self, name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir): | 
| 150 | |||
| 151 | def img_process(self, name, path, classify, wb_output_dir, img_output_dir): | ||
| 152 | ocr_res = self.ocr_process(path, classify) | 152 | ocr_res = self.ocr_process(path, classify) | 
| 153 | all_res = {path: ocr_res} | 153 | all_res = {path: ocr_res} | 
| 154 | 154 | ||
| 155 | try: | 155 | try: | 
| 156 | img_save_path, excel_path = self.get_path(name, img_output_dir, wb_output_dir) | 156 | img_save_path, excel_path, _ = self.get_path(name, img_output_dir, wb_output_dir, pdf_output_dir) | 
| 157 | except Exception as e: | 157 | except Exception as e: | 
| 158 | self.cronjob_log.error('{0} [get path error] [path={1}] [error={2}]'.format( | 158 | self.cronjob_log.error('{0} [get path error] [path={1}] [error={2}]'.format( | 
| 159 | self.log_base, path, traceback.format_exc())) | 159 | self.log_base, path, traceback.format_exc())) | 
| ... | @@ -174,7 +174,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -174,7 +174,7 @@ class Command(BaseCommand, LoggerMixin): | 
| 174 | # 1. 从input dir获取pdf or image | 174 | # 1. 从input dir获取pdf or image | 
| 175 | list_dir = os.listdir(input_dir) | 175 | list_dir = os.listdir(input_dir) | 
| 176 | if not list_dir: | 176 | if not list_dir: | 
| 177 | self.cronjob_log.error('{0} [input dir empty] [input_dir={1}]'.format(self.log_base, input_dir)) | 177 | self.cronjob_log.info('{0} [input dir empty] [input_dir={1}]'.format(self.log_base, input_dir)) | 
| 178 | time.sleep(self.sleep_time) | 178 | time.sleep(self.sleep_time) | 
| 179 | for name in list_dir: | 179 | for name in list_dir: | 
| 180 | path = os.path.join(input_dir, name) | 180 | path = os.path.join(input_dir, name) | 
| ... | @@ -183,7 +183,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -183,7 +183,7 @@ class Command(BaseCommand, LoggerMixin): | 
| 183 | if name.endswith('.pdf'): | 183 | if name.endswith('.pdf'): | 
| 184 | self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir) | 184 | self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir) | 
| 185 | else: | 185 | else: | 
| 186 | self.img_process(name, path, classify, wb_output_dir, img_output_dir) | 186 | self.img_process(name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir) | 
| 187 | self.cronjob_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) | 187 | self.cronjob_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) | 
| 188 | 188 | ||
| 189 | def handle(self, *args, **kwargs): | 189 | def handle(self, *args, **kwargs): | ... | ... | 
- 
Please register or sign in to post a comment