c25ced3c by 周伟奇

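fix bug: pass pdf_output_dir through get_path so it also returns a timestamped PDF save path (pdf_process and img_process call sites updated), and log an empty input dir at info level instead of error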

1 parent 97b0b2ed
...@@ -56,13 +56,14 @@ class Command(BaseCommand, LoggerMixin): ...@@ -56,13 +56,14 @@ class Command(BaseCommand, LoggerMixin):
56 return img_name, 1, 1 56 return img_name, 1, 1
57 57
58 @staticmethod 58 @staticmethod
59 def get_path(name, img_output_dir, wb_output_dir): 59 def get_path(name, img_output_dir, wb_output_dir, pdf_output_dir):
60 time_stamp = int(time.time()) 60 time_stamp = int(time.time())
61 new_name = '{0}_{1}'.format(time_stamp, name) 61 new_name = '{0}_{1}'.format(time_stamp, name)
62 img_save_path = os.path.join(img_output_dir, new_name) 62 img_save_path = os.path.join(img_output_dir, new_name)
63 pdf_save_path = os.path.join(pdf_output_dir, new_name)
63 excel_name = '{0}.xlsx'.format(os.path.splitext(new_name)[0]) 64 excel_name = '{0}.xlsx'.format(os.path.splitext(new_name)[0])
64 excel_path = os.path.join(wb_output_dir, excel_name) 65 excel_path = os.path.join(wb_output_dir, excel_name)
65 return img_save_path, excel_path 66 return img_save_path, excel_path, pdf_save_path
66 67
67 def res_process(self, all_res, classify, excel_path): 68 def res_process(self, all_res, classify, excel_path):
68 try: 69 try:
...@@ -131,7 +132,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -131,7 +132,7 @@ class Command(BaseCommand, LoggerMixin):
131 def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir): 132 def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir):
132 if os.path.exists(path): 133 if os.path.exists(path):
133 try: 134 try:
134 img_save_path, excel_path= self.get_path(name, img_output_dir, wb_output_dir) 135 img_save_path, excel_path, pdf_save_path = self.get_path(name, img_output_dir, wb_output_dir, pdf_output_dir)
135 self.cronjob_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path)) 136 self.cronjob_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path))
136 pdf_handler = PDFHandler(path, img_save_path) 137 pdf_handler = PDFHandler(path, img_save_path)
137 pdf_handler.extract_image() 138 pdf_handler.extract_image()
...@@ -145,15 +146,14 @@ class Command(BaseCommand, LoggerMixin): ...@@ -145,15 +146,14 @@ class Command(BaseCommand, LoggerMixin):
145 ocr_res = self.ocr_process(img_path, classify) 146 ocr_res = self.ocr_process(img_path, classify)
146 all_res[img_path] = ocr_res 147 all_res[img_path] = ocr_res
147 self.res_process(all_res, classify, excel_path) 148 self.res_process(all_res, classify, excel_path)
149 shutil.move(pdf_save_path, pdf_output_dir)
148 150
149 shutil.move(path, pdf_output_dir) 151 def img_process(self, name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir):
150
151 def img_process(self, name, path, classify, wb_output_dir, img_output_dir):
152 ocr_res = self.ocr_process(path, classify) 152 ocr_res = self.ocr_process(path, classify)
153 all_res = {path: ocr_res} 153 all_res = {path: ocr_res}
154 154
155 try: 155 try:
156 img_save_path, excel_path = self.get_path(name, img_output_dir, wb_output_dir) 156 img_save_path, excel_path, _ = self.get_path(name, img_output_dir, wb_output_dir, pdf_output_dir)
157 except Exception as e: 157 except Exception as e:
158 self.cronjob_log.error('{0} [get path error] [path={1}] [error={2}]'.format( 158 self.cronjob_log.error('{0} [get path error] [path={1}] [error={2}]'.format(
159 self.log_base, path, traceback.format_exc())) 159 self.log_base, path, traceback.format_exc()))
...@@ -174,7 +174,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -174,7 +174,7 @@ class Command(BaseCommand, LoggerMixin):
174 # 1. 从input dir获取pdf or image 174 # 1. 从input dir获取pdf or image
175 list_dir = os.listdir(input_dir) 175 list_dir = os.listdir(input_dir)
176 if not list_dir: 176 if not list_dir:
177 self.cronjob_log.error('{0} [input dir empty] [input_dir={1}]'.format(self.log_base, input_dir)) 177 self.cronjob_log.info('{0} [input dir empty] [input_dir={1}]'.format(self.log_base, input_dir))
178 time.sleep(self.sleep_time) 178 time.sleep(self.sleep_time)
179 for name in list_dir: 179 for name in list_dir:
180 path = os.path.join(input_dir, name) 180 path = os.path.join(input_dir, name)
...@@ -183,7 +183,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -183,7 +183,7 @@ class Command(BaseCommand, LoggerMixin):
183 if name.endswith('.pdf'): 183 if name.endswith('.pdf'):
184 self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir) 184 self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir)
185 else: 185 else:
186 self.img_process(name, path, classify, wb_output_dir, img_output_dir) 186 self.img_process(name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir)
187 self.cronjob_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) 187 self.cronjob_log.info('{0} [file end] [path={1}]'.format(self.log_base, path))
188 188
189 def handle(self, *args, **kwargs): 189 def handle(self, *args, **kwargs):
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!