Merge branch 'feature/mvc' into feature/compare
Showing
1 changed file
with
47 additions
and
0 deletions
... | @@ -6,6 +6,7 @@ import base64 | ... | @@ -6,6 +6,7 @@ import base64 |
6 | import signal | 6 | import signal |
7 | import requests | 7 | import requests |
8 | import traceback | 8 | import traceback |
9 | from PIL import Image | ||
9 | from datetime import datetime | 10 | from datetime import datetime |
10 | from django.core.management import BaseCommand | 11 | from django.core.management import BaseCommand |
11 | from multiprocessing import Process | 12 | from multiprocessing import Process |
... | @@ -18,6 +19,27 @@ from apps.doc.exceptions import OCR1Exception, OCR4Exception | ... | @@ -18,6 +19,27 @@ from apps.doc.exceptions import OCR1Exception, OCR4Exception |
18 | from apps.doc.ocr.wb import BSWorkbook | 19 | from apps.doc.ocr.wb import BSWorkbook |
19 | 20 | ||
20 | 21 | ||
class TIFFHandler:
    """Split a (possibly multi-page) TIFF file into one image file per page.

    Attributes:
        path: Path of the source TIFF file.
        img_save_path: Directory the per-page images are written to.
        img_ext: File extension (without a leading dot) used for the page
            images; it also selects the output format when saving.
        img_path_list: Paths of the page images written so far, in page order.
    """

    def __init__(self, path, img_save_path, img_ext='png'):
        """Store the source path, the output directory and the page format.

        Args:
            path: Path of the TIFF file to split.
            img_save_path: Directory in which the page images are saved.
            img_ext: Extension for the saved pages, with or without a
                leading dot. Defaults to ``'png'``.
        """
        self.path = path
        self.img_save_path = img_save_path
        self.img_ext = img_ext.lstrip('.')
        self.img_path_list = []

    def _page_path(self, index):
        """Return the output path for the page at zero-based *index*."""
        # The extension is required: Image.save() is called without an
        # explicit format, so the format is taken from the file name.
        file_name = 'page_{0}.{1}'.format(index, self.img_ext)
        return os.path.join(self.img_save_path, file_name)

    def extract_image(self):
        """Save every frame of the TIFF as its own image file.

        Each saved path is appended to ``self.img_path_list``. Extraction
        stops early if seeking or saving a frame raises ``EOFError``.

        Raises:
            Any exception raised while opening the file or saving a frame,
            other than ``EOFError``, is propagated to the caller.
            NOTE(review): frames whose mode the chosen output format cannot
            store will presumably fail in ``save`` - confirm with real data.
        """
        os.makedirs(self.img_save_path, exist_ok=True)

        # The context manager closes the underlying file handle even when a
        # frame fails to save.
        with Image.open(self.path) as tiff:
            # Fall back to a single frame if the image object does not
            # expose a frame count.
            for i in range(getattr(tiff, 'n_frames', 1)):
                try:
                    save_path = self._page_path(i)
                    tiff.seek(i)
                    tiff.save(save_path)
                    self.img_path_list.append(save_path)
                except EOFError:
                    break
41 | |||
42 | |||
21 | class Command(BaseCommand, LoggerMixin): | 43 | class Command(BaseCommand, LoggerMixin): |
22 | 44 | ||
23 | def __init__(self): | 45 | def __init__(self): |
... | @@ -225,6 +247,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -225,6 +247,7 @@ class Command(BaseCommand, LoggerMixin): |
225 | except Exception as e: | 247 | except Exception as e: |
226 | self.folder_log.error('{0} [pdf to img error] [path={1}] [error={2}]'.format( | 248 | self.folder_log.error('{0} [pdf to img error] [path={1}] [error={2}]'.format( |
227 | self.log_base, path, traceback.format_exc())) | 249 | self.log_base, path, traceback.format_exc())) |
250 | raise e | ||
228 | else: | 251 | else: |
229 | all_res = {} | 252 | all_res = {} |
230 | for img_path in pdf_handler.img_path_list: | 253 | for img_path in pdf_handler.img_path_list: |
... | @@ -233,6 +256,26 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -233,6 +256,26 @@ class Command(BaseCommand, LoggerMixin): |
233 | self.res_process(all_res, classify, excel_path) | 256 | self.res_process(all_res, classify, excel_path) |
234 | shutil.move(path, pdf_save_path) | 257 | shutil.move(path, pdf_save_path) |
235 | 258 | ||
def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir):
    """Split a TIFF into page images, OCR each page and archive the TIFF.

    Does nothing when *path* does not exist. Otherwise the output paths are
    obtained from ``self.get_path``, the TIFF is split with ``TIFFHandler``,
    every extracted page is passed to ``self.ocr_process``, the collected
    results are handed to ``self.res_process`` and the source file is
    finally moved to the TIFF save path.

    Args:
        name: File name, forwarded to ``self.get_path``.
        path: Path of the TIFF file to process.
        classify: Forwarded unchanged to ``self.ocr_process`` and
            ``self.res_process``.
        img_output_dir: Forwarded to ``self.get_path``.
        wb_output_dir: Forwarded to ``self.get_path``.
        tiff_output_dir: Forwarded to ``self.get_path``.

    Raises:
        Exception: Any error raised while resolving paths or splitting the
            TIFF is logged with its traceback and then re-raised.
    """
    # Guard clause: a missing input file is silently ignored.
    if not os.path.exists(path):
        return

    try:
        page_dir, workbook_path, archive_path = self.get_path(
            name, img_output_dir, wb_output_dir, tiff_output_dir)
        self.folder_log.info('{0} [tiff to img start] [path={1}]'.format(self.log_base, path))
        splitter = TIFFHandler(path, page_dir)
        splitter.extract_image()
        self.folder_log.info('{0} [tiff to img end] [path={1}]'.format(self.log_base, path))
    except Exception as e:
        self.folder_log.error('{0} [tiff to img error] [path={1}] [error={2}]'.format(
            self.log_base, path, traceback.format_exc()))
        raise e

    # Only reached when the split succeeded, because the handler above
    # always re-raises.
    results = {
        page_path: self.ocr_process(page_path, classify)
        for page_path in splitter.img_path_list
    }
    self.res_process(results, classify, workbook_path)
    shutil.move(path, archive_path)
278 | |||
236 | def img_process(self, name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir): | 279 | def img_process(self, name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir): |
237 | ocr_res = self.ocr_process(path, classify) | 280 | ocr_res = self.ocr_process(path, classify) |
238 | all_res = {path: ocr_res} | 281 | all_res = {path: ocr_res} |
... | @@ -258,11 +301,13 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -258,11 +301,13 @@ class Command(BaseCommand, LoggerMixin): |
258 | img_output_dir = os.path.join(output_dir, 'image') | 301 | img_output_dir = os.path.join(output_dir, 'image') |
259 | wb_output_dir = os.path.join(output_dir, 'excel') | 302 | wb_output_dir = os.path.join(output_dir, 'excel') |
260 | pdf_output_dir = os.path.join(output_dir, 'pdf') | 303 | pdf_output_dir = os.path.join(output_dir, 'pdf') |
304 | tiff_output_dir = os.path.join(output_dir, 'tiff') | ||
261 | failed_output_dir = os.path.join(output_dir, 'failed') | 305 | failed_output_dir = os.path.join(output_dir, 'failed') |
262 | os.makedirs(output_dir, exist_ok=True) | 306 | os.makedirs(output_dir, exist_ok=True) |
263 | os.makedirs(img_output_dir, exist_ok=True) | 307 | os.makedirs(img_output_dir, exist_ok=True) |
264 | os.makedirs(wb_output_dir, exist_ok=True) | 308 | os.makedirs(wb_output_dir, exist_ok=True) |
265 | os.makedirs(pdf_output_dir, exist_ok=True) | 309 | os.makedirs(pdf_output_dir, exist_ok=True) |
310 | os.makedirs(tiff_output_dir, exist_ok=True) | ||
266 | os.makedirs(failed_output_dir, exist_ok=True) | 311 | os.makedirs(failed_output_dir, exist_ok=True) |
267 | while self.switch: | 312 | while self.switch: |
268 | if not os.path.isdir(input_dir): | 313 | if not os.path.isdir(input_dir): |
... | @@ -282,6 +327,8 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -282,6 +327,8 @@ class Command(BaseCommand, LoggerMixin): |
282 | self.folder_log.info('{0} [file start] [path={1}]'.format(self.log_base, path)) | 327 | self.folder_log.info('{0} [file start] [path={1}]'.format(self.log_base, path)) |
283 | if name.endswith('.pdf'): | 328 | if name.endswith('.pdf'): |
284 | self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir) | 329 | self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir) |
330 | elif name.endswith('.tif'): | ||
331 | self.tif_process(name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir) | ||
285 | else: | 332 | else: |
286 | self.img_process(name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir) | 333 | self.img_process(name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir) |
287 | self.folder_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) | 334 | self.folder_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) | ... | ... |
-
Please register or sign in to post a comment