ltgt part 1
Showing 1 changed file with 35 additions and 24 deletions
... | @@ -74,12 +74,12 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -74,12 +74,12 @@ class Command(BaseCommand, LoggerMixin): |
74 | # seperate folder name | 74 | # seperate folder name |
75 | self.seperate_map = { | 75 | self.seperate_map = { |
76 | consts.IC_CLASSIFY: 'IDCard', | 76 | consts.IC_CLASSIFY: 'IDCard', |
77 | consts.BC_CLASSIFY: 'BankCard' | 77 | consts.MVC_CLASSIFY: 'GreenBook', |
78 | consts.CONTRACT_CLASSIFY: 'Contract', | ||
78 | } | 79 | } |
79 | self.field_map = { | 80 | self.field_map = { |
80 | consts.VAT_CLASSIFY: (consts.VAT_CN_NAME, None, None, consts.VATS_FIELD_ORDER), | 81 | consts.VAT_CLASSIFY: (consts.VAT_CN_NAME, None, None, consts.VATS_FIELD_ORDER), |
81 | consts.IC_CLASSIFY: (consts.IC_CN_NAME, '有效期限', consts.IC_FIELD_ORDER_3, consts.IC_FIELD_ORDER_2), | 82 | consts.IC_CLASSIFY: (consts.IC_CN_NAME, '有效期限', consts.IC_FIELD_ORDER_3, consts.IC_FIELD_ORDER_2), |
82 | consts.BC_CLASSIFY: (consts.BC_CN_NAME, None, None, consts.BC_FIELD_ORDER_2) | ||
83 | } | 83 | } |
84 | # ocr相关 | 84 | # ocr相关 |
85 | self.ocr_url = conf.OCR_URL_FOLDER | 85 | self.ocr_url = conf.OCR_URL_FOLDER |
... | @@ -312,28 +312,31 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -312,28 +312,31 @@ class Command(BaseCommand, LoggerMixin): |
312 | def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir): | 312 | def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir): |
313 | if os.path.exists(path): | 313 | if os.path.exists(path): |
314 | rebuild_res = None | 314 | rebuild_res = None |
315 | try: | 315 | if classify == consts.CONTRACT_CLASSIFY: |
316 | img_save_path, excel_path, pdf_save_path, seperate_path = self.get_path( | 316 | pass |
317 | name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir) | ||
318 | self.folder_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path)) | ||
319 | pdf_handler = PDFHandler(path, img_save_path) | ||
320 | if classify in self.ltgt_classify_mapping: | ||
321 | pdf_handler.extract_page_image() | ||
322 | else: | ||
323 | pdf_handler.extract_image() | ||
324 | self.folder_log.info('{0} [pdf to img end] [path={1}]'.format(self.log_base, path)) | ||
325 | except Exception as e: | ||
326 | self.folder_log.error('{0} [pdf to img error] [path={1}] [error={2}]'.format( | ||
327 | self.log_base, path, traceback.format_exc())) | ||
328 | raise e | ||
329 | else: | 317 | else: |
330 | if classify in self.ltgt_classify_mapping: | 318 | try: |
331 | rebuild_res = self.ltgt_process(pdf_handler.img_path_list, self.ltgt_classify_mapping[classify], | 319 | img_save_path, excel_path, pdf_save_path, seperate_path = self.get_path( |
332 | excel_path, path) | 320 | name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir) |
321 | self.folder_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path)) | ||
322 | pdf_handler = PDFHandler(path, img_save_path) | ||
323 | if classify in self.ltgt_classify_mapping: | ||
324 | pdf_handler.extract_page_image() | ||
325 | else: | ||
326 | pdf_handler.extract_image() | ||
327 | self.folder_log.info('{0} [pdf to img end] [path={1}]'.format(self.log_base, path)) | ||
328 | except Exception as e: | ||
329 | self.folder_log.error('{0} [pdf to img error] [path={1}] [error={2}]'.format( | ||
330 | self.log_base, path, traceback.format_exc())) | ||
331 | raise e | ||
333 | else: | 332 | else: |
334 | rebuild_res = self.images_process(pdf_handler.img_path_list, classify, excel_path, seperate_path) | 333 | if classify in self.ltgt_classify_mapping: |
335 | shutil.move(path, pdf_save_path) | 334 | rebuild_res = self.ltgt_process(pdf_handler.img_path_list, self.ltgt_classify_mapping[classify], |
336 | return rebuild_res | 335 | excel_path, path) |
336 | else: | ||
337 | rebuild_res = self.images_process(pdf_handler.img_path_list, classify, excel_path, seperate_path) | ||
338 | shutil.move(path, pdf_save_path) | ||
339 | return rebuild_res | ||
337 | 340 | ||
338 | def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir): | 341 | def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir): |
339 | if os.path.exists(path): | 342 | if os.path.exists(path): |
... | @@ -434,7 +437,6 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -434,7 +437,6 @@ class Command(BaseCommand, LoggerMixin): |
434 | else: | 437 | else: |
435 | return | 438 | return |
436 | output_dir = os.path.join(os.path.dirname(input_dir), 'Output') | 439 | output_dir = os.path.join(os.path.dirname(input_dir), 'Output') |
437 | seperate_dir = os.path.join(output_dir, self.seperate_map.get(classify, 'Unknown')) if is_combined else None | ||
438 | img_output_dir = os.path.join(output_dir, 'image') | 440 | img_output_dir = os.path.join(output_dir, 'image') |
439 | wb_output_dir = os.path.join(output_dir, 'excel') | 441 | wb_output_dir = os.path.join(output_dir, 'excel') |
440 | pdf_output_dir = os.path.join(output_dir, 'pdf') | 442 | pdf_output_dir = os.path.join(output_dir, 'pdf') |
... | @@ -446,8 +448,13 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -446,8 +448,13 @@ class Command(BaseCommand, LoggerMixin): |
446 | os.makedirs(pdf_output_dir, exist_ok=True) | 448 | os.makedirs(pdf_output_dir, exist_ok=True) |
447 | os.makedirs(tiff_output_dir, exist_ok=True) | 449 | os.makedirs(tiff_output_dir, exist_ok=True) |
448 | os.makedirs(failed_output_dir, exist_ok=True) | 450 | os.makedirs(failed_output_dir, exist_ok=True) |
449 | if seperate_dir is not None: | 451 | |
452 | if is_combined: | ||
453 | seperate_dir = os.path.join(output_dir, self.seperate_map.get(classify, 'Unknown')) | ||
450 | os.makedirs(seperate_dir, exist_ok=True) | 454 | os.makedirs(seperate_dir, exist_ok=True) |
455 | else: | ||
456 | seperate_dir = None | ||
457 | |||
451 | os_error_filename_set = set() | 458 | os_error_filename_set = set() |
452 | while self.switch: | 459 | while self.switch: |
453 | # if not os.path.isdir(input_dir): | 460 | # if not os.path.isdir(input_dir): |
... | @@ -474,9 +481,13 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -474,9 +481,13 @@ class Command(BaseCommand, LoggerMixin): |
474 | result = self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, | 481 | result = self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, |
475 | pdf_output_dir, seperate_dir) | 482 | pdf_output_dir, seperate_dir) |
476 | elif name.endswith('.tif') or name.endswith('.TIF'): | 483 | elif name.endswith('.tif') or name.endswith('.TIF'): |
484 | if classify == consts.CONTRACT_CLASSIFY: | ||
485 | raise LTGTException('e-contract must be pdf') | ||
477 | result = self.tif_process(name, path, classify, img_output_dir, wb_output_dir, | 486 | result = self.tif_process(name, path, classify, img_output_dir, wb_output_dir, |
478 | tiff_output_dir, seperate_dir) | 487 | tiff_output_dir, seperate_dir) |
479 | else: | 488 | else: |
489 | if classify == consts.CONTRACT_CLASSIFY: | ||
490 | raise LTGTException('e-contract must be pdf') | ||
480 | result = self.img_process(name, path, classify, wb_output_dir, img_output_dir, | 491 | result = self.img_process(name, path, classify, wb_output_dir, img_output_dir, |
481 | pdf_output_dir, seperate_dir) | 492 | pdf_output_dir, seperate_dir) |
482 | self.folder_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) | 493 | self.folder_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) | ... | ... |
-
Please register or sign in to post a comment