ltgt part 1
Showing 1 changed file with 35 additions and 24 deletions
| ... | @@ -74,12 +74,12 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -74,12 +74,12 @@ class Command(BaseCommand, LoggerMixin): |
| 74 | # seperate folder name | 74 | # seperate folder name |
| 75 | self.seperate_map = { | 75 | self.seperate_map = { |
| 76 | consts.IC_CLASSIFY: 'IDCard', | 76 | consts.IC_CLASSIFY: 'IDCard', |
| 77 | consts.BC_CLASSIFY: 'BankCard' | 77 | consts.MVC_CLASSIFY: 'GreenBook', |
| 78 | consts.CONTRACT_CLASSIFY: 'Contract', | ||
| 78 | } | 79 | } |
| 79 | self.field_map = { | 80 | self.field_map = { |
| 80 | consts.VAT_CLASSIFY: (consts.VAT_CN_NAME, None, None, consts.VATS_FIELD_ORDER), | 81 | consts.VAT_CLASSIFY: (consts.VAT_CN_NAME, None, None, consts.VATS_FIELD_ORDER), |
| 81 | consts.IC_CLASSIFY: (consts.IC_CN_NAME, '有效期限', consts.IC_FIELD_ORDER_3, consts.IC_FIELD_ORDER_2), | 82 | consts.IC_CLASSIFY: (consts.IC_CN_NAME, '有效期限', consts.IC_FIELD_ORDER_3, consts.IC_FIELD_ORDER_2), |
| 82 | consts.BC_CLASSIFY: (consts.BC_CN_NAME, None, None, consts.BC_FIELD_ORDER_2) | ||
| 83 | } | 83 | } |
| 84 | # ocr相关 | 84 | # ocr相关 |
| 85 | self.ocr_url = conf.OCR_URL_FOLDER | 85 | self.ocr_url = conf.OCR_URL_FOLDER |
| ... | @@ -312,28 +312,31 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -312,28 +312,31 @@ class Command(BaseCommand, LoggerMixin): |
| 312 | def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir): | 312 | def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir): |
| 313 | if os.path.exists(path): | 313 | if os.path.exists(path): |
| 314 | rebuild_res = None | 314 | rebuild_res = None |
| 315 | try: | 315 | if classify == consts.CONTRACT_CLASSIFY: |
| 316 | img_save_path, excel_path, pdf_save_path, seperate_path = self.get_path( | 316 | pass |
| 317 | name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir) | ||
| 318 | self.folder_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path)) | ||
| 319 | pdf_handler = PDFHandler(path, img_save_path) | ||
| 320 | if classify in self.ltgt_classify_mapping: | ||
| 321 | pdf_handler.extract_page_image() | ||
| 322 | else: | ||
| 323 | pdf_handler.extract_image() | ||
| 324 | self.folder_log.info('{0} [pdf to img end] [path={1}]'.format(self.log_base, path)) | ||
| 325 | except Exception as e: | ||
| 326 | self.folder_log.error('{0} [pdf to img error] [path={1}] [error={2}]'.format( | ||
| 327 | self.log_base, path, traceback.format_exc())) | ||
| 328 | raise e | ||
| 329 | else: | 317 | else: |
| 330 | if classify in self.ltgt_classify_mapping: | 318 | try: |
| 331 | rebuild_res = self.ltgt_process(pdf_handler.img_path_list, self.ltgt_classify_mapping[classify], | 319 | img_save_path, excel_path, pdf_save_path, seperate_path = self.get_path( |
| 332 | excel_path, path) | 320 | name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir) |
| 321 | self.folder_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path)) | ||
| 322 | pdf_handler = PDFHandler(path, img_save_path) | ||
| 323 | if classify in self.ltgt_classify_mapping: | ||
| 324 | pdf_handler.extract_page_image() | ||
| 325 | else: | ||
| 326 | pdf_handler.extract_image() | ||
| 327 | self.folder_log.info('{0} [pdf to img end] [path={1}]'.format(self.log_base, path)) | ||
| 328 | except Exception as e: | ||
| 329 | self.folder_log.error('{0} [pdf to img error] [path={1}] [error={2}]'.format( | ||
| 330 | self.log_base, path, traceback.format_exc())) | ||
| 331 | raise e | ||
| 333 | else: | 332 | else: |
| 334 | rebuild_res = self.images_process(pdf_handler.img_path_list, classify, excel_path, seperate_path) | 333 | if classify in self.ltgt_classify_mapping: |
| 335 | shutil.move(path, pdf_save_path) | 334 | rebuild_res = self.ltgt_process(pdf_handler.img_path_list, self.ltgt_classify_mapping[classify], |
| 336 | return rebuild_res | 335 | excel_path, path) |
| 336 | else: | ||
| 337 | rebuild_res = self.images_process(pdf_handler.img_path_list, classify, excel_path, seperate_path) | ||
| 338 | shutil.move(path, pdf_save_path) | ||
| 339 | return rebuild_res | ||
| 337 | 340 | ||
| 338 | def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir): | 341 | def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir): |
| 339 | if os.path.exists(path): | 342 | if os.path.exists(path): |
| ... | @@ -434,7 +437,6 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -434,7 +437,6 @@ class Command(BaseCommand, LoggerMixin): |
| 434 | else: | 437 | else: |
| 435 | return | 438 | return |
| 436 | output_dir = os.path.join(os.path.dirname(input_dir), 'Output') | 439 | output_dir = os.path.join(os.path.dirname(input_dir), 'Output') |
| 437 | seperate_dir = os.path.join(output_dir, self.seperate_map.get(classify, 'Unknown')) if is_combined else None | ||
| 438 | img_output_dir = os.path.join(output_dir, 'image') | 440 | img_output_dir = os.path.join(output_dir, 'image') |
| 439 | wb_output_dir = os.path.join(output_dir, 'excel') | 441 | wb_output_dir = os.path.join(output_dir, 'excel') |
| 440 | pdf_output_dir = os.path.join(output_dir, 'pdf') | 442 | pdf_output_dir = os.path.join(output_dir, 'pdf') |
| ... | @@ -446,8 +448,13 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -446,8 +448,13 @@ class Command(BaseCommand, LoggerMixin): |
| 446 | os.makedirs(pdf_output_dir, exist_ok=True) | 448 | os.makedirs(pdf_output_dir, exist_ok=True) |
| 447 | os.makedirs(tiff_output_dir, exist_ok=True) | 449 | os.makedirs(tiff_output_dir, exist_ok=True) |
| 448 | os.makedirs(failed_output_dir, exist_ok=True) | 450 | os.makedirs(failed_output_dir, exist_ok=True) |
| 449 | if seperate_dir is not None: | 451 | |
| 452 | if is_combined: | ||
| 453 | seperate_dir = os.path.join(output_dir, self.seperate_map.get(classify, 'Unknown')) | ||
| 450 | os.makedirs(seperate_dir, exist_ok=True) | 454 | os.makedirs(seperate_dir, exist_ok=True) |
| 455 | else: | ||
| 456 | seperate_dir = None | ||
| 457 | |||
| 451 | os_error_filename_set = set() | 458 | os_error_filename_set = set() |
| 452 | while self.switch: | 459 | while self.switch: |
| 453 | # if not os.path.isdir(input_dir): | 460 | # if not os.path.isdir(input_dir): |
| ... | @@ -474,9 +481,13 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -474,9 +481,13 @@ class Command(BaseCommand, LoggerMixin): |
| 474 | result = self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, | 481 | result = self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, |
| 475 | pdf_output_dir, seperate_dir) | 482 | pdf_output_dir, seperate_dir) |
| 476 | elif name.endswith('.tif') or name.endswith('.TIF'): | 483 | elif name.endswith('.tif') or name.endswith('.TIF'): |
| 484 | if classify == consts.CONTRACT_CLASSIFY: | ||
| 485 | raise LTGTException('e-contract must be pdf') | ||
| 477 | result = self.tif_process(name, path, classify, img_output_dir, wb_output_dir, | 486 | result = self.tif_process(name, path, classify, img_output_dir, wb_output_dir, |
| 478 | tiff_output_dir, seperate_dir) | 487 | tiff_output_dir, seperate_dir) |
| 479 | else: | 488 | else: |
| 489 | if classify == consts.CONTRACT_CLASSIFY: | ||
| 490 | raise LTGTException('e-contract must be pdf') | ||
| 480 | result = self.img_process(name, path, classify, wb_output_dir, img_output_dir, | 491 | result = self.img_process(name, path, classify, wb_output_dir, img_output_dir, |
| 481 | pdf_output_dir, seperate_dir) | 492 | pdf_output_dir, seperate_dir) |
| 482 | self.folder_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) | 493 | self.folder_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) | ... | ... |
-
Please register or sign in to post a comment