f417660d by 周伟奇

ltgt part 1

1 parent a21cf252
...@@ -74,12 +74,12 @@ class Command(BaseCommand, LoggerMixin): ...@@ -74,12 +74,12 @@ class Command(BaseCommand, LoggerMixin):
74 # seperate folder name 74 # seperate folder name
75 self.seperate_map = { 75 self.seperate_map = {
76 consts.IC_CLASSIFY: 'IDCard', 76 consts.IC_CLASSIFY: 'IDCard',
77 consts.BC_CLASSIFY: 'BankCard' 77 consts.MVC_CLASSIFY: 'GreenBook',
78 consts.CONTRACT_CLASSIFY: 'Contract',
78 } 79 }
79 self.field_map = { 80 self.field_map = {
80 consts.VAT_CLASSIFY: (consts.VAT_CN_NAME, None, None, consts.VATS_FIELD_ORDER), 81 consts.VAT_CLASSIFY: (consts.VAT_CN_NAME, None, None, consts.VATS_FIELD_ORDER),
81 consts.IC_CLASSIFY: (consts.IC_CN_NAME, '有效期限', consts.IC_FIELD_ORDER_3, consts.IC_FIELD_ORDER_2), 82 consts.IC_CLASSIFY: (consts.IC_CN_NAME, '有效期限', consts.IC_FIELD_ORDER_3, consts.IC_FIELD_ORDER_2),
82 consts.BC_CLASSIFY: (consts.BC_CN_NAME, None, None, consts.BC_FIELD_ORDER_2)
83 } 83 }
84 # ocr相关 84 # ocr相关
85 self.ocr_url = conf.OCR_URL_FOLDER 85 self.ocr_url = conf.OCR_URL_FOLDER
...@@ -312,28 +312,31 @@ class Command(BaseCommand, LoggerMixin): ...@@ -312,28 +312,31 @@ class Command(BaseCommand, LoggerMixin):
312 def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir): 312 def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir):
313 if os.path.exists(path): 313 if os.path.exists(path):
314 rebuild_res = None 314 rebuild_res = None
315 try: 315 if classify == consts.CONTRACT_CLASSIFY:
316 img_save_path, excel_path, pdf_save_path, seperate_path = self.get_path( 316 pass
317 name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir)
318 self.folder_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path))
319 pdf_handler = PDFHandler(path, img_save_path)
320 if classify in self.ltgt_classify_mapping:
321 pdf_handler.extract_page_image()
322 else:
323 pdf_handler.extract_image()
324 self.folder_log.info('{0} [pdf to img end] [path={1}]'.format(self.log_base, path))
325 except Exception as e:
326 self.folder_log.error('{0} [pdf to img error] [path={1}] [error={2}]'.format(
327 self.log_base, path, traceback.format_exc()))
328 raise e
329 else: 317 else:
330 if classify in self.ltgt_classify_mapping: 318 try:
331 rebuild_res = self.ltgt_process(pdf_handler.img_path_list, self.ltgt_classify_mapping[classify], 319 img_save_path, excel_path, pdf_save_path, seperate_path = self.get_path(
332 excel_path, path) 320 name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir)
321 self.folder_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path))
322 pdf_handler = PDFHandler(path, img_save_path)
323 if classify in self.ltgt_classify_mapping:
324 pdf_handler.extract_page_image()
325 else:
326 pdf_handler.extract_image()
327 self.folder_log.info('{0} [pdf to img end] [path={1}]'.format(self.log_base, path))
328 except Exception as e:
329 self.folder_log.error('{0} [pdf to img error] [path={1}] [error={2}]'.format(
330 self.log_base, path, traceback.format_exc()))
331 raise e
333 else: 332 else:
334 rebuild_res = self.images_process(pdf_handler.img_path_list, classify, excel_path, seperate_path) 333 if classify in self.ltgt_classify_mapping:
335 shutil.move(path, pdf_save_path) 334 rebuild_res = self.ltgt_process(pdf_handler.img_path_list, self.ltgt_classify_mapping[classify],
336 return rebuild_res 335 excel_path, path)
336 else:
337 rebuild_res = self.images_process(pdf_handler.img_path_list, classify, excel_path, seperate_path)
338 shutil.move(path, pdf_save_path)
339 return rebuild_res
337 340
338 def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir): 341 def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir):
339 if os.path.exists(path): 342 if os.path.exists(path):
...@@ -434,7 +437,6 @@ class Command(BaseCommand, LoggerMixin): ...@@ -434,7 +437,6 @@ class Command(BaseCommand, LoggerMixin):
434 else: 437 else:
435 return 438 return
436 output_dir = os.path.join(os.path.dirname(input_dir), 'Output') 439 output_dir = os.path.join(os.path.dirname(input_dir), 'Output')
437 seperate_dir = os.path.join(output_dir, self.seperate_map.get(classify, 'Unknown')) if is_combined else None
438 img_output_dir = os.path.join(output_dir, 'image') 440 img_output_dir = os.path.join(output_dir, 'image')
439 wb_output_dir = os.path.join(output_dir, 'excel') 441 wb_output_dir = os.path.join(output_dir, 'excel')
440 pdf_output_dir = os.path.join(output_dir, 'pdf') 442 pdf_output_dir = os.path.join(output_dir, 'pdf')
...@@ -446,8 +448,13 @@ class Command(BaseCommand, LoggerMixin): ...@@ -446,8 +448,13 @@ class Command(BaseCommand, LoggerMixin):
446 os.makedirs(pdf_output_dir, exist_ok=True) 448 os.makedirs(pdf_output_dir, exist_ok=True)
447 os.makedirs(tiff_output_dir, exist_ok=True) 449 os.makedirs(tiff_output_dir, exist_ok=True)
448 os.makedirs(failed_output_dir, exist_ok=True) 450 os.makedirs(failed_output_dir, exist_ok=True)
449 if seperate_dir is not None: 451
452 if is_combined:
453 seperate_dir = os.path.join(output_dir, self.seperate_map.get(classify, 'Unknown'))
450 os.makedirs(seperate_dir, exist_ok=True) 454 os.makedirs(seperate_dir, exist_ok=True)
455 else:
456 seperate_dir = None
457
451 os_error_filename_set = set() 458 os_error_filename_set = set()
452 while self.switch: 459 while self.switch:
453 # if not os.path.isdir(input_dir): 460 # if not os.path.isdir(input_dir):
...@@ -474,9 +481,13 @@ class Command(BaseCommand, LoggerMixin): ...@@ -474,9 +481,13 @@ class Command(BaseCommand, LoggerMixin):
474 result = self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, 481 result = self.pdf_process(name, path, classify, img_output_dir, wb_output_dir,
475 pdf_output_dir, seperate_dir) 482 pdf_output_dir, seperate_dir)
476 elif name.endswith('.tif') or name.endswith('.TIF'): 483 elif name.endswith('.tif') or name.endswith('.TIF'):
484 if classify == consts.CONTRACT_CLASSIFY:
485 raise LTGTException('e-contract must be pdf')
477 result = self.tif_process(name, path, classify, img_output_dir, wb_output_dir, 486 result = self.tif_process(name, path, classify, img_output_dir, wb_output_dir,
478 tiff_output_dir, seperate_dir) 487 tiff_output_dir, seperate_dir)
479 else: 488 else:
489 if classify == consts.CONTRACT_CLASSIFY:
490 raise LTGTException('e-contract must be pdf')
480 result = self.img_process(name, path, classify, wb_output_dir, img_output_dir, 491 result = self.img_process(name, path, classify, wb_output_dir, img_output_dir,
481 pdf_output_dir, seperate_dir) 492 pdf_output_dir, seperate_dir)
482 self.folder_log.info('{0} [file end] [path={1}]'.format(self.log_base, path)) 493 self.folder_log.info('{0} [file end] [path={1}]'.format(self.log_base, path))
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!