f417660d by 周伟奇

ltgt part 1

1 parent a21cf252
......@@ -74,12 +74,12 @@ class Command(BaseCommand, LoggerMixin):
# separate folder name
self.seperate_map = {
consts.IC_CLASSIFY: 'IDCard',
consts.BC_CLASSIFY: 'BankCard'
consts.MVC_CLASSIFY: 'GreenBook',
consts.CONTRACT_CLASSIFY: 'Contract',
}
self.field_map = {
consts.VAT_CLASSIFY: (consts.VAT_CN_NAME, None, None, consts.VATS_FIELD_ORDER),
consts.IC_CLASSIFY: (consts.IC_CN_NAME, '有效期限', consts.IC_FIELD_ORDER_3, consts.IC_FIELD_ORDER_2),
consts.BC_CLASSIFY: (consts.BC_CN_NAME, None, None, consts.BC_FIELD_ORDER_2)
}
# ocr相关
self.ocr_url = conf.OCR_URL_FOLDER
......@@ -312,28 +312,31 @@ class Command(BaseCommand, LoggerMixin):
# pdf_process: turn one PDF into images, run the recognition step selected by `classify`,
# move the PDF to pdf_save_path and return the recognition result.
#
# NOTE(review): this span looks like a rendered diff hunk with indentation stripped. The
# conversion try/except, the classify dispatch and the move/return each appear twice
# (presumably the removed and the added side). Confirm nesting and which copy is live
# against the real file before changing any code here.
#
# Parameters (as used below):
#   name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir: forwarded to self.get_path().
#   path: location of the PDF; guarded by os.path.exists(path).
#   classify: selects the extraction mode and the processing method.
# Returns: rebuild_res, which starts as None and is otherwise the return value of
#   self.ltgt_process() or self.images_process().
# Raises: any exception from get_path()/PDFHandler extraction, after logging the traceback.
def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir):
if os.path.exists(path):
rebuild_res = None
# Resolve the output paths and render the PDF to images under img_save_path.
try:
img_save_path, excel_path, pdf_save_path, seperate_path = self.get_path(
name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir)
self.folder_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path))
pdf_handler = PDFHandler(path, img_save_path)
# Classifications listed in ltgt_classify_mapping use extract_page_image();
# every other classification uses extract_image().
if classify in self.ltgt_classify_mapping:
pdf_handler.extract_page_image()
else:
pdf_handler.extract_image()
self.folder_log.info('{0} [pdf to img end] [path={1}]'.format(self.log_base, path))
except Exception as e:
# Log the full traceback, then propagate the failure to the caller.
self.folder_log.error('{0} [pdf to img error] [path={1}] [error={2}]'.format(
self.log_base, path, traceback.format_exc()))
raise e
# Contract classification is a no-op at this point (pass).
# NOTE(review): with indentation lost it is unclear how much of what follows sits under
# this else: branch. If the later pdf_handler/excel_path uses are reachable for
# contracts, those names would be unbound. Verify.
if classify == consts.CONTRACT_CLASSIFY:
pass
else:
# Dispatch to ltgt_process with the mapped classify value for ltgt classifications.
if classify in self.ltgt_classify_mapping:
rebuild_res = self.ltgt_process(pdf_handler.img_path_list, self.ltgt_classify_mapping[classify],
excel_path, path)
# Second copy of the conversion step (presumably the re-indented, added side of the diff).
try:
img_save_path, excel_path, pdf_save_path, seperate_path = self.get_path(
name, img_output_dir, wb_output_dir, pdf_output_dir, seperate_dir)
self.folder_log.info('{0} [pdf to img start] [path={1}]'.format(self.log_base, path))
pdf_handler = PDFHandler(path, img_save_path)
if classify in self.ltgt_classify_mapping:
pdf_handler.extract_page_image()
else:
pdf_handler.extract_image()
self.folder_log.info('{0} [pdf to img end] [path={1}]'.format(self.log_base, path))
except Exception as e:
self.folder_log.error('{0} [pdf to img error] [path={1}] [error={2}]'.format(
self.log_base, path, traceback.format_exc()))
raise e
# NOTE(review): cannot tell from here whether this else: pairs with the try above or
# with an `if classify in ...` test. Confirm against the real file.
else:
rebuild_res = self.images_process(pdf_handler.img_path_list, classify, excel_path, seperate_path)
# Move the source PDF to pdf_save_path, then hand back the result.
shutil.move(path, pdf_save_path)
return rebuild_res
# Second copy of the dispatch/move/return sequence (presumably the added side of the diff).
if classify in self.ltgt_classify_mapping:
rebuild_res = self.ltgt_process(pdf_handler.img_path_list, self.ltgt_classify_mapping[classify],
excel_path, path)
else:
rebuild_res = self.images_process(pdf_handler.img_path_list, classify, excel_path, seperate_path)
shutil.move(path, pdf_save_path)
return rebuild_res
def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir, seperate_dir):
if os.path.exists(path):
......@@ -434,7 +437,6 @@ class Command(BaseCommand, LoggerMixin):
else:
return
output_dir = os.path.join(os.path.dirname(input_dir), 'Output')
seperate_dir = os.path.join(output_dir, self.seperate_map.get(classify, 'Unknown')) if is_combined else None
img_output_dir = os.path.join(output_dir, 'image')
wb_output_dir = os.path.join(output_dir, 'excel')
pdf_output_dir = os.path.join(output_dir, 'pdf')
......@@ -446,8 +448,13 @@ class Command(BaseCommand, LoggerMixin):
os.makedirs(pdf_output_dir, exist_ok=True)
os.makedirs(tiff_output_dir, exist_ok=True)
os.makedirs(failed_output_dir, exist_ok=True)
if seperate_dir is not None:
if is_combined:
seperate_dir = os.path.join(output_dir, self.seperate_map.get(classify, 'Unknown'))
os.makedirs(seperate_dir, exist_ok=True)
else:
seperate_dir = None
os_error_filename_set = set()
while self.switch:
# if not os.path.isdir(input_dir):
......@@ -474,9 +481,13 @@ class Command(BaseCommand, LoggerMixin):
result = self.pdf_process(name, path, classify, img_output_dir, wb_output_dir,
pdf_output_dir, seperate_dir)
elif name.endswith('.tif') or name.endswith('.TIF'):
if classify == consts.CONTRACT_CLASSIFY:
raise LTGTException('e-contract must be pdf')
result = self.tif_process(name, path, classify, img_output_dir, wb_output_dir,
tiff_output_dir, seperate_dir)
else:
if classify == consts.CONTRACT_CLASSIFY:
raise LTGTException('e-contract must be pdf')
result = self.img_process(name, path, classify, wb_output_dir, img_output_dir,
pdf_output_dir, seperate_dir)
self.folder_log.info('{0} [file end] [path={1}]'.format(self.log_base, path))
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!