423427c0 by 周伟奇

Merge branch 'feature/ltgt' into feature/0611

2 parents d78669c5 68d7dd98
......@@ -13,6 +13,9 @@ class OCR2Exception(Exception):
class OCR4Exception(Exception):
    """Marker exception type; adds no behaviour beyond ``Exception``."""
class LTGTException(Exception):
    """Marker exception type; adds no behaviour beyond ``Exception``."""
class GCAPException(Exception):
    """Marker exception type; adds no behaviour beyond ``Exception``."""
......
......@@ -61,13 +61,11 @@ class Command(BaseCommand, LoggerMixin):
def signal_handler(self, sig, frame):
self.switch = False # 停止处理文件
def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path):
def license1_process(self, ocr_data, license_summary, classify, img_path):
# 类别:'0'身份证, '1'居住证
license_data = ocr_data.get('data', [])
if not license_data:
res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))
return
res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))
if classify == consts.MVC_CLASSIFY: # 车辆登记证 3/4页结果整合
for mvc_dict in license_data:
try:
......@@ -154,29 +152,21 @@ class Command(BaseCommand, LoggerMixin):
def res_process(self, all_res, classify, excel_path):
try:
license_summary = {}
res_list = []
if not all_res:
return
else:
for img_path, ocr_res in all_res.items():
img_name, pno, ino = self.parse_img_path(img_path)
part_idx = 1
# img_name, pno, ino = self.parse_img_path(img_path)
# part_idx = 1
if isinstance(ocr_res, dict):
if ocr_res.get('code') == 1:
data_list = ocr_res.get('data', [])
if isinstance(data_list, list):
for part_idx, ocr_data in enumerate(data_list):
part_idx = part_idx + 1
self.license1_process(ocr_data, license_summary, classify,
res_list, pno, ino, part_idx, img_path)
else:
res_list.append((pno, ino, part_idx, consts.RES_FAILED_3))
else:
res_list.append((pno, ino, part_idx, consts.RES_FAILED))
else:
res_list.append((pno, ino, part_idx, consts.RES_FAILED))
for ocr_data in data_list:
# part_idx = part_idx + 1
self.license1_process(ocr_data, license_summary, classify, img_path)
wb = BSWorkbook(set(), set(), set(), set(), set())
wb.simple_license_rebuild(license_summary, consts.DOC_SCHEME_LIST[0])
......@@ -216,6 +206,13 @@ class Command(BaseCommand, LoggerMixin):
return ocr_res
else:
self.folder_log.warn('{0} [ocr failed] [img_path={1}]'.format(self.log_base, img_path))
def images_process(self, img_path_list, classify, excel_path):
    """Run OCR over every image path, then post-process the results together.

    Args:
        img_path_list: Iterable of image paths; each is passed to
            ``self.ocr_process`` in iteration order.
        classify: Classification value forwarded unchanged to both
            ``self.ocr_process`` and ``self.res_process``.
        excel_path: Forwarded unchanged to ``self.res_process``.
    """
    # Every image is OCR'd before res_process is invoked, so the results
    # are handed over as a single path -> OCR-result mapping.
    ocr_by_path = {
        one_path: self.ocr_process(one_path, classify)
        for one_path in img_path_list
    }
    self.res_process(ocr_by_path, classify, excel_path)
def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir):
if os.path.exists(path):
......@@ -230,11 +227,7 @@ class Command(BaseCommand, LoggerMixin):
self.log_base, path, traceback.format_exc()))
raise e
else:
all_res = {}
for img_path in pdf_handler.img_path_list:
ocr_res = self.ocr_process(img_path, classify)
all_res[img_path] = ocr_res
self.res_process(all_res, classify, excel_path)
self.images_process(pdf_handler.img_path_list, classify, excel_path)
shutil.move(path, pdf_save_path)
def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir):
......@@ -250,23 +243,18 @@ class Command(BaseCommand, LoggerMixin):
self.log_base, path, traceback.format_exc()))
raise e
else:
all_res = {}
for img_path in tiff_handler.img_path_list:
ocr_res = self.ocr_process(img_path, classify)
all_res[img_path] = ocr_res
self.res_process(all_res, classify, excel_path)
self.images_process(tiff_handler.img_path_list, classify, excel_path)
shutil.move(path, tiff_save_path)
def img_process(self, name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir):
    """OCR a single image file, post-process the result and move the file.

    The destination paths are resolved first. OCR runs exactly once, and
    only when path resolution succeeded, so no OCR call is spent on a file
    whose result could not be stored anyway.

    Args:
        name: File name, forwarded to ``self.get_path``.
        path: Location of the image; it is OCR'd and afterwards moved.
        classify: Classification value forwarded to ``self.ocr_process``
            and ``self.res_process``.
        wb_output_dir: Workbook output directory for ``self.get_path``.
        img_output_dir: Image output directory for ``self.get_path``.
        pdf_output_dir: PDF output directory for ``self.get_path``.

    Note:
        ``self.get_path`` receives ``img_output_dir`` before
        ``wb_output_dir``, i.e. not in this method's own parameter order.
        A failure inside ``self.get_path`` is logged and swallowed here; it
        is not re-raised.
    """
    try:
        img_save_path, excel_path, _ = self.get_path(name, img_output_dir, wb_output_dir, pdf_output_dir)
    except Exception:
        self.folder_log.error('{0} [get path error] [path={1}] [error={2}]'.format(
            self.log_base, path, traceback.format_exc()))
    else:
        # Single OCR pass; the mapping shape matches what res_process
        # receives from the multi-image code paths.
        ocr_res = self.ocr_process(path, classify)
        all_res = {path: ocr_res}
        self.res_process(all_res, classify, excel_path)
        shutil.move(path, img_save_path)
......@@ -312,9 +300,9 @@ class Command(BaseCommand, LoggerMixin):
try:
if os.path.isfile(path):
self.folder_log.info('{0} [file start] [path={1}]'.format(self.log_base, path))
if name.endswith('.pdf'):
if name.endswith('.pdf') or name.endswith('.PDF'):
self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir)
elif name.endswith('.tif'):
elif name.endswith('.tif') or name.endswith('.TIF'):
self.tif_process(name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir)
else:
self.img_process(name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir)
......
......@@ -702,6 +702,31 @@ class BSWorkbook(Workbook):
if field_str is not None:
count_list.append((field_str, count))
def ltgt_build(self, label, result_dict):
    """Write ``result_dict`` into a new sheet and return it flattened.

    One row ``(field, content)`` is appended to the sheet per emitted
    field, and the same pair is stored in the returned dict:

    * list value  -> the ``'words'`` of its entries joined with ``'、'``.
      Entries without ``'words'`` (or with ``None``) are skipped, and
      entries that are not dicts are used as-is, so a partially filled
      list no longer raises.
    * dict value containing ``'words'`` -> that ``'words'`` value.
    * any other dict -> one field per item, named ``'<key>: <sub_key>'``;
      a dict sub-value contributes its ``'words'`` (``''`` if absent),
      anything else is written unchanged.
    * any other value -> written unchanged.

    Args:
        label: Title passed to ``self.create_sheet``.
        result_dict: Mapping of field name to value, in the shapes above.

    Returns:
        dict: Field name -> content, in the order rows were appended.
    """
    ws = self.create_sheet(label)
    rebuild_res = {}

    def emit(field, content):
        # Keep the sheet row and the returned mapping in lockstep.
        ws.append((field, content))
        rebuild_res[field] = content

    for key, value in result_dict.items():
        if isinstance(value, list):
            words = []
            for item in value:
                word = item.get('words') if isinstance(item, dict) else item
                if word is not None:
                    # str() so one non-string entry cannot break the join.
                    words.append(str(word))
            emit(key, '、'.join(words))
        elif isinstance(value, dict):
            if 'words' in value:
                emit(key, value['words'])
            else:
                for sub_key, sub_value in value.items():
                    if isinstance(sub_value, dict):
                        sub_value = sub_value.get('words', '')
                    emit('{0}: {1}'.format(key, sub_key), sub_value)
        else:
            emit(key, value)
    return rebuild_res
def simple_license_rebuild(self, license_summary, document_scheme):
# for ic_license_dict in license_summary.get(consts.IC_CLASSIFY, []):
# if ic_license_dict.get('类别') == '1':
......
......@@ -225,3 +225,13 @@ class PDFHandler:
else:
self.merge_il(pdf, pno, il)
self.img_count = len(self.img_path_list)
def extract_page_image(self):
    """Pass every page of the PDF at ``self.path`` to ``self.page_to_png``.

    Resets ``self.img_path_list`` and ``self.xref_set``, ensures
    ``self.img_dir_path`` exists, walks the pages in order, and finally
    stores ``len(self.img_path_list)`` in ``self.img_count``.
    """
    self.img_path_list = []
    self.xref_set = set()
    os.makedirs(self.img_dir_path, exist_ok=True)
    with fitz.Document(self.path) as pdf:
        for page_index in range(pdf.pageCount):
            # presumably page_to_png appends to img_path_list — confirm
            # against its definition, which is outside this excerpt.
            self.page_to_png(pdf.loadPage(page_index))
    self.img_count = len(self.img_path_list)
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!