b8745dc6 by 周伟奇

Merge branch 'feature/ltgt' into feature/0611

2 parents d78669c5 f63f9c2a
...@@ -1448,4 +1448,16 @@ SE_SECOND_ID_FIELD_MAPPING = { ...@@ -1448,4 +1448,16 @@ SE_SECOND_ID_FIELD_MAPPING = {
1448 1448
1449 HEAD_LIST = ['Info', 'Index', 'License', 'Field', 'Input', 'OCR', 'Result', 'Position', 'Image', 'errorType'] 1449 HEAD_LIST = ['Info', 'Index', 'License', 'Field', 'Input', 'OCR', 'Result', 'Position', 'Image', 'errorType']
1450 1450
# ----------------litigation------------------------
# Ordered (key, label) pairs for the litigation flow.
# NOTE(review): the first element looks like the key in the OCR result and the
# second the label written to the output — confirm against the consumers.

# Field names here match ID-card fields; key and label are identical.
IC_FIELD_ORDER_2 = tuple(
    (field, field)
    for field in ('姓名', '公民身份号码', '出生年月', '住址', '性别', '民族')
)
IC_FIELD_ORDER_3 = tuple((field, field) for field in ('有效期限', '签发机关'))

# Bank-card fields: English key paired with its Chinese label.
BC_FIELD_ORDER_2 = (
    ('BankName', '发卡行名称'),
    ('CardNum', '银行卡号'),
    ('CardType', '银行卡类型'),
)
1451 1463
......
...@@ -13,6 +13,9 @@ class OCR2Exception(Exception): ...@@ -13,6 +13,9 @@ class OCR2Exception(Exception):
13 class OCR4Exception(Exception): 13 class OCR4Exception(Exception):
14 pass 14 pass
15 15
class LTGTException(Exception):
    """Error raised by the LTGT flow.

    NOTE(review): "LTGT" presumably abbreviates "litigation" — confirm.
    Adds nothing to ``Exception``; it exists so callers can catch this
    failure category separately.
    """
18
16 19
17 class GCAPException(Exception): 20 class GCAPException(Exception):
18 pass 21 pass
......
...@@ -61,13 +61,11 @@ class Command(BaseCommand, LoggerMixin): ...@@ -61,13 +61,11 @@ class Command(BaseCommand, LoggerMixin):
61 def signal_handler(self, sig, frame): 61 def signal_handler(self, sig, frame):
62 self.switch = False # 停止处理文件 62 self.switch = False # 停止处理文件
63 63
64 def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path): 64 def license1_process(self, ocr_data, license_summary, classify, img_path):
65 # 类别:'0'身份证, '1'居住证 65 # 类别:'0'身份证, '1'居住证
66 license_data = ocr_data.get('data', []) 66 license_data = ocr_data.get('data', [])
67 if not license_data: 67 if not license_data:
68 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))
69 return 68 return
70 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))
71 if classify == consts.MVC_CLASSIFY: # 车辆登记证 3/4页结果整合 69 if classify == consts.MVC_CLASSIFY: # 车辆登记证 3/4页结果整合
72 for mvc_dict in license_data: 70 for mvc_dict in license_data:
73 try: 71 try:
...@@ -154,29 +152,21 @@ class Command(BaseCommand, LoggerMixin): ...@@ -154,29 +152,21 @@ class Command(BaseCommand, LoggerMixin):
154 def res_process(self, all_res, classify, excel_path): 152 def res_process(self, all_res, classify, excel_path):
155 try: 153 try:
156 license_summary = {} 154 license_summary = {}
157 res_list = []
158 155
159 if not all_res: 156 if not all_res:
160 return 157 return
161 else: 158 else:
162 for img_path, ocr_res in all_res.items(): 159 for img_path, ocr_res in all_res.items():
163 img_name, pno, ino = self.parse_img_path(img_path) 160 # img_name, pno, ino = self.parse_img_path(img_path)
164 part_idx = 1 161 # part_idx = 1
165 162
166 if isinstance(ocr_res, dict): 163 if isinstance(ocr_res, dict):
167 if ocr_res.get('code') == 1: 164 if ocr_res.get('code') == 1:
168 data_list = ocr_res.get('data', []) 165 data_list = ocr_res.get('data', [])
169 if isinstance(data_list, list): 166 if isinstance(data_list, list):
170 for part_idx, ocr_data in enumerate(data_list): 167 for ocr_data in data_list:
171 part_idx = part_idx + 1 168 # part_idx = part_idx + 1
172 self.license1_process(ocr_data, license_summary, classify, 169 self.license1_process(ocr_data, license_summary, classify, img_path)
173 res_list, pno, ino, part_idx, img_path)
174 else:
175 res_list.append((pno, ino, part_idx, consts.RES_FAILED_3))
176 else:
177 res_list.append((pno, ino, part_idx, consts.RES_FAILED))
178 else:
179 res_list.append((pno, ino, part_idx, consts.RES_FAILED))
180 170
181 wb = BSWorkbook(set(), set(), set(), set(), set()) 171 wb = BSWorkbook(set(), set(), set(), set(), set())
182 wb.simple_license_rebuild(license_summary, consts.DOC_SCHEME_LIST[0]) 172 wb.simple_license_rebuild(license_summary, consts.DOC_SCHEME_LIST[0])
...@@ -217,6 +207,13 @@ class Command(BaseCommand, LoggerMixin): ...@@ -217,6 +207,13 @@ class Command(BaseCommand, LoggerMixin):
217 else: 207 else:
218 self.folder_log.warn('{0} [ocr failed] [img_path={1}]'.format(self.log_base, img_path)) 208 self.folder_log.warn('{0} [ocr failed] [img_path={1}]'.format(self.log_base, img_path))
219 209
def images_process(self, img_path_list, classify, excel_path):
    """OCR every image and pass the collected results to ``res_process``.

    Args:
        img_path_list: Iterable of image paths; each is passed to
            ``self.ocr_process`` together with ``classify``.
        classify: Classification value forwarded unchanged to both
            ``self.ocr_process`` and ``self.res_process``.
        excel_path: Output path forwarded unchanged to ``self.res_process``.

    Returns:
        None. The results are consumed by ``self.res_process``.
    """
    # Keyed by image path, in input order; a repeated path keeps the last result.
    results_by_path = {
        image: self.ocr_process(image, classify) for image in img_path_list
    }
    self.res_process(results_by_path, classify, excel_path)
216
220 def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir): 217 def pdf_process(self, name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir):
221 if os.path.exists(path): 218 if os.path.exists(path):
222 try: 219 try:
...@@ -230,11 +227,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -230,11 +227,7 @@ class Command(BaseCommand, LoggerMixin):
230 self.log_base, path, traceback.format_exc())) 227 self.log_base, path, traceback.format_exc()))
231 raise e 228 raise e
232 else: 229 else:
233 all_res = {} 230 self.images_process(pdf_handler.img_path_list, classify, excel_path)
234 for img_path in pdf_handler.img_path_list:
235 ocr_res = self.ocr_process(img_path, classify)
236 all_res[img_path] = ocr_res
237 self.res_process(all_res, classify, excel_path)
238 shutil.move(path, pdf_save_path) 231 shutil.move(path, pdf_save_path)
239 232
240 def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir): 233 def tif_process(self, name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir):
...@@ -250,23 +243,18 @@ class Command(BaseCommand, LoggerMixin): ...@@ -250,23 +243,18 @@ class Command(BaseCommand, LoggerMixin):
250 self.log_base, path, traceback.format_exc())) 243 self.log_base, path, traceback.format_exc()))
251 raise e 244 raise e
252 else: 245 else:
253 all_res = {} 246 self.images_process(tiff_handler.img_path_list, classify, excel_path)
254 for img_path in tiff_handler.img_path_list:
255 ocr_res = self.ocr_process(img_path, classify)
256 all_res[img_path] = ocr_res
257 self.res_process(all_res, classify, excel_path)
258 shutil.move(path, tiff_save_path) 247 shutil.move(path, tiff_save_path)
259 248
260 def img_process(self, name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir): 249 def img_process(self, name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir):
261 ocr_res = self.ocr_process(path, classify)
262 all_res = {path: ocr_res}
263
264 try: 250 try:
265 img_save_path, excel_path, _ = self.get_path(name, img_output_dir, wb_output_dir, pdf_output_dir) 251 img_save_path, excel_path, _ = self.get_path(name, img_output_dir, wb_output_dir, pdf_output_dir)
266 except Exception as e: 252 except Exception as e:
267 self.folder_log.error('{0} [get path error] [path={1}] [error={2}]'.format( 253 self.folder_log.error('{0} [get path error] [path={1}] [error={2}]'.format(
268 self.log_base, path, traceback.format_exc())) 254 self.log_base, path, traceback.format_exc()))
269 else: 255 else:
256 ocr_res = self.ocr_process(path, classify)
257 all_res = {path: ocr_res}
270 self.res_process(all_res, classify, excel_path) 258 self.res_process(all_res, classify, excel_path)
271 shutil.move(path, img_save_path) 259 shutil.move(path, img_save_path)
272 260
...@@ -312,9 +300,9 @@ class Command(BaseCommand, LoggerMixin): ...@@ -312,9 +300,9 @@ class Command(BaseCommand, LoggerMixin):
312 try: 300 try:
313 if os.path.isfile(path): 301 if os.path.isfile(path):
314 self.folder_log.info('{0} [file start] [path={1}]'.format(self.log_base, path)) 302 self.folder_log.info('{0} [file start] [path={1}]'.format(self.log_base, path))
315 if name.endswith('.pdf'): 303 if name.endswith('.pdf') or name.endswith('.PDF'):
316 self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir) 304 self.pdf_process(name, path, classify, img_output_dir, wb_output_dir, pdf_output_dir)
317 elif name.endswith('.tif'): 305 elif name.endswith('.tif') or name.endswith('.TIF'):
318 self.tif_process(name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir) 306 self.tif_process(name, path, classify, img_output_dir, wb_output_dir, tiff_output_dir)
319 else: 307 else:
320 self.img_process(name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir) 308 self.img_process(name, path, classify, wb_output_dir, img_output_dir, pdf_output_dir)
......
...@@ -702,6 +702,31 @@ class BSWorkbook(Workbook): ...@@ -702,6 +702,31 @@ class BSWorkbook(Workbook):
702 if field_str is not None: 702 if field_str is not None:
703 count_list.append((field_str, count)) 703 count_list.append((field_str, count))
704 704
def ltgt_build(self, label, result_dict):
    """Flatten an OCR result into a new sheet and return the flattened mapping.

    A sheet is created via ``self.create_sheet(label)`` and one
    ``(field, text)`` row is appended per flattened entry:

    * list value  -> the ``'words'`` of every item joined with ``'、'``.
      Items without ``'words'`` are skipped, and items that are not dicts
      are used as-is, so one malformed item does not abort the sheet.
    * dict value containing ``'words'`` -> that text.
    * any other dict -> one row per sub-key, named ``'<key>: <sub_key>'``;
      a dict sub-value contributes its ``'words'`` (``''`` when absent),
      any other sub-value is written unchanged.
    * anything else -> written unchanged.

    Args:
        label: Sheet title passed to ``self.create_sheet``.
        result_dict: Mapping of field name to OCR value, in the shapes above.

    Returns:
        dict: Flattened ``field -> text`` mapping mirroring the rows written.
        If two entries flatten to the same field name, both rows are written
        but the mapping keeps only the last one.
    """
    ws = self.create_sheet(label)
    rebuild_res = {}

    def emit(field, text):
        # Keep the sheet and the returned mapping in lock-step.
        ws.append((field, text))
        rebuild_res[field] = text

    for key, value in result_dict.items():
        if isinstance(value, list):
            words = []
            for item in value:
                word = item.get('words') if isinstance(item, dict) else item
                if word is not None:
                    # str() guards the join against non-string OCR values.
                    words.append(str(word))
            emit(key, '、'.join(words))
        elif isinstance(value, dict):
            if 'words' in value:
                emit(key, value['words'])
                continue
            for sub_key, sub_value in value.items():
                field = '{0}: {1}'.format(key, sub_key)
                if isinstance(sub_value, dict):
                    emit(field, sub_value.get('words', ''))
                else:
                    emit(field, sub_value)
        else:
            emit(key, value)
    return rebuild_res
729
705 def simple_license_rebuild(self, license_summary, document_scheme): 730 def simple_license_rebuild(self, license_summary, document_scheme):
706 # for ic_license_dict in license_summary.get(consts.IC_CLASSIFY, []): 731 # for ic_license_dict in license_summary.get(consts.IC_CLASSIFY, []):
707 # if ic_license_dict.get('类别') == '1': 732 # if ic_license_dict.get('类别') == '1':
......
...@@ -225,3 +225,13 @@ class PDFHandler: ...@@ -225,3 +225,13 @@ class PDFHandler:
225 else: 225 else:
226 self.merge_il(pdf, pno, il) 226 self.merge_il(pdf, pno, il)
227 self.img_count = len(self.img_path_list) 227 self.img_count = len(self.img_path_list)
228
def extract_page_image(self):
    """Pass every page of the PDF at ``self.path`` to ``self.page_to_png``.

    Resets ``self.img_path_list`` and ``self.xref_set``, ensures
    ``self.img_dir_path`` exists, visits the pages in order, and finally
    stores ``len(self.img_path_list)`` in ``self.img_count``.

    NOTE(review): presumably ``page_to_png`` appends each rendered image
    path to ``self.img_path_list`` — confirm in that method.
    """
    # Start from a clean slate so a repeated call does not accumulate results.
    self.img_path_list = []
    self.xref_set = set()
    os.makedirs(self.img_dir_path, exist_ok=True)

    with fitz.Document(self.path) as document:
        for page_index in range(document.pageCount):
            self.page_to_png(document.loadPage(page_index))

    self.img_count = len(self.img_path_list)
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!