98e8884c by chenyao

ocr_process添加try-except处理前半部分

1 parent 75d18a3c
...@@ -178,171 +178,180 @@ class Command(BaseCommand, LoggerMixin): ...@@ -178,171 +178,180 @@ class Command(BaseCommand, LoggerMixin):
178 # self.online_log.info('{0} [edms download success] [pdf_path={1}]'.format(self.log_base, pdf_path)) 178 # self.online_log.info('{0} [edms download success] [pdf_path={1}]'.format(self.log_base, pdf_path))
179 179
def bs_process(self, wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx):
    """Copy OCR'd bank-statement tables into ``wb`` and accumulate their summaries.

    One worksheet is created per OCR sheet that has cells. The sheet's summary
    is then merged into ``bs_summary`` (keyed by card number) or, when the card
    number is missing, into ``unknown_summary`` (keyed by classify, then role).
    Exactly one status tuple ``(pno, ino, part_idx, status)`` is appended to
    ``res_list`` on every path.

    :param wb: workbook-like object exposing ``create_sheet(name)``
        (presumably an openpyxl Workbook -- confirm against the caller).
    :param ocr_data: OCR response dict; its ``'data'`` entry is the sheet list.
    :param bs_summary: dict mutated in place, card number -> aggregated fields.
    :param unknown_summary: dict mutated in place, classify -> role -> fields.
    :param classify: classification id of the current image.
    :param res_list: list receiving the per-image status tuple.
    :param pno: page number, used in sheet names and status tuples.
    :param ino: image number, used in sheet names and status tuples.
    :param part_idx: index of the image part, used likewise.
    """
    try:
        sheet_items = ocr_data.get('data', [])
        if not sheet_items:
            res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))
            return

        name_prefix = 'page_{0}_img_{1}_{2}'.format(pno, ino, part_idx)
        wrote_any_sheet = False
        for sheet_idx, sheet in enumerate(sheet_items):
            cell_items = sheet.get('cells')
            if not cell_items:
                continue
            wrote_any_sheet = True

            sheet_name = '{0}_{1}'.format(name_prefix, sheet_idx)
            ws = wb.create_sheet(sheet_name)
            for cell in cell_items:
                col0 = cell.get('start_column')
                row0 = cell.get('start_row')
                text = cell.get('words')
                # OCR indices are shifted by one before being written to the worksheet.
                ws.cell(row=row0 + 1, column=col0 + 1, value=text)

            # Authenticity check: keep the reasons only when the sheet is flagged fake.
            fake_reasons = []
            verify_dict = sheet.get('verify', {})
            if verify_dict.get('verify_res') == 'fake':
                fake_reasons.extend(verify_dict.get('verify_info', []))

            # summary layout: [account name, card number, page number,
            #                  receipt verification code, print time, start time, end time]
            summary = sheet.get('summary')
            card = summary[1]
            role_list = role_set = None
            if card is None:
                # No card number: file the sheet under classify -> role instead.
                classify_dict = unknown_summary.setdefault(classify, {})
                role = summary[0]
                if role is None:
                    role = consts.UNKNOWN_ROLE
                bucket = classify_dict.setdefault(role, {})
                bucket['classify'] = classify
                bucket['role'] = role
                bucket.setdefault('sheet', []).append(sheet_name)
            else:
                bucket = bs_summary.setdefault(card, {})
                bucket['count'] = bucket.get('count', 0) + 1
                bucket.setdefault('classify', []).append(classify)
                bucket.setdefault('sheet', []).append(sheet_name)
                role_list = bucket.setdefault('role', [])
                role_set = bucket.setdefault('role_set', set())

            # Fields accumulated identically for both kinds of bucket; the
            # lists are created even when nothing ends up being appended.
            code_list = bucket.setdefault('code', [])
            indexed_lists = (
                (4, bucket.setdefault('print_time', [])),
                (5, bucket.setdefault('start_date', [])),
                (6, bucket.setdefault('end_date', [])),
            )
            verify_list = bucket.setdefault('verify', [])

            if card is not None and summary[0] is not None:
                role_list.append(summary[0])
                role_set.add(summary[0])
            if summary[3] is not None:
                code_list.append((summary[2], summary[3]))
            for summary_idx, target in indexed_lists:
                if summary[summary_idx] is not None:
                    target.append(summary[summary_idx])
            if fake_reasons:
                verify_list.append((pno, ino, '、'.join(fake_reasons)))

        status = consts.RES_SUCCESS if wrote_any_sheet else consts.RES_SUCCESS_EMPTY
        res_list.append((pno, ino, part_idx, status))
    except Exception:
        # Any failure marks this image part as failed and logs the traceback
        # instead of propagating to the caller.
        res_list.append((pno, ino, part_idx, consts.RES_FAILED))
        self.online_log.error('{0} [bs_process error] [error={1}]'.format(self.log_base, traceback.format_exc()))
268 273
def contract_process(self, classify, ocr_data, contract_result, res_list, pno, ino, part_idx,
                     img_path, contract_result_compare):
    """Flatten one OCR'd contract page into ``contract_result`` / ``contract_result_compare``.

    Exactly one status tuple ``(pno, ino, part_idx, status)`` is appended to
    ``res_list`` on every path: ``RES_SUCCESS_EMPTY`` when the OCR payload has
    no usable page, ``RES_SUCCESS`` once the page has been fully processed,
    and ``RES_FAILED`` when processing raises. The shared result dicts are
    only mutated after both per-page structures have been built, so a failed
    page leaves them untouched.

    :param classify: classification id, first-level key in both result dicts.
    :param ocr_data: OCR response dict; ``'data'`` holds ``page_num`` / ``page_info``.
    :param contract_result: dict mutated in place, classify -> page -> list of row lists.
    :param res_list: list receiving the per-image status tuple.
    :param pno: page number of the source document (status tuple only).
    :param ino: image number (status tuple only).
    :param part_idx: image part index (status tuple only).
    :param img_path: path of the processed image, stored in the compare dict.
    :param contract_result_compare: dict mutated in place, classify -> page -> compare dict.
    """
    try:
        contract_dict = ocr_data.get('data')
        if not contract_dict or contract_dict.get('page_num') is None or contract_dict.get('page_info') is None:
            res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))
            return

        page_num = contract_dict.get('page_num')
        if page_num.startswith('page_'):
            page_num_only = page_num.split('_')[-1]
        else:
            page_num_only = page_num

        text_key = 'words'
        position_key = 'position'
        page_info = contract_dict.get('page_info', {})

        # Row-oriented rendering of the page: a (key,) or (key, text) tuple per
        # field, followed by the field's rows when its text is a list.
        rebuild_page_info = []
        for key, value in page_info.items():
            if value is None:
                rebuild_page_info.append((key,))
            elif text_key in value:
                text = value[text_key]
                if text is None:
                    rebuild_page_info.append((key,))
                elif isinstance(text, str):
                    rebuild_page_info.append((key, text))
                elif isinstance(text, list):
                    rebuild_page_info.append((key,))
                    rebuild_page_info.extend(text)
            else:
                # Nested field group: emit the group key, then each sub-field.
                rebuild_page_info.append((key,))
                for sub_key, sub_value in value.items():
                    if sub_value is None:
                        rebuild_page_info.append((sub_key,))
                    elif text_key in sub_value:
                        sub_text = sub_value[text_key]
                        if sub_text is None:
                            rebuild_page_info.append((sub_key,))
                        elif isinstance(sub_text, str):
                            rebuild_page_info.append((sub_key, sub_text))
                        elif isinstance(sub_text, list):
                            rebuild_page_info.append((sub_key,))
                            rebuild_page_info.extend(sub_text)

        # Field-oriented rendering used for comparison: text per field plus
        # the positions of every field under ALL_POSITION_KEY.
        all_positions = {}
        page_compare_dict = {
            consts.IMG_PATH_KEY: img_path,
            consts.ALL_POSITION_KEY: all_positions,
        }
        for key, value in page_info.items():
            if not isinstance(value, dict):
                continue
            if text_key in value:
                position_list = value.get(position_key, [])
                all_positions[key] = position_list if isinstance(position_list, list) else []

                text = value[text_key]
                if text is None:
                    page_compare_dict[key] = ''
                elif isinstance(text, (str, list)):
                    page_compare_dict[key] = text
            else:
                page_compare_dict[key] = {}
                all_positions[key] = {}
                for sub_key, sub_value in value.items():
                    # The row-oriented loop above accepts None sub-values, so
                    # skip anything that is not a dict here instead of raising
                    # on `.get`.
                    if not isinstance(sub_value, dict):
                        continue
                    position_list = sub_value.get(position_key, [])
                    all_positions[key][sub_key] = position_list if isinstance(position_list, list) else []

                    sub_text = sub_value[text_key]
                    if sub_text is None:
                        page_compare_dict[key][sub_key] = ''
                    elif isinstance(sub_text, str):
                        page_compare_dict[key][sub_key] = sub_text

        asp_flag = contract_dict.get(consts.ASP_KEY, False)

        # Commit to the shared dicts only now that nothing above has raised.
        contract_result.setdefault(classify, dict()).setdefault(page_num_only, []).append(rebuild_page_info)
        compare_for_classify = contract_result_compare.setdefault(classify, dict())
        compare_for_classify[consts.ASP_KEY] = asp_flag
        # "position" = [xmin, ymin, xmax, ymax]
        compare_for_classify[page_num_only] = page_compare_dict
        res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))
    except Exception:
        # Mirror bs_process: a failing page is reported as failed rather than
        # being left as a success entry (or with no entry at all).
        res_list.append((pno, ino, part_idx, consts.RES_FAILED))
        self.online_log.error('{0} [contract_process error] [error={1}]'.format(self.log_base, traceback.format_exc()))
346 355
347 @staticmethod 356 @staticmethod
348 def rebuild_position(src_position): 357 def rebuild_position(src_position):
...@@ -372,499 +381,525 @@ class Command(BaseCommand, LoggerMixin): ...@@ -372,499 +381,525 @@ class Command(BaseCommand, LoggerMixin):
372 381
373 def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, do_dda, 382 def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, do_dda,
374 dda_id_bc_mapping): 383 dda_id_bc_mapping):
375 # 类别:'0'身份证, '1'居住证 384 # 添加 try-except 处理
376 license_data = ocr_data.get('data') 385 try:
377 if not license_data: 386 # 类别:'0'身份证, '1'居住证
378 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) 387 license_data = ocr_data.get('data')
379 return 388 if not license_data:
380 if isinstance(license_data, dict):
381 pre, suf = os.path.splitext(img_path)
382 base64_img = license_data.pop('base64_img', '')
383 is_save = True if len(base64_img) > 0 else False
384 section_img_path = '{0}_{1}{2}'.format(pre, part_idx, suf) if is_save else img_path
385 if is_save:
386 try:
387 with open(section_img_path, "wb") as fh:
388 fh.write(base64.b64decode(base64_img.encode()))
389 except Exception as e:
390 self.online_log.warn(
391 '{0} [section img save failed] [img_path={1}]'
392 ' [part_idx={2}]'.format(self.log_base, img_path, part_idx))
393 else:
394 is_save = False
395 section_img_path = img_path
396
397 # 保单
398 if classify == consts.INSURANCE_CLASSIFY:
399 product_result = ['', '', '']
400 product_result_position = [dict(), dict(), dict()]
401 min_char_count_1 = 1000
402 min_char_count_2 = 1000
403 for product in license_data.get('result', {}).get('productList', []):
404 name = product.get('name', {}).get('words', '')
405 if name.find('机动车损失') != -1 or name.find('汽车损失') != -1 or name.find('车损险') != -1 or \
406 name.find('车损失险') != -1 or name.find('车损失保险') != -1:
407 if len(name) < min_char_count_1:
408 min_char_count_1 = len(name)
409 product_result[0] = product.get('coverage', {}).get('words', '')
410 product_result[2] = product.get('deductible_franchise', {}).get('words', '')
411 product_result_position[0] = self.rebuild_position(product.get('coverage', {}).get(
412 'position', {}))
413 product_result_position[2] = self.rebuild_position(product.get('deductible_franchise', {}).get(
414 'position', {}))
415 elif name.find('第三者责任') != -1:
416 if len(name) < min_char_count_2:
417 min_char_count_2 = len(name)
418 product_result[1] = product.get('coverage', {}).get('words', '')
419 product_result_position[1] = self.rebuild_position(product.get('coverage', {}).get(
420 'position', {}))
421
422 special_str = license_data.get('result', {}).get('1stBeneficiary', {}).get('words', '')
423 special = '无'
424 if special_str.find('宝马') != -1 or special_str.find('先锋国际融资租赁有限公司') != -1:
425 special = '有'
426 insurance_ocr_result = {
427 '被保险人姓名': license_data.get('result', {}).get('insured', {}).get('name', {}).get('words', ''),
428 '被保险人证件号码': license_data.get('result', {}).get('insured', {}).get('certiCode', {}).get('words', ''),
429 '车架号': license_data.get('result', {}).get('vehicle', {}).get('VIN', {}).get('words', ''),
430 '机动车损失保险金额': product_result[0],
431 '机动车第三者责任保险金额': product_result[1],
432 '机动车损失保险绝对免赔率/绝对免赔额': product_result[2],
433 '保险费合计': license_data.get('result', {}).get('premiumSum', {}).get('words', ''),
434 '保险起始日期': license_data.get('result', {}).get('startDate', {}).get('words', ''),
435 '保险截止日期': license_data.get('result', {}).get('endDate', {}).get('words', ''),
436 '保单章': license_data.get('result', {}).get('seal', {}).get('words', ''),
437 '特别约定第一受益人': special,
438 consts.IMG_PATH_KEY: img_path,
439 consts.SECTION_IMG_PATH_KEY: section_img_path,
440 }
441
442 position_dict = {
443 '被保险人姓名': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
444 'insured', {}).get('name', {}).get('position', {}))},
445 '被保险人证件号码': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
446 'insured', {}).get('certiCode', {}).get('position', {}))},
447 '车架号': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
448 'vehicle', {}).get('VIN', {}).get('position', {}))},
449 '机动车损失保险金额': {consts.FIELD_POSITION_KEY: product_result_position[0]},
450 '机动车第三者责任保险金额': {consts.FIELD_POSITION_KEY: product_result_position[1]},
451 '机动车损失保险绝对免赔率/绝对免赔额': {consts.FIELD_POSITION_KEY: product_result_position[2]},
452 '保险费合计': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
453 'premiumSum', {}).get('position', {}))},
454 '保险起始日期': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
455 'startDate', {}).get('position', {}))},
456 '保险截止日期': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
457 'endDate', {}).get('position', {}))},
458 '保单章': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
459 'seal', {}).get('position', {}))},
460 '特别约定第一受益人': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
461 '1stBeneficiary', {}).get('position', {}))},
462 }
463 insurance_ocr_result[consts.ALL_POSITION_KEY] = position_dict
464 license_summary.setdefault(classify, []).append(insurance_ocr_result)
465 # DDA
466 elif classify == consts.DDA_CLASSIFY:
467 pro = ocr_data.get('confidence', 0)
468 if pro < consts.DDA_PRO_MIN:
469 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) 389 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))
470 return 390 return
471 dda_ocr_result = {} 391 if isinstance(license_data, dict):
472 position_dict = {} 392 pre, suf = os.path.splitext(img_path)
473 for key, value in license_data.get('result', {}).items(): 393 base64_img = license_data.pop('base64_img', '')
474 dda_ocr_result[key] = value.get('words', '') 394 is_save = True if len(base64_img) > 0 else False
475 position_dict[key] = { 395 section_img_path = '{0}_{1}{2}'.format(pre, part_idx, suf) if is_save else img_path
476 consts.FIELD_POSITION_KEY: value.get('position', {}) 396 if is_save:
397 try:
398 with open(section_img_path, "wb") as fh:
399 fh.write(base64.b64decode(base64_img.encode()))
400 except Exception as e:
401 self.online_log.warn(
402 '{0} [section img save failed] [img_path={1}]'
403 ' [part_idx={2}]'.format(self.log_base, img_path, part_idx))
404 else:
405 is_save = False
406 section_img_path = img_path
407
408 # 保单
409 if classify == consts.INSURANCE_CLASSIFY:
410 product_result = ['', '', '']
411 product_result_position = [dict(), dict(), dict()]
412 min_char_count_1 = 1000
413 min_char_count_2 = 1000
414 for product in license_data.get('result', {}).get('productList', []):
415 name = product.get('name', {}).get('words', '')
416 if name.find('机动车损失') != -1 or name.find('汽车损失') != -1 or name.find('车损险') != -1 or \
417 name.find('车损失险') != -1 or name.find('车损失保险') != -1:
418 if len(name) < min_char_count_1:
419 min_char_count_1 = len(name)
420 product_result[0] = product.get('coverage', {}).get('words', '')
421 product_result[2] = product.get('deductible_franchise', {}).get('words', '')
422 product_result_position[0] = self.rebuild_position(product.get('coverage', {}).get(
423 'position', {}))
424 product_result_position[2] = self.rebuild_position(product.get('deductible_franchise', {}).get(
425 'position', {}))
426 elif name.find('第三者责任') != -1:
427 if len(name) < min_char_count_2:
428 min_char_count_2 = len(name)
429 product_result[1] = product.get('coverage', {}).get('words', '')
430 product_result_position[1] = self.rebuild_position(product.get('coverage', {}).get(
431 'position', {}))
432
433 special_str = license_data.get('result', {}).get('1stBeneficiary', {}).get('words', '')
434 special = '无'
435 if special_str.find('宝马') != -1 or special_str.find('先锋国际融资租赁有限公司') != -1:
436 special = '有'
437 insurance_ocr_result = {
438 '被保险人姓名': license_data.get('result', {}).get('insured', {}).get('name', {}).get('words', ''),
439 '被保险人证件号码': license_data.get('result', {}).get('insured', {}).get('certiCode', {}).get('words', ''),
440 '车架号': license_data.get('result', {}).get('vehicle', {}).get('VIN', {}).get('words', ''),
441 '机动车损失保险金额': product_result[0],
442 '机动车第三者责任保险金额': product_result[1],
443 '机动车损失保险绝对免赔率/绝对免赔额': product_result[2],
444 '保险费合计': license_data.get('result', {}).get('premiumSum', {}).get('words', ''),
445 '保险起始日期': license_data.get('result', {}).get('startDate', {}).get('words', ''),
446 '保险截止日期': license_data.get('result', {}).get('endDate', {}).get('words', ''),
447 '保单章': license_data.get('result', {}).get('seal', {}).get('words', ''),
448 '特别约定第一受益人': special,
449 consts.IMG_PATH_KEY: img_path,
450 consts.SECTION_IMG_PATH_KEY: section_img_path,
477 } 451 }
478 dda_ocr_result[consts.DDA_IMG_PATH] = img_path 452
479 dda_ocr_result[consts.DDA_PRO] = pro 453 position_dict = {
480 dda_ocr_result[consts.IMG_PATH_KEY] = img_path 454 '被保险人姓名': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
481 dda_ocr_result[consts.SECTION_IMG_PATH_KEY] = section_img_path 455 'insured', {}).get('name', {}).get('position', {}))},
482 dda_ocr_result[consts.ALL_POSITION_KEY] = position_dict 456 '被保险人证件号码': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
483 license_summary.setdefault(classify, []).append(dda_ocr_result) 457 'insured', {}).get('certiCode', {}).get('position', {}))},
484 # 抵押登记豁免函 458 '车架号': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
485 elif classify == consts.HMH_CLASSIFY: 459 'vehicle', {}).get('VIN', {}).get('position', {}))},
486 hmh_ocr_result = {} 460 '机动车损失保险金额': {consts.FIELD_POSITION_KEY: product_result_position[0]},
487 position_dict = {} 461 '机动车第三者责任保险金额': {consts.FIELD_POSITION_KEY: product_result_position[1]},
488 for key, value in license_data.get('words_result', {}).items(): 462 '机动车损失保险绝对免赔率/绝对免赔额': {consts.FIELD_POSITION_KEY: product_result_position[2]},
489 hmh_ocr_result[key] = value.get('words', '') 463 '保险费合计': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
490 location_list = value.get('location', [-1, -1, -1, -1]) 464 'premiumSum', {}).get('position', {}))},
491 if len(location_list) == 4: 465 '保险起始日期': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
466 'startDate', {}).get('position', {}))},
467 '保险截止日期': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
468 'endDate', {}).get('position', {}))},
469 '保单章': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
470 'seal', {}).get('position', {}))},
471 '特别约定第一受益人': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
472 '1stBeneficiary', {}).get('position', {}))},
473 }
474 insurance_ocr_result[consts.ALL_POSITION_KEY] = position_dict
475 license_summary.setdefault(classify, []).append(insurance_ocr_result)
476 # DDA
477 elif classify == consts.DDA_CLASSIFY:
478 pro = ocr_data.get('confidence', 0)
479 if pro < consts.DDA_PRO_MIN:
480 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY))
481 return
482 dda_ocr_result = {}
483 position_dict = {}
484 for key, value in license_data.get('result', {}).items():
485 dda_ocr_result[key] = value.get('words', '')
492 position_dict[key] = { 486 position_dict[key] = {
493 consts.FIELD_POSITION_KEY: { 487 consts.FIELD_POSITION_KEY: value.get('position', {})
494 'top': location_list[1],
495 'left': location_list[0],
496 'height': location_list[-1] - location_list[1],
497 'width': location_list[2] - location_list[0]
498 }
499 } 488 }
500 hmh_ocr_result[consts.IMG_PATH_KEY] = img_path 489 dda_ocr_result[consts.DDA_IMG_PATH] = img_path
501 hmh_ocr_result[consts.SECTION_IMG_PATH_KEY] = section_img_path 490 dda_ocr_result[consts.DDA_PRO] = pro
502 hmh_ocr_result[consts.ALL_POSITION_KEY] = position_dict 491 dda_ocr_result[consts.IMG_PATH_KEY] = img_path
503 license_summary.setdefault(classify, []).append(hmh_ocr_result) 492 dda_ocr_result[consts.SECTION_IMG_PATH_KEY] = section_img_path
504 # 二手车交易凭证 493 dda_ocr_result[consts.ALL_POSITION_KEY] = position_dict
505 elif classify == consts.JYPZ_CLASSIFY: 494 license_summary.setdefault(classify, []).append(dda_ocr_result)
506 jypz_ocr_result = {} 495 # 抵押登记豁免函
507 position_dict = {} 496 elif classify == consts.HMH_CLASSIFY:
508 for key, value in license_data.get('result', {}).items(): 497 hmh_ocr_result = {}
509 jypz_ocr_result[key] = value.get('words', '') 498 position_dict = {}
510 position_dict[key] = { 499 for key, value in license_data.get('words_result', {}).items():
511 consts.FIELD_POSITION_KEY: value.get('position', {}) 500 hmh_ocr_result[key] = value.get('words', '')
512 } 501 location_list = value.get('location', [-1, -1, -1, -1])
513 jypz_ocr_result[consts.IMG_PATH_KEY] = img_path 502 if len(location_list) == 4:
514 jypz_ocr_result[consts.SECTION_IMG_PATH_KEY] = section_img_path 503 position_dict[key] = {
515 jypz_ocr_result[consts.ALL_POSITION_KEY] = position_dict 504 consts.FIELD_POSITION_KEY: {
516 license_summary.setdefault(classify, []).append(jypz_ocr_result) 505 'top': location_list[1],
517 # 车辆登记证 3/4页结果整合 506 'left': location_list[0],
518 elif classify == consts.MVC_CLASSIFY: 507 'height': location_list[-1] - location_list[1],
519 rebuild_data_dict = {} 508 'width': location_list[2] - location_list[0]
520 position_dict = {} 509 }
521 mvc_page = license_data.pop('page', 'VehicleRCI')
522 mvc_res = license_data.pop('results', {})
523 if mvc_page == 'VehicleRegArea':
524 rebuild_data_dict['机动车登记证书编号'] = mvc_res.get('机动车登记证书编号', {}).get('words', '')
525 code_position_list = mvc_res.get('机动车登记证书编号', {}).get('position', [0, 0, 0, 0])
526 if len(code_position_list) == 4:
527 position_dict['机动车登记证书编号'] = {
528 consts.FIELD_POSITION_KEY: {
529 'top': code_position_list[1],
530 'left': code_position_list[0],
531 'height': code_position_list[-1],
532 'width': code_position_list[2],
533 } 510 }
511 hmh_ocr_result[consts.IMG_PATH_KEY] = img_path
512 hmh_ocr_result[consts.SECTION_IMG_PATH_KEY] = section_img_path
513 hmh_ocr_result[consts.ALL_POSITION_KEY] = position_dict
514 license_summary.setdefault(classify, []).append(hmh_ocr_result)
515 # 二手车交易凭证
516 elif classify == consts.JYPZ_CLASSIFY:
517 jypz_ocr_result = {}
518 position_dict = {}
519 for key, value in license_data.get('result', {}).items():
520 jypz_ocr_result[key] = value.get('words', '')
521 position_dict[key] = {
522 consts.FIELD_POSITION_KEY: value.get('position', {})
534 } 523 }
535 for register_info in mvc_res.get('登记信息', []): 524 jypz_ocr_result[consts.IMG_PATH_KEY] = img_path
536 register_info.pop('register_type', None) 525 jypz_ocr_result[consts.SECTION_IMG_PATH_KEY] = section_img_path
537 register_info.pop('register_type_name', None) 526 jypz_ocr_result[consts.ALL_POSITION_KEY] = position_dict
538 for cn_key, detail_dict in register_info.items(): 527 license_summary.setdefault(classify, []).append(jypz_ocr_result)
539 rebuild_data_dict.setdefault(cn_key, []).append( 528 # 车辆登记证 3/4页结果整合
540 detail_dict.get('words', '')) 529 elif classify == consts.MVC_CLASSIFY:
541 tmp_position_list = detail_dict.get('position', [0, 0, 0, 0]) 530 rebuild_data_dict = {}
542 if len(tmp_position_list) == 4: 531 position_dict = {}
543 position_dict.setdefault(cn_key, []).append( 532 mvc_page = license_data.pop('page', 'VehicleRCI')
544 { 533 mvc_res = license_data.pop('results', {})
545 consts.FIELD_POSITION_KEY: { 534 if mvc_page == 'VehicleRegArea':
546 'top': tmp_position_list[1], 535 rebuild_data_dict['机动车登记证书编号'] = mvc_res.get('机动车登记证书编号', {}).get('words', '')
547 'left': tmp_position_list[0], 536 code_position_list = mvc_res.get('机动车登记证书编号', {}).get('position', [0, 0, 0, 0])
548 'height': tmp_position_list[-1], 537 if len(code_position_list) == 4:
549 'width': tmp_position_list[2], 538 position_dict['机动车登记证书编号'] = {
539 consts.FIELD_POSITION_KEY: {
540 'top': code_position_list[1],
541 'left': code_position_list[0],
542 'height': code_position_list[-1],
543 'width': code_position_list[2],
544 }
545 }
546 for register_info in mvc_res.get('登记信息', []):
547 register_info.pop('register_type', None)
548 register_info.pop('register_type_name', None)
549 for cn_key, detail_dict in register_info.items():
550 rebuild_data_dict.setdefault(cn_key, []).append(
551 detail_dict.get('words', ''))
552 tmp_position_list = detail_dict.get('position', [0, 0, 0, 0])
553 if len(tmp_position_list) == 4:
554 position_dict.setdefault(cn_key, []).append(
555 {
556 consts.FIELD_POSITION_KEY: {
557 'top': tmp_position_list[1],
558 'left': tmp_position_list[0],
559 'height': tmp_position_list[-1],
560 'width': tmp_position_list[2],
561 }
550 } 562 }
563 )
564
565 rebuild_data_dict[consts.ALL_POSITION_KEY_2] = position_dict
566 rebuild_data_dict[consts.IMG_PATH_KEY_2] = img_path
567 rebuild_data_dict[consts.SECTION_IMG_PATH_KEY_2] = section_img_path
568 else:
569 for cn_key, detail_dict in mvc_res.items():
570 rebuild_data_dict[cn_key] = detail_dict.get('words', '')
571 position_list = detail_dict.get('position', [0, 0, 0, 0])
572 if len(position_list) == 4:
573 position_dict[cn_key] = {
574 consts.FIELD_POSITION_KEY: {
575 'top': position_list[1],
576 'left': position_list[0],
577 'height': position_list[-1],
578 'width': position_list[2],
551 } 579 }
552 ) 580 }
553 581 rebuild_data_dict[consts.ALL_POSITION_KEY] = position_dict
554 rebuild_data_dict[consts.ALL_POSITION_KEY_2] = position_dict 582 rebuild_data_dict[consts.IMG_PATH_KEY] = img_path
555 rebuild_data_dict[consts.IMG_PATH_KEY_2] = img_path 583 rebuild_data_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path
556 rebuild_data_dict[consts.SECTION_IMG_PATH_KEY_2] = section_img_path 584 del mvc_res
557 else: 585 license_summary.setdefault(classify, []).append(rebuild_data_dict)
558 for cn_key, detail_dict in mvc_res.items(): 586
559 rebuild_data_dict[cn_key] = detail_dict.get('words', '') 587
560 position_list = detail_dict.get('position', [0, 0, 0, 0]) 588 # for mvc_dict in license_data:
561 if len(position_list) == 4: 589 # mvc_dict[consts.IMG_PATH_KEY] = img_path
562 position_dict[cn_key] = { 590 # try:
591 # mvc_page = mvc_dict.pop('page')
592 # except Exception as e:
593 # pass
594 # else:
595 # if mvc_page == 'VehicleRegArea':
596 # mvc_res = mvc_dict.pop('results', {})
597 # mvc_dict['机动车登记证书编号'] = mvc_res.get('register_no', {}).get('words', '')
598 # for register_info in mvc_res.get('register_info', []):
599 # for detail_dict in register_info.get('details', {}).values():
600 # mvc_dict.setdefault(detail_dict.get('chinese_key', '未知'), []).append(
601 # detail_dict.get('words', ''))
602 # del mvc_res
603 # license_summary.setdefault(classify, []).extend(license_data)
604
605 # 身份证真伪
606 elif classify == consts.IC_CLASSIFY:
607 id_card_dict = {}
608 position_dict = {}
609 card_type = license_data.get('type', '')
610 is_ic = card_type.startswith('身份证')
611 is_info_side = card_type.endswith('信息面')
612 id_card_dict['类别'] = '0' if is_ic else '1'
613 if is_ic:
614 field_map = consts.IC_MAP_0 if is_info_side else consts.IC_MAP_1
615 else:
616 field_map = consts.RP_MAP_0 if is_info_side else consts.RP_MAP_1
617 for write_field, search_field in field_map:
618 id_card_dict[write_field] = license_data.get('words_result', {}).get(search_field, {}).get('words', '')
619 location_list = license_data.get('words_result', {}).get(search_field, {}).get(
620 'location', [-1, -1, -1, -1])
621 if len(location_list) == 4:
622 position_dict[write_field] = {
563 consts.FIELD_POSITION_KEY: { 623 consts.FIELD_POSITION_KEY: {
564 'top': position_list[1], 624 'top': location_list[1],
565 'left': position_list[0], 625 'left': location_list[0],
566 'height': position_list[-1], 626 'height': location_list[-1] - location_list[1],
567 'width': position_list[2], 627 'width': location_list[2] - location_list[0]
568 } 628 }
569 } 629 }
570 rebuild_data_dict[consts.ALL_POSITION_KEY] = position_dict 630 if not is_info_side:
571 rebuild_data_dict[consts.IMG_PATH_KEY] = img_path 631 start_time = license_data.get('words_result', {}).get('签发日期', {}).get('words', '')
572 rebuild_data_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path 632 end_time = license_data.get('words_result', {}).get('失效日期', {}).get('words', '')
573 del mvc_res 633 id_card_dict['有效期限'] = '{0}-{1}'.format(start_time, end_time)
574 license_summary.setdefault(classify, []).append(rebuild_data_dict) 634 end_time_location_list = license_data.get('words_result', {}).get('失效日期', {}).get(
575 635 'location', [-1, -1, -1, -1])
576 636 if len(end_time_location_list) == 4:
577 # for mvc_dict in license_data: 637 position_dict['有效期限'] = {
578 # mvc_dict[consts.IMG_PATH_KEY] = img_path 638 consts.FIELD_POSITION_KEY: {
579 # try: 639 'top': end_time_location_list[1],
580 # mvc_page = mvc_dict.pop('page') 640 'left': end_time_location_list[0],
581 # except Exception as e: 641 'height': end_time_location_list[-1] - end_time_location_list[1],
582 # pass 642 'width': end_time_location_list[2] - end_time_location_list[0]
583 # else: 643 }
584 # if mvc_page == 'VehicleRegArea':
585 # mvc_res = mvc_dict.pop('results', {})
586 # mvc_dict['机动车登记证书编号'] = mvc_res.get('register_no', {}).get('words', '')
587 # for register_info in mvc_res.get('register_info', []):
588 # for detail_dict in register_info.get('details', {}).values():
589 # mvc_dict.setdefault(detail_dict.get('chinese_key', '未知'), []).append(
590 # detail_dict.get('words', ''))
591 # del mvc_res
592 # license_summary.setdefault(classify, []).extend(license_data)
593
594 # 身份证真伪
595 elif classify == consts.IC_CLASSIFY:
596 id_card_dict = {}
597 position_dict = {}
598 card_type = license_data.get('type', '')
599 is_ic = card_type.startswith('身份证')
600 is_info_side = card_type.endswith('信息面')
601 id_card_dict['类别'] = '0' if is_ic else '1'
602 if is_ic:
603 field_map = consts.IC_MAP_0 if is_info_side else consts.IC_MAP_1
604 else:
605 field_map = consts.RP_MAP_0 if is_info_side else consts.RP_MAP_1
606 for write_field, search_field in field_map:
607 id_card_dict[write_field] = license_data.get('words_result', {}).get(search_field, {}).get('words', '')
608 location_list = license_data.get('words_result', {}).get(search_field, {}).get(
609 'location', [-1, -1, -1, -1])
610 if len(location_list) == 4:
611 position_dict[write_field] = {
612 consts.FIELD_POSITION_KEY: {
613 'top': location_list[1],
614 'left': location_list[0],
615 'height': location_list[-1] - location_list[1],
616 'width': location_list[2] - location_list[0]
617 }
618 }
619 if not is_info_side:
620 start_time = license_data.get('words_result', {}).get('签发日期', {}).get('words', '')
621 end_time = license_data.get('words_result', {}).get('失效日期', {}).get('words', '')
622 id_card_dict['有效期限'] = '{0}-{1}'.format(start_time, end_time)
623 end_time_location_list = license_data.get('words_result', {}).get('失效日期', {}).get(
624 'location', [-1, -1, -1, -1])
625 if len(end_time_location_list) == 4:
626 position_dict['有效期限'] = {
627 consts.FIELD_POSITION_KEY: {
628 'top': end_time_location_list[1],
629 'left': end_time_location_list[0],
630 'height': end_time_location_list[-1] - end_time_location_list[1],
631 'width': end_time_location_list[2] - end_time_location_list[0]
632 } 644 }
633 }
634 645
635 646
636 if not is_info_side: 647 if not is_info_side:
637 id_card_dict[consts.IMG_PATH_KEY_2] = img_path 648 id_card_dict[consts.IMG_PATH_KEY_2] = img_path
638 id_card_dict[consts.ALL_POSITION_KEY_2] = position_dict 649 id_card_dict[consts.ALL_POSITION_KEY_2] = position_dict
639 id_card_dict[consts.SECTION_IMG_PATH_KEY_2] = section_img_path 650 id_card_dict[consts.SECTION_IMG_PATH_KEY_2] = section_img_path
640 651
641 else:
642 id_card_dict[consts.ALL_POSITION_KEY] = position_dict
643 id_card_dict[consts.IMG_PATH_KEY] = img_path
644 id_card_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path
645
646 if is_ic and is_save:
647 card_type = -1
648 json_data_4 = {
649 'mode': 1,
650 'user_info': {
651 'image_content': base64_img,
652 },
653 'options': {
654 'distinguish_type': 1,
655 'auto_rotate': True,
656 },
657 }
658 for times in range(consts.RETRY_TIMES):
659 try:
660 start_time = time.time()
661 ocr_4_response = requests.post(self.ocr_url_4, json=json_data_4)
662 if ocr_4_response.status_code != 200:
663 raise OCR4Exception('ocr_4 status code: {0}'.format(ocr_4_response.status_code))
664 except Exception as e:
665 self.online_log.warn(
666 '{0} [ocr_4 failed] [times={1}] [img_path={2}] [error={3}]'.format(
667 self.log_base, times, img_path, traceback.format_exc()))
668 else:
669 ocr_4_res = ocr_4_response.json()
670 end_time = time.time()
671 speed_time = int(end_time - start_time)
672
673 if ocr_4_res.get('code') == 0 and ocr_4_res.get('result', {}).get('rtn') == 0:
674 card_type = ocr_4_res.get('result', {}).get(
675 'idcard_distinguish_result', {}).get('result', -1)
676
677 self.online_log.info(
678 '{0} [ocr_4 success] [img_path={1}] [speed_time={2}]'.format(
679 self.log_base, img_path, speed_time))
680 break
681 else: 652 else:
682 self.online_log.warn( 653 id_card_dict[consts.ALL_POSITION_KEY] = position_dict
683 '{0} [ocr_4 failed] [img_path={1}]'.format(self.log_base, img_path)) 654 id_card_dict[consts.IMG_PATH_KEY] = img_path
684 655 id_card_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path
685 id_card_dict[consts.IC_TURE_OR_FALSE] = consts.IC_RES_MAPPING.get(card_type) 656
686 657 if is_ic and is_save:
687 if do_dda and isinstance(id_card_dict.get(consts.IC_KEY_FIELD[0]), str) and \ 658 card_type = -1
688 isinstance(id_card_dict.get(consts.IC_KEY_FIELD[1]), str): 659 json_data_4 = {
689 ic_name = id_card_dict.get(consts.IC_KEY_FIELD[0], '').strip() 660 'mode': 1,
690 ic_id = id_card_dict.get(consts.IC_KEY_FIELD[1], '').strip() 661 'user_info': {
691 if len(ic_name) > 0 and len(ic_id) > 0: 662 'image_content': base64_img,
692 dda_id_bc_mapping.setdefault(consts.IC_FIELD, []).append((ic_name, ic_id, img_path)) 663 },
693 license_summary.setdefault(classify, []).append(id_card_dict) 664 'options': {
694 # 购车发票 & 二手车发票 665 'distinguish_type': 1,
695 elif classify == consts.MVI_CLASSIFY or classify == consts.UCI_CLASSIFY: 666 'auto_rotate': True,
696 rebuild_data_dict = {} 667 },
697 position_dict = {} 668 }
698 mvi_res = license_data.pop('result', {}) 669 for times in range(consts.RETRY_TIMES):
699 for en_key, detail_dict in mvi_res.items():
700 rebuild_data_dict[detail_dict.get('chinese_key', '')] = detail_dict.get('words', '')
701 position_dict[detail_dict.get('chinese_key', '')] = {
702 consts.FIELD_POSITION_KEY: detail_dict.get('position', {})
703 }
704 rebuild_data_dict['新旧版式'] = license_data.get('layout', '')
705 rebuild_data_dict[consts.IMG_PATH_KEY] = img_path
706 rebuild_data_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path
707 rebuild_data_dict[consts.ALL_POSITION_KEY] = position_dict
708 license_summary.setdefault(classify, []).append(rebuild_data_dict)
709 # 其他
710 else:
711 for res_dict in license_data:
712 res_dict[consts.IMG_PATH_KEY] = img_path
713 res_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path
714 license_summary.setdefault(classify, []).extend(license_data)
715 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))
716
717 def license2_process(self, ocr_res_2, license_summary, pid, classify, res_list, pno, ino, part_idx, img_path, do_dda, dda_id_bc_mapping, file_data):
718 if ocr_res_2.get('ErrorCode') in consts.SUCCESS_CODE_SET:
719 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))
720 if pid == consts.BC_PID:
721 # 银行卡
722 # res_dict = {}
723 # for en_key, chn_key in consts.BC_FIELD:
724 # res_dict[chn_key] = ocr_res_2.get(en_key, '')
725 ocr_res_2[consts.IMG_PATH_KEY] = img_path
726 license_summary.setdefault(classify, []).append(ocr_res_2)
727 if do_dda and isinstance(ocr_res_2.get(consts.BC_KEY_FIELD), str):
728 bc_no = ocr_res_2[consts.BC_KEY_FIELD].strip()
729 if len(bc_no) > 0:
730 dda_id_bc_mapping.setdefault(consts.BC_FIELD, []).append((bc_no, img_path))
731 else:
732 # 营业执照等
733 pre, suf = os.path.splitext(img_path)
734 src_section_img_path = img_path if file_data is None else '{0}_{1}{2}'.format(pre, part_idx, suf)
735
736 is_save = False
737 for res_idx, result_dict in enumerate(ocr_res_2.get('ResultList', [])):
738 image_data = result_dict.get('image_data', '')
739 if len(image_data) > 0:
740 position = {}
741 angle = 0
742 section_img_path = '{0}_{1}_{2}{3}'.format(pre, part_idx, res_idx, suf)
743 try: 670 try:
744 with open(section_img_path, "wb") as fh: 671 start_time = time.time()
745 fh.write(base64.b64decode(image_data.encode())) 672 ocr_4_response = requests.post(self.ocr_url_4, json=json_data_4)
673 if ocr_4_response.status_code != 200:
674 raise OCR4Exception('ocr_4 status code: {0}'.format(ocr_4_response.status_code))
746 except Exception as e: 675 except Exception as e:
747 self.online_log.warn( 676 self.online_log.warn(
748 '{0} [section img save failed] [img_path={1}]' 677 '{0} [ocr_4 failed] [times={1}] [img_path={2}] [error={3}]'.format(
749 ' [part_idx={2}] [res_idx={3}]'.format(self.log_base, img_path, part_idx, res_idx)) 678 self.log_base, times, img_path, traceback.format_exc()))
679 else:
680 ocr_4_res = ocr_4_response.json()
681 end_time = time.time()
682 speed_time = int(end_time - start_time)
683
684 if ocr_4_res.get('code') == 0 and ocr_4_res.get('result', {}).get('rtn') == 0:
685 card_type = ocr_4_res.get('result', {}).get(
686 'idcard_distinguish_result', {}).get('result', -1)
687
688 self.online_log.info(
689 '{0} [ocr_4 success] [img_path={1}] [speed_time={2}]'.format(
690 self.log_base, img_path, speed_time))
691 break
750 else: 692 else:
751 is_save = True 693 self.online_log.warn(
752 section_img_path = src_section_img_path 694 '{0} [ocr_4 failed] [img_path={1}]'.format(self.log_base, img_path))
753 position = result_dict.get('position', {}) 695
754 angle = result_dict.get('angle', 0) 696 id_card_dict[consts.IC_TURE_OR_FALSE] = consts.IC_RES_MAPPING.get(card_type)
755 res_dict = {} 697
756 position_dict = {} 698 if do_dda and isinstance(id_card_dict.get(consts.IC_KEY_FIELD[0]), str) and \
757 for field_dict in result_dict.get('FieldList', []): 699 isinstance(id_card_dict.get(consts.IC_KEY_FIELD[1]), str):
758 res_dict[field_dict.get('chn_key', '')] = field_dict.get('value', '') 700 ic_name = id_card_dict.get(consts.IC_KEY_FIELD[0], '').strip()
759 position_dict[field_dict.get('chn_key', '')] = { 701 ic_id = id_card_dict.get(consts.IC_KEY_FIELD[1], '').strip()
760 consts.FIELD_POSITION_KEY: field_dict.get('position', {}), 702 if len(ic_name) > 0 and len(ic_id) > 0:
761 consts.FIELD_QUAD_KEY: field_dict.get('quad', []), 703 dda_id_bc_mapping.setdefault(consts.IC_FIELD, []).append((ic_name, ic_id, img_path))
762 } 704 license_summary.setdefault(classify, []).append(id_card_dict)
763 position_dict[consts.POSITION_KEY] = position 705 # 购车发票 & 二手车发票
764 position_dict[consts.ANGLE_KEY] = angle 706 elif classify == consts.MVI_CLASSIFY or classify == consts.UCI_CLASSIFY:
707 rebuild_data_dict = {}
708 position_dict = {}
709 mvi_res = license_data.pop('result', {})
710 for en_key, detail_dict in mvi_res.items():
711 rebuild_data_dict[detail_dict.get('chinese_key', '')] = detail_dict.get('words', '')
712 position_dict[detail_dict.get('chinese_key', '')] = {
713 consts.FIELD_POSITION_KEY: detail_dict.get('position', {})
714 }
715 rebuild_data_dict['新旧版式'] = license_data.get('layout', '')
716 rebuild_data_dict[consts.IMG_PATH_KEY] = img_path
717 rebuild_data_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path
718 rebuild_data_dict[consts.ALL_POSITION_KEY] = position_dict
719 license_summary.setdefault(classify, []).append(rebuild_data_dict)
720 # 其他
721 else:
722 for res_dict in license_data:
765 res_dict[consts.IMG_PATH_KEY] = img_path 723 res_dict[consts.IMG_PATH_KEY] = img_path
766 res_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path 724 res_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path
767 res_dict[consts.ALL_POSITION_KEY] = position_dict 725 license_summary.setdefault(classify, []).extend(license_data)
768 license_summary.setdefault(classify, []).append(res_dict) 726 res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))
727 except Exception as e:
728 res_list.append((pno, ino, part_idx, consts.RES_FAILED))
729 self.online_log.error('{0} [license1_process error] [error={1}]'.format(self.log_base, traceback.format_exc()))
769 730
def license2_process(self, ocr_res_2, license_summary, pid, classify, res_list, pno, ino, part_idx, img_path, do_dda, dda_id_bc_mapping, file_data):
    """Fold one second-stage OCR response into ``license_summary``.

    Exactly one status tuple ``(pno, ino, part_idx, status)`` is appended to
    ``res_list`` per call: ``consts.RES_SUCCESS`` when the response was fully
    processed, ``consts.RES_FAILED_2`` when the response's ``ErrorCode`` is
    not in ``consts.SUCCESS_CODE_SET`` or when processing raised.

    Args:
        ocr_res_2: dict response of the second OCR service.
        license_summary: dict mapping classify -> list of result dicts;
            mutated in place.
        pid: product id; ``consts.BC_PID`` selects the bank-card branch.
        classify: key under which results are stored in ``license_summary``.
        res_list: list collecting per-image status tuples; mutated in place.
        pno, ino, part_idx: identifiers copied into the status tuple;
            ``part_idx`` is also used to build section image file names.
        img_path: path of the source image.
        do_dda: when truthy, bank-card numbers are also collected into
            ``dda_id_bc_mapping``.
        dda_id_bc_mapping: dict collecting ``(bc_no, img_path)`` tuples under
            ``consts.BC_FIELD``; mutated in place.
        file_data: base64 text of the source section image, or ``None``.

    NOTE: if an exception occurs part-way, entries already appended to
    ``license_summary`` are not rolled back.
    """
    try:
        if ocr_res_2.get('ErrorCode') not in consts.SUCCESS_CODE_SET:
            res_list.append((pno, ino, part_idx, consts.RES_FAILED_2))
            return

        if pid == consts.BC_PID:
            # Bank card: the raw response dict is stored as the result.
            ocr_res_2[consts.IMG_PATH_KEY] = img_path
            license_summary.setdefault(classify, []).append(ocr_res_2)
            if do_dda and isinstance(ocr_res_2.get(consts.BC_KEY_FIELD), str):
                bc_no = ocr_res_2[consts.BC_KEY_FIELD].strip()
                if len(bc_no) > 0:
                    dda_id_bc_mapping.setdefault(consts.BC_FIELD, []).append((bc_no, img_path))
        else:
            # Business license and similar documents.
            pre, suf = os.path.splitext(img_path)
            src_section_img_path = img_path if file_data is None else '{0}_{1}{2}'.format(pre, part_idx, suf)

            is_save = False
            for res_idx, result_dict in enumerate(ocr_res_2.get('ResultList', [])):
                image_data = result_dict.get('image_data', '')
                if len(image_data) > 0:
                    # The result carries its own image: store it under a
                    # per-result file name.
                    position = {}
                    angle = 0
                    section_img_path = '{0}_{1}_{2}{3}'.format(pre, part_idx, res_idx, suf)
                    try:
                        with open(section_img_path, "wb") as fh:
                            fh.write(base64.b64decode(image_data.encode()))
                    except Exception:
                        # Best effort: a failed image write must not drop the
                        # recognised fields.
                        self.online_log.warn(
                            '{0} [section img save failed] [img_path={1}]'
                            ' [part_idx={2}] [res_idx={3}]'.format(self.log_base, img_path, part_idx, res_idx))
                else:
                    # No image in the result: point at the source section
                    # image and remember that it has to be written below.
                    is_save = True
                    section_img_path = src_section_img_path
                    position = result_dict.get('position', {})
                    angle = result_dict.get('angle', 0)

                res_dict = {}
                position_dict = {}
                for field_dict in result_dict.get('FieldList', []):
                    res_dict[field_dict.get('chn_key', '')] = field_dict.get('value', '')
                    position_dict[field_dict.get('chn_key', '')] = {
                        consts.FIELD_POSITION_KEY: field_dict.get('position', {}),
                        consts.FIELD_QUAD_KEY: field_dict.get('quad', []),
                    }
                position_dict[consts.POSITION_KEY] = position
                position_dict[consts.ANGLE_KEY] = angle
                res_dict[consts.IMG_PATH_KEY] = img_path
                res_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path
                res_dict[consts.ALL_POSITION_KEY] = position_dict
                license_summary.setdefault(classify, []).append(res_dict)

            if is_save and file_data is not None:
                try:
                    with open(src_section_img_path, "wb") as fh:
                        fh.write(base64.b64decode(file_data.encode()))
                except Exception:
                    self.online_log.warn(
                        '{0} [section img save failed] [img_path={1}]'
                        ' [part_idx={2}]'.format(self.log_base, img_path, part_idx))

        # Record success only after all processing finished, so a later
        # exception cannot leave both a success and a failure entry for the
        # same image part.
        res_list.append((pno, ino, part_idx, consts.RES_SUCCESS))
    except Exception:
        res_list.append((pno, ino, part_idx, consts.RES_FAILED_2))
        self.online_log.error('{0} [license2_process error] [error={1}]'.format(self.log_base, traceback.format_exc()))
780 799
@staticmethod
def license_rebuild(license_summary):
    """Reorder and enrich the collected license results in place.

    Handles three classifies of ``license_summary`` (a dict mapping
    classify -> list of result dicts):

    * ``consts.IC_CLASSIFY``: the ``'类别'`` marker is popped from every
      record; records marked ``'1'`` are moved to ``consts.RP_CLASSIFY``.
      Within each group, records that do not contain the ordering key come
      first, followed by those that do.
    * ``consts.MVI_CLASSIFY``: every record gets a ``'不含税价(逻辑计算)'``
      entry, computed from ``'增值税税率'`` and ``'价税合计小写'`` when both
      parse, otherwise ``''``.
    * ``consts.MVC_CLASSIFY``: records without the ordering key come first,
      followed by those with it.

    Returns:
        ``(ic_merge, rp_merge)``: each is True when the corresponding group
        has exactly one record on each side. ``(False, False)`` is returned
        if an unexpected exception occurs (the traceback is printed).
    """
    try:
        ic_merge = False
        rp_merge = False

        for classify in (consts.IC_CLASSIFY, consts.MVI_CLASSIFY, consts.MVC_CLASSIFY):
            license_list = license_summary.get(classify)
            if not license_list:
                continue

            # ID cards and residence permits are split apart; front side
            # first, then back side.
            if classify == consts.IC_CLASSIFY:
                key, _, _ = consts.FIELD_ORDER_MAP.get(classify)
                ic_side1_list = []
                ic_side2_list = []
                rp_side1_list = []
                rp_side2_list = []
                for license_dict in license_list:
                    is_rp = license_dict.pop('类别', '0')
                    if key in license_dict:
                        if is_rp == '1':
                            rp_side2_list.append(license_dict)
                        else:
                            ic_side2_list.append(license_dict)
                    elif is_rp == '1':
                        rp_side1_list.append(license_dict)
                    else:
                        ic_side1_list.append(license_dict)

                ic_merge = len(ic_side1_list) == len(ic_side2_list) == 1
                rp_merge = len(rp_side1_list) == len(rp_side2_list) == 1

                ic_side1_list.extend(ic_side2_list)
                rp_side1_list.extend(rp_side2_list)

                if ic_side1_list:
                    license_summary[classify] = ic_side1_list
                else:
                    license_summary.pop(classify, None)

                if rp_side1_list:
                    license_summary[consts.RP_CLASSIFY] = rp_side1_list

            # Motor vehicle sales invoice: add the tax-exclusive price
            # (computed).
            if classify == consts.MVI_CLASSIFY:
                for license_dict in license_list:
                    price = ''
                    rate_str = license_dict.get('增值税税率')
                    price_total_str = license_dict.get('价税合计小写')
                    if rate_str is not None and price_total_str is not None:
                        try:
                            rate = int(rate_str.rstrip('%'))
                            price_total = float(price_total_str)
                            # The division stays inside the guarded block so
                            # that a rate of -100 leaves this one price empty
                            # instead of aborting the whole rebuild.
                            price = round(price_total * 100 / (rate + 100), 2)
                        except (AttributeError, TypeError, ValueError, ZeroDivisionError):
                            # Unparseable OCR text: keep the empty price.
                            price = ''
                    license_dict['不含税价(逻辑计算)'] = price

            # Vehicle registration certificate: pages 1/2 first, then
            # pages 3/4.
            if classify == consts.MVC_CLASSIFY:
                key, _, _ = consts.FIELD_ORDER_MAP.get(classify)
                page_1_2 = []
                page_3_4 = []
                for license_dict in license_list:
                    if key in license_dict:
                        page_3_4.append(license_dict)
                    else:
                        page_1_2.append(license_dict)
                page_1_2.extend(page_3_4)
                license_summary[classify] = page_1_2

        return ic_merge, rp_merge
    except Exception:
        # Static method without access to the command's logger, so the
        # failure is reported on stdout.
        print("license_rebuild error")
        print(traceback.format_exc())
        return False, False
857 882
def parse_img_path(self, img_path):
    """Extract the 1-based page and image numbers from an image path.

    The file name (without directory and extension) is split on ``'_'``;
    the 2nd and 4th parts are read as integers and incremented by one,
    e.g. ``page_7_img_11_0`` -> ``(8, 12)``.

    Returns:
        ``(page_number, image_number)``, or ``(0, 0)`` when the name does not
        have the expected shape (the error is logged).
    """
    try:
        img_name, _ = os.path.splitext(os.path.basename(img_path))
        part_list = img_name.split('_')
        # e.g. page_7_img_11_0
        return int(part_list[1]) + 1, int(part_list[3]) + 1
    except (IndexError, TypeError, ValueError):
        # Too few parts, a non-path argument, or non-numeric parts.
        self.online_log.error('{0} [parse_img_path error] [error={1}]'.format(self.log_base, traceback.format_exc()))
        return 0, 0
863 893
def get_most(self, value_list):
    """Return the most frequent element of ``value_list``.

    Returns ``None`` when ``value_list`` is empty or ``None``, or when it
    cannot be counted (the error is logged in that case).
    """
    if not value_list:
        return None
    try:
        most_common = Counter(value_list).most_common(1)
    except TypeError:
        # Raised for unhashable elements or a non-iterable argument.
        self.online_log.error('{0} [get_most error] [error={1}]'.format(self.log_base, traceback.format_exc()))
        return None
    return most_common[0][0] if most_common else None
868 903
869 def date_format(self, date_str, format_str): 904 def date_format(self, date_str, format_str):
870 try: 905 try:
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!