add position
Showing 1 changed file with 168 additions and 55 deletions
... | @@ -298,14 +298,26 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -298,14 +298,26 @@ class Command(BaseCommand, LoggerMixin): |
298 | # rebuild_data_dict[detail_dict.get('chinese_key', '')] = detail_dict.get('words', '') | 298 | # rebuild_data_dict[detail_dict.get('chinese_key', '')] = detail_dict.get('words', '') |
299 | # return [rebuild_data_dict] | 299 | # return [rebuild_data_dict] |
300 | 300 | ||
301 | 301 | def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, do_dda, | |
302 | 302 | dda_id_bc_mapping): | |
303 | def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, do_dda, dda_id_bc_mapping): | ||
304 | # 类别:'0'身份证, '1'居住证 | 303 | # 类别:'0'身份证, '1'居住证 |
305 | license_data = ocr_data.get('data') | 304 | license_data = ocr_data.get('data') |
306 | if not license_data: | 305 | if not license_data: |
307 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) | 306 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) |
308 | return | 307 | return |
308 | pre, suf = os.path.splitext(img_path) | ||
309 | base64_img = license_data.pop('base64_img', '') | ||
310 | is_save = True if len(base64_img) > 0 else False | ||
311 | section_img_path = '{0}_{1}{2}'.format(pre, part_idx, suf) if is_save else img_path | ||
312 | if is_save: | ||
313 | try: | ||
314 | with open(section_img_path, "wb") as fh: | ||
315 | fh.write(base64.b64decode(base64_img.encode())) | ||
316 | except Exception as e: | ||
317 | self.online_log.warn( | ||
318 | '{0} [section img save failed] [img_path={1}]' | ||
319 | ' [part_idx={2}]'.format(self.log_base, img_path, part_idx)) | ||
320 | |||
309 | # 保单 | 321 | # 保单 |
310 | if classify == consts.INSURANCE_CLASSIFY: | 322 | if classify == consts.INSURANCE_CLASSIFY: |
311 | product_result = ['', '', ''] | 323 | product_result = ['', '', ''] |
... | @@ -333,7 +345,13 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -333,7 +345,13 @@ class Command(BaseCommand, LoggerMixin): |
333 | '保险截止日期': license_data.get('result', {}).get('endDate', {}).get('words', ''), | 345 | '保险截止日期': license_data.get('result', {}).get('endDate', {}).get('words', ''), |
334 | '保单章': license_data.get('result', {}).get('seal', {}).get('words', ''), | 346 | '保单章': license_data.get('result', {}).get('seal', {}).get('words', ''), |
335 | '特别约定第一受益人': special, | 347 | '特别约定第一受益人': special, |
348 | consts.IMG_PATH_KEY: img_path, | ||
349 | consts.SECTION_IMG_PATH_KEY: section_img_path, | ||
336 | } | 350 | } |
351 | # position_dict = { | ||
352 | # '': {consts.FIELD_POSITION_KEY: {}} | ||
353 | # } | ||
354 | # insurance_ocr_result[consts.ALL_POSITION_KEY] = position_dict | ||
337 | license_summary.setdefault(classify, []).append(insurance_ocr_result) | 355 | license_summary.setdefault(classify, []).append(insurance_ocr_result) |
338 | # DDA | 356 | # DDA |
339 | elif classify == consts.DDA_CLASSIFY: | 357 | elif classify == consts.DDA_CLASSIFY: |
... | @@ -341,37 +359,103 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -341,37 +359,103 @@ class Command(BaseCommand, LoggerMixin): |
341 | if pro < consts.DDA_PRO_MIN: | 359 | if pro < consts.DDA_PRO_MIN: |
342 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) | 360 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) |
343 | return | 361 | return |
344 | dda_ocr_result = {key: value.get('words', '') for key, value in license_data.get('result', {}).items()} | 362 | dda_ocr_result = {} |
363 | position_dict = {} | ||
364 | for key, value in license_data.get('result', {}).items(): | ||
365 | dda_ocr_result[key] = value.get('words', '') | ||
366 | position_dict[key] = { | ||
367 | consts.FIELD_POSITION_KEY: value.get('position', {}) | ||
368 | } | ||
345 | dda_ocr_result[consts.DDA_IMG_PATH] = img_path | 369 | dda_ocr_result[consts.DDA_IMG_PATH] = img_path |
346 | dda_ocr_result[consts.DDA_PRO] = pro | 370 | dda_ocr_result[consts.DDA_PRO] = pro |
347 | dda_ocr_result[consts.IMG_PATH_KEY] = img_path | 371 | dda_ocr_result[consts.IMG_PATH_KEY] = img_path |
372 | dda_ocr_result[consts.SECTION_IMG_PATH_KEY] = section_img_path | ||
373 | dda_ocr_result[consts.ALL_POSITION_KEY] = position_dict | ||
348 | license_summary.setdefault(classify, []).append(dda_ocr_result) | 374 | license_summary.setdefault(classify, []).append(dda_ocr_result) |
349 | # 抵押登记豁免函 | 375 | # 抵押登记豁免函 |
350 | elif classify == consts.HMH_CLASSIFY: | 376 | elif classify == consts.HMH_CLASSIFY: |
351 | hmh_ocr_result = {key: value.get('words', '') for key, value in license_data.get('words_result', {}).items()} | 377 | hmh_ocr_result = {} |
378 | position_dict = {} | ||
379 | for key, value in license_data.get('words_result', {}).items(): | ||
380 | hmh_ocr_result[key] = value.get('words', '') | ||
381 | location_list = value.get('location', [-1, -1, -1, -1]) | ||
382 | if len(location_list) == 4: | ||
383 | position_dict[key] = { | ||
384 | consts.FIELD_POSITION_KEY: { | ||
385 | 'top': location_list[1], | ||
386 | 'left': location_list[0], | ||
387 | 'height': location_list[-1] - location_list[1], | ||
388 | 'width': location_list[2] - location_list[0] | ||
389 | } | ||
390 | } | ||
391 | hmh_ocr_result[consts.IMG_PATH_KEY] = img_path | ||
392 | hmh_ocr_result[consts.SECTION_IMG_PATH_KEY] = section_img_path | ||
393 | hmh_ocr_result[consts.ALL_POSITION_KEY] = position_dict | ||
352 | license_summary.setdefault(classify, []).append(hmh_ocr_result) | 394 | license_summary.setdefault(classify, []).append(hmh_ocr_result) |
353 | # 二手车交易凭证 | 395 | # 二手车交易凭证 |
354 | elif classify == consts.JYPZ_CLASSIFY: | 396 | elif classify == consts.JYPZ_CLASSIFY: |
355 | jypz_ocr_result = {key: value.get('words', '') for key, value in license_data.get('result', {}).items()} | 397 | jypz_ocr_result = {} |
398 | position_dict = {} | ||
399 | for key, value in license_data.get('result', {}).items(): | ||
400 | jypz_ocr_result[key] = value.get('words', '') | ||
401 | position_dict[key] = { | ||
402 | consts.FIELD_POSITION_KEY: value.get('position', {}) | ||
403 | } | ||
404 | jypz_ocr_result[consts.IMG_PATH_KEY] = img_path | ||
405 | jypz_ocr_result[consts.SECTION_IMG_PATH_KEY] = section_img_path | ||
406 | jypz_ocr_result[consts.ALL_POSITION_KEY] = position_dict | ||
356 | license_summary.setdefault(classify, []).append(jypz_ocr_result) | 407 | license_summary.setdefault(classify, []).append(jypz_ocr_result) |
357 | # 车辆登记证 3/4页结果整合 | 408 | # 车辆登记证 3/4页结果整合 |
358 | elif classify == consts.MVC_CLASSIFY: | 409 | elif classify == consts.MVC_CLASSIFY: |
359 | rebuild_data_dict = {} | 410 | rebuild_data_dict = {} |
411 | position_dict = {} | ||
360 | rebuild_data_dict[consts.IMG_PATH_KEY] = img_path | 412 | rebuild_data_dict[consts.IMG_PATH_KEY] = img_path |
413 | rebuild_data_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path | ||
361 | mvc_page = license_data.pop('page', 'VehicleRCI') | 414 | mvc_page = license_data.pop('page', 'VehicleRCI') |
362 | mvc_res = license_data.pop('results', {}) | 415 | mvc_res = license_data.pop('results', {}) |
363 | if mvc_page == 'VehicleRegArea': | 416 | if mvc_page == 'VehicleRegArea': |
364 | rebuild_data_dict['机动车登记证书编号'] = mvc_res.get('机动车登记证书编号', {}).get('words', '') | 417 | rebuild_data_dict['机动车登记证书编号'] = mvc_res.get('机动车登记证书编号', {}).get('words', '') |
418 | code_position_list = mvc_res.get('机动车登记证书编号', {}).get('position', [0, 0, 0, 0]) | ||
419 | if len(code_position_list) == 4: | ||
420 | position_dict['机动车登记证书编号'] = { | ||
421 | consts.FIELD_POSITION_KEY: { | ||
422 | 'top': code_position_list[1], | ||
423 | 'left': code_position_list[0], | ||
424 | 'height': code_position_list[-1], | ||
425 | 'width': code_position_list[2], | ||
426 | } | ||
427 | } | ||
365 | for register_info in mvc_res.get('登记信息', []): | 428 | for register_info in mvc_res.get('登记信息', []): |
366 | register_info.pop('register_type', None) | 429 | register_info.pop('register_type', None) |
367 | register_info.pop('register_type_name', None) | 430 | register_info.pop('register_type_name', None) |
368 | for cn_key, detail_dict in register_info.items(): | 431 | for cn_key, detail_dict in register_info.items(): |
369 | rebuild_data_dict.setdefault(cn_key, []).append( | 432 | rebuild_data_dict.setdefault(cn_key, []).append( |
370 | detail_dict.get('words', '')) | 433 | detail_dict.get('words', '')) |
434 | tmp_position_list = detail_dict.get('position', [0, 0, 0, 0]) | ||
435 | if len(tmp_position_list) == 4: | ||
436 | position_dict[cn_key] = { | ||
437 | consts.FIELD_POSITION_KEY: { | ||
438 | 'top': tmp_position_list[1], | ||
439 | 'left': tmp_position_list[0], | ||
440 | 'height': tmp_position_list[-1], | ||
441 | 'width': tmp_position_list[2], | ||
442 | } | ||
443 | } | ||
371 | else: | 444 | else: |
372 | for cn_key, detail_dict in mvc_res.items(): | 445 | for cn_key, detail_dict in mvc_res.items(): |
373 | rebuild_data_dict[cn_key] = detail_dict.get('words', '') | 446 | rebuild_data_dict[cn_key] = detail_dict.get('words', '') |
447 | position_list = detail_dict.get('position', [0, 0, 0, 0]) | ||
448 | if len(position_list) == 4: | ||
449 | position_dict[cn_key] = { | ||
450 | consts.FIELD_POSITION_KEY: { | ||
451 | 'top': position_list[1], | ||
452 | 'left': position_list[0], | ||
453 | 'height': position_list[-1], | ||
454 | 'width': position_list[2], | ||
455 | } | ||
456 | } | ||
374 | del mvc_res | 457 | del mvc_res |
458 | rebuild_data_dict[consts.ALL_POSITION_KEY] = position_dict | ||
375 | license_summary.setdefault(classify, []).append(rebuild_data_dict) | 459 | license_summary.setdefault(classify, []).append(rebuild_data_dict) |
376 | 460 | ||
377 | 461 | ||
... | @@ -395,6 +479,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -395,6 +479,7 @@ class Command(BaseCommand, LoggerMixin): |
395 | # 身份证真伪 | 479 | # 身份证真伪 |
396 | elif classify == consts.IC_CLASSIFY: | 480 | elif classify == consts.IC_CLASSIFY: |
397 | id_card_dict = {} | 481 | id_card_dict = {} |
482 | position_dict = {} | ||
398 | card_type = license_data.get('type', '') | 483 | card_type = license_data.get('type', '') |
399 | is_ic = card_type.startswith('身份证') | 484 | is_ic = card_type.startswith('身份证') |
400 | is_info_side = card_type.endswith('信息面') | 485 | is_info_side = card_type.endswith('信息面') |
... | @@ -405,78 +490,106 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -405,78 +490,106 @@ class Command(BaseCommand, LoggerMixin): |
405 | field_map = consts.RP_MAP_0 if is_info_side else consts.RP_MAP_1 | 490 | field_map = consts.RP_MAP_0 if is_info_side else consts.RP_MAP_1 |
406 | for write_field, search_field in field_map: | 491 | for write_field, search_field in field_map: |
407 | id_card_dict[write_field] = license_data.get('words_result', {}).get(search_field, {}).get('words', '') | 492 | id_card_dict[write_field] = license_data.get('words_result', {}).get(search_field, {}).get('words', '') |
493 | location_list = license_data.get('words_result', {}).get(search_field, {}).get( | ||
494 | 'location', [-1, -1, -1, -1]) | ||
495 | if len(location_list) == 4: | ||
496 | position_dict[write_field] = { | ||
497 | consts.FIELD_POSITION_KEY: { | ||
498 | 'top': location_list[1], | ||
499 | 'left': location_list[0], | ||
500 | 'height': location_list[-1] - location_list[1], | ||
501 | 'width': location_list[2] - location_list[0] | ||
502 | } | ||
503 | } | ||
408 | if not is_info_side: | 504 | if not is_info_side: |
409 | start_time = license_data.get('words_result', {}).get('签发日期', {}).get('words', '') | 505 | start_time = license_data.get('words_result', {}).get('签发日期', {}).get('words', '') |
410 | end_time = license_data.get('words_result', {}).get('失效日期', {}).get('words', '') | 506 | end_time = license_data.get('words_result', {}).get('失效日期', {}).get('words', '') |
411 | id_card_dict['有效期限'] = '{0}-{1}'.format(start_time, end_time) | 507 | id_card_dict['有效期限'] = '{0}-{1}'.format(start_time, end_time) |
508 | end_time_location_list = license_data.get('words_result', {}).get('失效日期', {}).get( | ||
509 | 'location', [-1, -1, -1, -1]) | ||
510 | if len(end_time_location_list) == 4: | ||
511 | position_dict['有效期限'] = { | ||
512 | consts.FIELD_POSITION_KEY: { | ||
513 | 'top': end_time_location_list[1], | ||
514 | 'left': end_time_location_list[0], | ||
515 | 'height': end_time_location_list[-1] - end_time_location_list[1], | ||
516 | 'width': end_time_location_list[2] - end_time_location_list[0] | ||
517 | } | ||
518 | } | ||
412 | 519 | ||
413 | 520 | id_card_dict[consts.ALL_POSITION_KEY] = position_dict | |
521 | id_card_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path | ||
414 | if not is_info_side: | 522 | if not is_info_side: |
415 | id_card_dict[consts.IMG_PATH_KEY_2] = img_path | 523 | id_card_dict[consts.IMG_PATH_KEY_2] = img_path |
416 | else: | 524 | else: |
417 | id_card_dict[consts.IMG_PATH_KEY] = img_path | 525 | id_card_dict[consts.IMG_PATH_KEY] = img_path |
418 | if is_ic: | 526 | if is_ic and is_save: |
419 | try: | 527 | card_type = -1 |
420 | base64_img = license_data.pop('base64_img') | 528 | json_data_4 = { |
421 | except Exception as e: | 529 | 'mode': 1, |
422 | pass | 530 | 'user_info': { |
531 | 'image_content': base64_img, | ||
532 | }, | ||
533 | 'options': { | ||
534 | 'distinguish_type': 1, | ||
535 | 'auto_rotate': True, | ||
536 | }, | ||
537 | } | ||
538 | for times in range(consts.RETRY_TIMES): | ||
539 | try: | ||
540 | start_time = time.time() | ||
541 | ocr_4_response = requests.post(self.ocr_url_4, json=json_data_4) | ||
542 | if ocr_4_response.status_code != 200: | ||
543 | raise OCR4Exception('ocr_4 status code: {0}'.format(ocr_4_response.status_code)) | ||
544 | except Exception as e: | ||
545 | self.online_log.warn( | ||
546 | '{0} [ocr_4 failed] [times={1}] [img_path={2}] [error={3}]'.format( | ||
547 | self.log_base, times, img_path, traceback.format_exc())) | ||
548 | else: | ||
549 | ocr_4_res = ocr_4_response.json() | ||
550 | end_time = time.time() | ||
551 | speed_time = int(end_time - start_time) | ||
552 | |||
553 | if ocr_4_res.get('code') == 0 and ocr_4_res.get('result', {}).get('rtn') == 0: | ||
554 | card_type = ocr_4_res.get('result', {}).get( | ||
555 | 'idcard_distinguish_result', {}).get('result', -1) | ||
556 | |||
557 | self.online_log.info( | ||
558 | '{0} [ocr_4 success] [img_path={1}] [speed_time={2}]'.format( | ||
559 | self.log_base, img_path, speed_time)) | ||
560 | break | ||
423 | else: | 561 | else: |
424 | card_type = -1 | 562 | self.online_log.warn( |
425 | json_data_4 = { | 563 | '{0} [ocr_4 failed] [img_path={1}]'.format(self.log_base, img_path)) |
426 | 'mode': 1, | ||
427 | 'user_info': { | ||
428 | 'image_content': base64_img, | ||
429 | }, | ||
430 | 'options': { | ||
431 | 'distinguish_type': 1, | ||
432 | 'auto_rotate': True, | ||
433 | }, | ||
434 | } | ||
435 | for times in range(consts.RETRY_TIMES): | ||
436 | try: | ||
437 | start_time = time.time() | ||
438 | ocr_4_response = requests.post(self.ocr_url_4, json=json_data_4) | ||
439 | if ocr_4_response.status_code != 200: | ||
440 | raise OCR4Exception('ocr_4 status code: {0}'.format(ocr_4_response.status_code)) | ||
441 | except Exception as e: | ||
442 | self.online_log.warn( | ||
443 | '{0} [ocr_4 failed] [times={1}] [img_path={2}] [error={3}]'.format( | ||
444 | self.log_base, times, img_path, traceback.format_exc())) | ||
445 | else: | ||
446 | ocr_4_res = ocr_4_response.json() | ||
447 | end_time = time.time() | ||
448 | speed_time = int(end_time - start_time) | ||
449 | 564 | ||
450 | if ocr_4_res.get('code') == 0 and ocr_4_res.get('result', {}).get('rtn') == 0: | 565 | id_card_dict[consts.IC_TURE_OR_FALSE] = consts.IC_RES_MAPPING.get(card_type) |
451 | card_type = ocr_4_res.get('result', {}).get( | ||
452 | 'idcard_distinguish_result', {}).get('result', -1) | ||
453 | 566 | ||
454 | self.online_log.info( | 567 | if do_dda and isinstance(id_card_dict.get(consts.IC_KEY_FIELD[0]), str) and \ |
455 | '{0} [ocr_4 success] [img_path={1}] [speed_time={2}]'.format( | 568 | isinstance(id_card_dict.get(consts.IC_KEY_FIELD[1]), str): |
456 | self.log_base, img_path, speed_time)) | 569 | ic_name = id_card_dict.get(consts.IC_KEY_FIELD[0], '').strip() |
457 | break | 570 | ic_id = id_card_dict.get(consts.IC_KEY_FIELD[1], '').strip() |
458 | else: | 571 | if len(ic_name) > 0 and len(ic_id) > 0: |
459 | self.online_log.warn( | 572 | dda_id_bc_mapping.setdefault(consts.IC_FIELD, []).append((ic_name, ic_id, img_path)) |
460 | '{0} [ocr_4 failed] [img_path={1}]'.format(self.log_base, img_path)) | ||
461 | |||
462 | id_card_dict[consts.IC_TURE_OR_FALSE] = consts.IC_RES_MAPPING.get(card_type) | ||
463 | finally: | ||
464 | if do_dda and isinstance(id_card_dict.get(consts.IC_KEY_FIELD[0]), str) and isinstance(id_card_dict.get(consts.IC_KEY_FIELD[1]), str): | ||
465 | ic_name = id_card_dict[consts.IC_KEY_FIELD[0]].strip() | ||
466 | ic_id = id_card_dict[consts.IC_KEY_FIELD[1]].strip() | ||
467 | if len(ic_name) > 0 and len(ic_id) > 0: | ||
468 | dda_id_bc_mapping.setdefault(consts.IC_FIELD, []).append((ic_name, ic_id, img_path)) | ||
469 | license_summary.setdefault(classify, []).append(id_card_dict) | 573 | license_summary.setdefault(classify, []).append(id_card_dict) |
574 | # 购车发票 & 二手车发票 | ||
470 | elif classify == consts.MVI_CLASSIFY or classify == consts.UCI_CLASSIFY: | 575 | elif classify == consts.MVI_CLASSIFY or classify == consts.UCI_CLASSIFY: |
471 | rebuild_data_dict = {} | 576 | rebuild_data_dict = {} |
577 | position_dict = {} | ||
472 | mvi_res = license_data.pop('result', {}) | 578 | mvi_res = license_data.pop('result', {}) |
473 | for en_key, detail_dict in mvi_res.items(): | 579 | for en_key, detail_dict in mvi_res.items(): |
474 | rebuild_data_dict[detail_dict.get('chinese_key', '')] = detail_dict.get('words', '') | 580 | rebuild_data_dict[detail_dict.get('chinese_key', '')] = detail_dict.get('words', '') |
581 | position_dict[detail_dict.get('chinese_key', '')] = { | ||
582 | consts.FIELD_POSITION_KEY: detail_dict.get('position', {}) | ||
583 | } | ||
584 | rebuild_data_dict[consts.IMG_PATH_KEY] = img_path | ||
585 | rebuild_data_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path | ||
586 | rebuild_data_dict[consts.ALL_POSITION_KEY] = position_dict | ||
475 | license_summary.setdefault(classify, []).append(rebuild_data_dict) | 587 | license_summary.setdefault(classify, []).append(rebuild_data_dict) |
476 | # 其他 | 588 | # 其他 |
477 | else: | 589 | else: |
478 | for res_dict in license_data: | 590 | for res_dict in license_data: |
479 | res_dict[consts.IMG_PATH_KEY] = img_path | 591 | res_dict[consts.IMG_PATH_KEY] = img_path |
592 | res_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path | ||
480 | license_summary.setdefault(classify, []).extend(license_data) | 593 | license_summary.setdefault(classify, []).extend(license_data) |
481 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS)) | 594 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS)) |
482 | 595 | ... | ... |
-
Please register or sign in to post a comment