Merge branch 'feature/CHINARPA-4962' into feature/uat-tmp
Showing 6 changed files with 28 additions and 12 deletions
... | @@ -1018,9 +1018,9 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1018,9 +1018,9 @@ class Command(BaseCommand, LoggerMixin): |
1018 | # 添加处理, | 1018 | # 添加处理, |
1019 | # [售后回租合同] - 如果 key 是 "承租人签字", 且内容中包含 签署日期:XXXX, 则将签署日期去除 | 1019 | # [售后回租合同] - 如果 key 是 "承租人签字", 且内容中包含 签署日期:XXXX, 则将签署日期去除 |
1020 | # [车辆租赁抵押合同] - 如果 key 是 "" | 1020 | # [车辆租赁抵押合同] - 如果 key 是 "" |
1021 | if key == '承租人签字' and '签署日期' in tmp_res: | 1021 | if key == '承租人签字' and tmp_res is not None and '签署日期' in tmp_res: |
1022 | res[key] = tmp_res.split('签署日期')[0] | 1022 | res[key] = tmp_res.split('签署日期')[0] |
1023 | if key == "抵押人签字" and "签署日期" in tmp_res: | 1023 | if key == "抵押人签字" and tmp_res is not None and "签署日期" in tmp_res: |
1024 | res[key] = tmp_res.split("签署日期")[0] | 1024 | res[key] = tmp_res.split("签署日期")[0] |
1025 | res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(img_pno), {}).get( | 1025 | res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(img_pno), {}).get( |
1026 | consts.IMG_PATH_KEY, '') | 1026 | consts.IMG_PATH_KEY, '') |
... | @@ -1624,7 +1624,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1624,7 +1624,7 @@ class Command(BaseCommand, LoggerMixin): |
1624 | try: | 1624 | try: |
1625 | channel, img_path, text_list = img_queue.get(block=False) | 1625 | channel, img_path, text_list = img_queue.get(block=False) |
1626 | except Exception as e: | 1626 | except Exception as e: |
1627 | # self.online_log.info('{0} [img_2_ocr_1] [queue empty]'.format(self.log_base)) | 1627 | self.online_log.info('{0} [img_2_ocr_1] [queue empty]'.format(self.log_base)) |
1628 | time.sleep(self.sleep_time_img_get) | 1628 | time.sleep(self.sleep_time_img_get) |
1629 | continue | 1629 | continue |
1630 | else: | 1630 | else: |
... | @@ -1653,6 +1653,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1653,6 +1653,7 @@ class Command(BaseCommand, LoggerMixin): |
1653 | '[error={4}]'.format(self.log_base, times, url, img_path, | 1653 | '[error={4}]'.format(self.log_base, times, url, img_path, |
1654 | traceback.format_exc())) | 1654 | traceback.format_exc())) |
1655 | else: | 1655 | else: |
1656 | self.online_log.info('{0} [ocr_1 start] [img={1}] [url={2}]'.format(self.log_base, img_path, url)) | ||
1656 | ocr_1_res = ocr_1_response.json() | 1657 | ocr_1_res = ocr_1_response.json() |
1657 | end_time = time.time() | 1658 | end_time = time.time() |
1658 | speed_time = int(end_time - start_time) | 1659 | speed_time = int(end_time - start_time) |
... | @@ -1699,8 +1700,9 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1699,8 +1700,9 @@ class Command(BaseCommand, LoggerMixin): |
1699 | self.online_log.info('{0} [res_2_wb] [get task] [queue running] [finish_queue_size={1}]'.format(self.log_base, finish_queue.qsize())) | 1700 | self.online_log.info('{0} [res_2_wb] [get task] [queue running] [finish_queue_size={1}]'.format(self.log_base, finish_queue.qsize())) |
1700 | while len(error_list) == 0 or not img_queue.empty() or not finish_queue.empty(): | 1701 | while len(error_list) == 0 or not img_queue.empty() or not finish_queue.empty(): |
1701 | try: | 1702 | try: |
1703 | self.online_log.info('{0} [res_2_wb] [finish_queue.get1] [finish_queue_size={1}] [img_queue_size={2}]'.format(self.log_base, finish_queue.qsize(), img_queue.qsize())) | ||
1702 | task_str = finish_queue.get(block=False) | 1704 | task_str = finish_queue.get(block=False) |
1703 | self.online_log.info('{0} [res_2_wb] [finish_queue.get]'.format(self.log_base)) | 1705 | self.online_log.info('{0} [res_2_wb] [finish_queue.get2]'.format(self.log_base)) |
1704 | except Exception as e: | 1706 | except Exception as e: |
1705 | self.online_log.info('{0} [res_2_wb] [queue empty]'.format(self.log_base)) | 1707 | self.online_log.info('{0} [res_2_wb] [queue empty]'.format(self.log_base)) |
1706 | time.sleep(self.sleep_time_task_get) | 1708 | time.sleep(self.sleep_time_task_get) |
... | @@ -2463,6 +2465,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -2463,6 +2465,7 @@ class Command(BaseCommand, LoggerMixin): |
2463 | except Exception as e: | 2465 | except Exception as e: |
2464 | self.online_log.error('{0} [process error (pdf & img remove)] [task={1}] [error={2}]'.format( | 2466 | self.online_log.error('{0} [process error (pdf & img remove)] [task={1}] [error={2}]'.format( |
2465 | self.log_base, task_str, traceback.format_exc())) | 2467 | self.log_base, task_str, traceback.format_exc())) |
2468 | self.online_log.info('{0} [res_2_wb after while] [len(error_list)={1}] [img_queue={2}] [finish_queue={3}]'.format(self.log_base, len(error_list), img_queue.empty(), finish_queue.empty())) | ||
2466 | 2469 | ||
2467 | def handle(self, *args, **kwargs): | 2470 | def handle(self, *args, **kwargs): |
2468 | db.close_old_connections() | 2471 | db.close_old_connections() | ... | ... |
... | @@ -2458,7 +2458,7 @@ def se_bs_compare(license_en, ocr_res_dict, strip_list, is_auto, aa_type): | ... | @@ -2458,7 +2458,7 @@ def se_bs_compare(license_en, ocr_res_dict, strip_list, is_auto, aa_type): |
2458 | return result_field_list, field_img_path_dict | 2458 | return result_field_list, field_img_path_dict |
2459 | 2459 | ||
2460 | 2460 | ||
2461 | def se_compare_license(license_en, ocr_res_dict, field_list): | 2461 | def se_compare_license(license_en, ocr_res_dict, field_list, is_auto): |
2462 | ocr_field, compare_logic, special_expiry_date = consts.SE_COMPARE_FIELD[license_en] | 2462 | ocr_field, compare_logic, special_expiry_date = consts.SE_COMPARE_FIELD[license_en] |
2463 | 2463 | ||
2464 | is_find = False | 2464 | is_find = False |
... | @@ -2513,6 +2513,14 @@ def se_compare_license(license_en, ocr_res_dict, field_list): | ... | @@ -2513,6 +2513,14 @@ def se_compare_license(license_en, ocr_res_dict, field_list): |
2513 | ocr_res_list[res_idx].get(consts.LOWER_AMOUNT_FIELD, ''), | 2513 | ocr_res_list[res_idx].get(consts.LOWER_AMOUNT_FIELD, ''), |
2514 | ocr_res_list[res_idx].get(consts.UPPER_AMOUNT_FIELD, ''), | 2514 | ocr_res_list[res_idx].get(consts.UPPER_AMOUNT_FIELD, ''), |
2515 | ) | 2515 | ) |
2516 | # auto 保单 保险费合计 ocr结果需要加上一个基数,再与cms结果做比对 | ||
2517 | elif is_auto and ocr_field == consts.BD_FIELD and name == consts.SE_BD_FIELD[10]: | ||
2518 | ocr_str = ocr_res_list[res_idx].get(compare_logic[name][0]) | ||
2519 | compare_log.info('{0} [bd_4962_price] [ori ocr_str:{1}] '.format(log_base, ocr_str)) | ||
2520 | add_price = conf.BD_PRICE | ||
2521 | compare_log.info('{0} [bd_4962_price] [add_price:{1}] '.format(log_base, add_price)) | ||
2522 | ocr_str = float(ocr_str) + float(add_price) | ||
2523 | compare_log.info('{0} [bd_4962_price] [final ocr_str:{1}] '.format(log_base, ocr_str)) | ||
2516 | else: | 2524 | else: |
2517 | ocr_str = ocr_res_list[res_idx].get(compare_logic[name][0]) | 2525 | ocr_str = ocr_res_list[res_idx].get(compare_logic[name][0]) |
2518 | 2526 | ||
... | @@ -3287,7 +3295,7 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh, is_auto, id_res_list | ... | @@ -3287,7 +3295,7 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh, is_auto, id_res_list |
3287 | license_en, id_res_list, strip_list, is_auto) | 3295 | license_en, id_res_list, strip_list, is_auto) |
3288 | else: | 3296 | else: |
3289 | result_field_list, no_ocr_result, field_img_path_dict = se_compare_license( | 3297 | result_field_list, no_ocr_result, field_img_path_dict = se_compare_license( |
3290 | license_en, ocr_res_dict, strip_list) | 3298 | license_en, ocr_res_dict, strip_list, is_auto) |
3291 | each_license_failed_count = 0 | 3299 | each_license_failed_count = 0 |
3292 | for name, value, result, ocr_str, img_path, error_type, cn_reason in result_field_list: | 3300 | for name, value, result, ocr_str, img_path, error_type, cn_reason in result_field_list: |
3293 | if license_en not in consts.SKIP_CARD or not no_ocr_result: | 3301 | if license_en not in consts.SKIP_CARD or not no_ocr_result: |
... | @@ -3346,7 +3354,7 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh, is_auto, id_res_list | ... | @@ -3346,7 +3354,7 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh, is_auto, id_res_list |
3346 | elif license_en == consts.FS_EN: | 3354 | elif license_en == consts.FS_EN: |
3347 | result_field_list, field_img_path_dict = se_fs_compare(license_en, ocr_res_dict, strip_list) | 3355 | result_field_list, field_img_path_dict = se_fs_compare(license_en, ocr_res_dict, strip_list) |
3348 | else: | 3356 | else: |
3349 | result_field_list, _, field_img_path_dict = se_compare_license(license_en, ocr_res_dict, strip_list) | 3357 | result_field_list, _, field_img_path_dict = se_compare_license(license_en, ocr_res_dict, strip_list, is_auto) |
3350 | 3358 | ||
3351 | each_license_failed_count = 0 | 3359 | each_license_failed_count = 0 |
3352 | for name, value, result, ocr_str, img_path, error_type, cn_reason in result_field_list: | 3360 | for name, value, result, ocr_str, img_path, error_type, cn_reason in result_field_list: | ... | ... |
... | @@ -345,7 +345,7 @@ class PDFHandler: | ... | @@ -345,7 +345,7 @@ class PDFHandler: |
345 | # 3.3 碎图分组大于2、全过滤、含特殊filter,特殊处理:整个页面保存为png图片 | 345 | # 3.3 碎图分组大于2、全过滤、含特殊filter,特殊处理:整个页面保存为png图片 |
346 | if page_to_png: | 346 | if page_to_png: |
347 | page = pdf.loadPage(pno) | 347 | page = pdf.loadPage(pno) |
348 | self.page_to_png(page, is_big_img=True) | 348 | self.page_to_png(page) |
349 | 349 | ||
350 | def title_is_ebank(self, char): | 350 | def title_is_ebank(self, char): |
351 | new_char = normalize('NFKC', char) | 351 | new_char = normalize('NFKC', char) |
... | @@ -450,7 +450,7 @@ class PDFHandler: | ... | @@ -450,7 +450,7 @@ class PDFHandler: |
450 | # 1.页面图片对象数目为0时,保存整个页面为png图片 | 450 | # 1.页面图片对象数目为0时,保存整个页面为png图片 |
451 | if self.is_e_pdf or self.is_ebank or len(il) == 0: | 451 | if self.is_e_pdf or self.is_ebank or len(il) == 0: |
452 | page = pdf.loadPage(pno) | 452 | page = pdf.loadPage(pno) |
453 | self.page_to_png(page, is_big_img=True) | 453 | self.page_to_png(page) |
454 | # 2.页面图片对象数目为1时: | 454 | # 2.页面图片对象数目为1时: |
455 | # 小图(如电子账单的盖章):保存整个页面为png图片 | 455 | # 小图(如电子账单的盖章):保存整个页面为png图片 |
456 | # 大图:提取图片对象 | 456 | # 大图:提取图片对象 |
... | @@ -459,13 +459,13 @@ class PDFHandler: | ... | @@ -459,13 +459,13 @@ class PDFHandler: |
459 | # 小图 | 459 | # 小图 |
460 | if width < WH_COUPLE_1[0] and height < WH_COUPLE_1[1]: | 460 | if width < WH_COUPLE_1[0] and height < WH_COUPLE_1[1]: |
461 | page = pdf.loadPage(pno) | 461 | page = pdf.loadPage(pno) |
462 | self.page_to_png(page, is_big_img=True) | 462 | self.page_to_png(page) |
463 | # 大图 | 463 | # 大图 |
464 | elif width >= WH_COUPLE_6[0] or height >= WH_COUPLE_6[1]: | 464 | elif width >= WH_COUPLE_6[0] or height >= WH_COUPLE_6[1]: |
465 | self.is_new_modify = 1 | 465 | self.is_new_modify = 1 |
466 | is_big_img = (width < WH_COUPLE_7[0] and height < WH_COUPLE_7[1]) # 防止图片过大 | 466 | is_big_img = (width < WH_COUPLE_7[0] and height < WH_COUPLE_7[1]) # 防止图片过大 |
467 | page = pdf.loadPage(pno) | 467 | page = pdf.loadPage(pno) |
468 | self.page_to_png(page, is_big_img=True) | 468 | self.page_to_png(page, is_big_img=is_big_img) |
469 | elif xref not in self.xref_set: | 469 | elif xref not in self.xref_set: |
470 | self.extract_single_image(pdf, xref, smask, colorspace, pno) | 470 | self.extract_single_image(pdf, xref, smask, colorspace, pno) |
471 | # 3.页面图片对象数目大于1时,特殊处理 | 471 | # 3.页面图片对象数目大于1时,特殊处理 |
... | @@ -480,7 +480,7 @@ class PDFHandler: | ... | @@ -480,7 +480,7 @@ class PDFHandler: |
480 | with fitz.Document(self.path) as pdf: | 480 | with fitz.Document(self.path) as pdf: |
481 | for pno in range(pdf.pageCount): | 481 | for pno in range(pdf.pageCount): |
482 | page = pdf.loadPage(pno) | 482 | page = pdf.loadPage(pno) |
483 | self.page_to_png(page, is_big_img=True) | 483 | self.page_to_png(page) |
484 | self.img_count = len(self.img_path_list) | 484 | self.img_count = len(self.img_path_list) |
485 | 485 | ||
486 | def ebank_draw(self): | 486 | def ebank_draw(self): | ... | ... |
... | @@ -15,3 +15,5 @@ DEALER_CODE = ocr_situ_group | ... | @@ -15,3 +15,5 @@ DEALER_CODE = ocr_situ_group |
15 | BASE_URL = https://staging-bmw-ocr.situdata.com | 15 | BASE_URL = https://staging-bmw-ocr.situdata.com |
16 | 16 | ||
17 | DELAY_SECONDS = 60 | 17 | DELAY_SECONDS = 60 |
18 | |||
19 | BD_PRICE = 950 | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
... | @@ -15,3 +15,5 @@ DEALER_CODE = ocr_situ_group | ... | @@ -15,3 +15,5 @@ DEALER_CODE = ocr_situ_group |
15 | BASE_URL = https://sfocr-uat.bmwgroup.net | 15 | BASE_URL = https://sfocr-uat.bmwgroup.net |
16 | 16 | ||
17 | DELAY_SECONDS = 60 | 17 | DELAY_SECONDS = 60 |
18 | |||
19 | BD_PRICE = 950 | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or sign in to post a comment