eb259387 by 冯轩

Merge branch 'feature/CHINARPA-4962' into feature/uat-tmp

2 parents 07007f09 2be87904
...@@ -1018,9 +1018,9 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1018,9 +1018,9 @@ class Command(BaseCommand, LoggerMixin):
1018 # 添加处理, 1018 # 添加处理,
1019 # [售后回租合同] - 如果 key 是 "承租人签字", 且内容中包含 签署日期:XXXX, 则将签署日期去除 1019 # [售后回租合同] - 如果 key 是 "承租人签字", 且内容中包含 签署日期:XXXX, 则将签署日期去除
1020 # [车辆租赁抵押合同] - 如果 key 是 "抵押人签字", 且内容中包含 签署日期:XXXX, 则将签署日期去除 1020 # [车辆租赁抵押合同] - 如果 key 是 "抵押人签字", 且内容中包含 签署日期:XXXX, 则将签署日期去除
1021 if key == '承租人签字' and '签署日期' in tmp_res: 1021 if key == '承租人签字' and tmp_res is not None and '签署日期' in tmp_res:
1022 res[key] = tmp_res.split('签署日期')[0] 1022 res[key] = tmp_res.split('签署日期')[0]
1023 if key == "抵押人签字" and "签署日期" in tmp_res: 1023 if key == "抵押人签字" and tmp_res is not None and "签署日期" in tmp_res:
1024 res[key] = tmp_res.split("签署日期")[0] 1024 res[key] = tmp_res.split("签署日期")[0]
1025 res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(img_pno), {}).get( 1025 res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(img_pno), {}).get(
1026 consts.IMG_PATH_KEY, '') 1026 consts.IMG_PATH_KEY, '')
...@@ -1624,7 +1624,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1624,7 +1624,7 @@ class Command(BaseCommand, LoggerMixin):
1624 try: 1624 try:
1625 channel, img_path, text_list = img_queue.get(block=False) 1625 channel, img_path, text_list = img_queue.get(block=False)
1626 except Exception as e: 1626 except Exception as e:
1627 # self.online_log.info('{0} [img_2_ocr_1] [queue empty]'.format(self.log_base)) 1627 self.online_log.info('{0} [img_2_ocr_1] [queue empty]'.format(self.log_base))
1628 time.sleep(self.sleep_time_img_get) 1628 time.sleep(self.sleep_time_img_get)
1629 continue 1629 continue
1630 else: 1630 else:
...@@ -1653,6 +1653,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1653,6 +1653,7 @@ class Command(BaseCommand, LoggerMixin):
1653 '[error={4}]'.format(self.log_base, times, url, img_path, 1653 '[error={4}]'.format(self.log_base, times, url, img_path,
1654 traceback.format_exc())) 1654 traceback.format_exc()))
1655 else: 1655 else:
1656 self.online_log.info('{0} [ocr_1 start] [img={1}] [url={2}]'.format(self.log_base, img_path, url))
1656 ocr_1_res = ocr_1_response.json() 1657 ocr_1_res = ocr_1_response.json()
1657 end_time = time.time() 1658 end_time = time.time()
1658 speed_time = int(end_time - start_time) 1659 speed_time = int(end_time - start_time)
...@@ -1699,8 +1700,9 @@ class Command(BaseCommand, LoggerMixin): ...@@ -1699,8 +1700,9 @@ class Command(BaseCommand, LoggerMixin):
1699 self.online_log.info('{0} [res_2_wb] [get task] [queue running] [finish_queue_size={1}]'.format(self.log_base, finish_queue.qsize())) 1700 self.online_log.info('{0} [res_2_wb] [get task] [queue running] [finish_queue_size={1}]'.format(self.log_base, finish_queue.qsize()))
1700 while len(error_list) == 0 or not img_queue.empty() or not finish_queue.empty(): 1701 while len(error_list) == 0 or not img_queue.empty() or not finish_queue.empty():
1701 try: 1702 try:
1703 self.online_log.info('{0} [res_2_wb] [finish_queue.get1] [finish_queue_size={1}] [img_queue_size={2}]'.format(self.log_base, finish_queue.qsize(), img_queue.qsize()))
1702 task_str = finish_queue.get(block=False) 1704 task_str = finish_queue.get(block=False)
1703 self.online_log.info('{0} [res_2_wb] [finish_queue.get]'.format(self.log_base)) 1705 self.online_log.info('{0} [res_2_wb] [finish_queue.get2]'.format(self.log_base))
1704 except Exception as e: 1706 except Exception as e:
1705 self.online_log.info('{0} [res_2_wb] [queue empty]'.format(self.log_base)) 1707 self.online_log.info('{0} [res_2_wb] [queue empty]'.format(self.log_base))
1706 time.sleep(self.sleep_time_task_get) 1708 time.sleep(self.sleep_time_task_get)
...@@ -2463,6 +2465,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -2463,6 +2465,7 @@ class Command(BaseCommand, LoggerMixin):
2463 except Exception as e: 2465 except Exception as e:
2464 self.online_log.error('{0} [process error (pdf & img remove)] [task={1}] [error={2}]'.format( 2466 self.online_log.error('{0} [process error (pdf & img remove)] [task={1}] [error={2}]'.format(
2465 self.log_base, task_str, traceback.format_exc())) 2467 self.log_base, task_str, traceback.format_exc()))
2468 self.online_log.info('{0} [res_2_wb after while] [len(error_list)={1}] [img_queue={2}] [finish_queue={3}]'.format(self.log_base, len(error_list), img_queue.empty(), finish_queue.empty()))
2466 2469
2467 def handle(self, *args, **kwargs): 2470 def handle(self, *args, **kwargs):
2468 db.close_old_connections() 2471 db.close_old_connections()
......
...@@ -2458,7 +2458,7 @@ def se_bs_compare(license_en, ocr_res_dict, strip_list, is_auto, aa_type): ...@@ -2458,7 +2458,7 @@ def se_bs_compare(license_en, ocr_res_dict, strip_list, is_auto, aa_type):
2458 return result_field_list, field_img_path_dict 2458 return result_field_list, field_img_path_dict
2459 2459
2460 2460
2461 def se_compare_license(license_en, ocr_res_dict, field_list): 2461 def se_compare_license(license_en, ocr_res_dict, field_list, is_auto):
2462 ocr_field, compare_logic, special_expiry_date = consts.SE_COMPARE_FIELD[license_en] 2462 ocr_field, compare_logic, special_expiry_date = consts.SE_COMPARE_FIELD[license_en]
2463 2463
2464 is_find = False 2464 is_find = False
...@@ -2513,6 +2513,14 @@ def se_compare_license(license_en, ocr_res_dict, field_list): ...@@ -2513,6 +2513,14 @@ def se_compare_license(license_en, ocr_res_dict, field_list):
2513 ocr_res_list[res_idx].get(consts.LOWER_AMOUNT_FIELD, ''), 2513 ocr_res_list[res_idx].get(consts.LOWER_AMOUNT_FIELD, ''),
2514 ocr_res_list[res_idx].get(consts.UPPER_AMOUNT_FIELD, ''), 2514 ocr_res_list[res_idx].get(consts.UPPER_AMOUNT_FIELD, ''),
2515 ) 2515 )
2516 # auto 保单 保险费合计 ocr结果需要加上一个基数,再与cms结果做比对
2517 elif is_auto and ocr_field == consts.BD_FIELD and name == consts.SE_BD_FIELD[10]:
2518 ocr_str = ocr_res_list[res_idx].get(compare_logic[name][0])
2519 compare_log.info('{0} [bd_4962_price] [ori ocr_str:{1}] '.format(log_base, ocr_str))
2520 add_price = conf.BD_PRICE
2521 compare_log.info('{0} [bd_4962_price] [add_price:{1}] '.format(log_base, add_price))
2522 ocr_str = float(ocr_str) + float(add_price)
2523 compare_log.info('{0} [bd_4962_price] [final ocr_str:{1}] '.format(log_base, ocr_str))
2516 else: 2524 else:
2517 ocr_str = ocr_res_list[res_idx].get(compare_logic[name][0]) 2525 ocr_str = ocr_res_list[res_idx].get(compare_logic[name][0])
2518 2526
...@@ -3287,7 +3295,7 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh, is_auto, id_res_list ...@@ -3287,7 +3295,7 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh, is_auto, id_res_list
3287 license_en, id_res_list, strip_list, is_auto) 3295 license_en, id_res_list, strip_list, is_auto)
3288 else: 3296 else:
3289 result_field_list, no_ocr_result, field_img_path_dict = se_compare_license( 3297 result_field_list, no_ocr_result, field_img_path_dict = se_compare_license(
3290 license_en, ocr_res_dict, strip_list) 3298 license_en, ocr_res_dict, strip_list, is_auto)
3291 each_license_failed_count = 0 3299 each_license_failed_count = 0
3292 for name, value, result, ocr_str, img_path, error_type, cn_reason in result_field_list: 3300 for name, value, result, ocr_str, img_path, error_type, cn_reason in result_field_list:
3293 if license_en not in consts.SKIP_CARD or not no_ocr_result: 3301 if license_en not in consts.SKIP_CARD or not no_ocr_result:
...@@ -3346,7 +3354,7 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh, is_auto, id_res_list ...@@ -3346,7 +3354,7 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh, is_auto, id_res_list
3346 elif license_en == consts.FS_EN: 3354 elif license_en == consts.FS_EN:
3347 result_field_list, field_img_path_dict = se_fs_compare(license_en, ocr_res_dict, strip_list) 3355 result_field_list, field_img_path_dict = se_fs_compare(license_en, ocr_res_dict, strip_list)
3348 else: 3356 else:
3349 result_field_list, _, field_img_path_dict = se_compare_license(license_en, ocr_res_dict, strip_list) 3357 result_field_list, _, field_img_path_dict = se_compare_license(license_en, ocr_res_dict, strip_list, is_auto)
3350 3358
3351 each_license_failed_count = 0 3359 each_license_failed_count = 0
3352 for name, value, result, ocr_str, img_path, error_type, cn_reason in result_field_list: 3360 for name, value, result, ocr_str, img_path, error_type, cn_reason in result_field_list:
......
...@@ -345,7 +345,7 @@ class PDFHandler: ...@@ -345,7 +345,7 @@ class PDFHandler:
345 # 3.3 碎图分组大于2、全过滤、含特殊filter,特殊处理:整个页面保存为png图片 345 # 3.3 碎图分组大于2、全过滤、含特殊filter,特殊处理:整个页面保存为png图片
346 if page_to_png: 346 if page_to_png:
347 page = pdf.loadPage(pno) 347 page = pdf.loadPage(pno)
348 self.page_to_png(page, is_big_img=True) 348 self.page_to_png(page)
349 349
350 def title_is_ebank(self, char): 350 def title_is_ebank(self, char):
351 new_char = normalize('NFKC', char) 351 new_char = normalize('NFKC', char)
...@@ -450,7 +450,7 @@ class PDFHandler: ...@@ -450,7 +450,7 @@ class PDFHandler:
450 # 1.页面图片对象数目为0时,保存整个页面为png图片 450 # 1.页面图片对象数目为0时,保存整个页面为png图片
451 if self.is_e_pdf or self.is_ebank or len(il) == 0: 451 if self.is_e_pdf or self.is_ebank or len(il) == 0:
452 page = pdf.loadPage(pno) 452 page = pdf.loadPage(pno)
453 self.page_to_png(page, is_big_img=True) 453 self.page_to_png(page)
454 # 2.页面图片对象数目为1时: 454 # 2.页面图片对象数目为1时:
455 # 小图(如电子账单的盖章):保存整个页面为png图片 455 # 小图(如电子账单的盖章):保存整个页面为png图片
456 # 大图:提取图片对象 456 # 大图:提取图片对象
...@@ -459,13 +459,13 @@ class PDFHandler: ...@@ -459,13 +459,13 @@ class PDFHandler:
459 # 小图 459 # 小图
460 if width < WH_COUPLE_1[0] and height < WH_COUPLE_1[1]: 460 if width < WH_COUPLE_1[0] and height < WH_COUPLE_1[1]:
461 page = pdf.loadPage(pno) 461 page = pdf.loadPage(pno)
462 self.page_to_png(page, is_big_img=True) 462 self.page_to_png(page)
463 # 大图 463 # 大图
464 elif width >= WH_COUPLE_6[0] or height >= WH_COUPLE_6[1]: 464 elif width >= WH_COUPLE_6[0] or height >= WH_COUPLE_6[1]:
465 self.is_new_modify = 1 465 self.is_new_modify = 1
466 is_big_img = (width < WH_COUPLE_7[0] and height < WH_COUPLE_7[1]) # 防止图片过大 466 is_big_img = (width < WH_COUPLE_7[0] and height < WH_COUPLE_7[1]) # 防止图片过大
467 page = pdf.loadPage(pno) 467 page = pdf.loadPage(pno)
468 self.page_to_png(page, is_big_img=True) 468 self.page_to_png(page, is_big_img=is_big_img)
469 elif xref not in self.xref_set: 469 elif xref not in self.xref_set:
470 self.extract_single_image(pdf, xref, smask, colorspace, pno) 470 self.extract_single_image(pdf, xref, smask, colorspace, pno)
471 # 3.页面图片对象数目大于1时,特殊处理 471 # 3.页面图片对象数目大于1时,特殊处理
...@@ -480,7 +480,7 @@ class PDFHandler: ...@@ -480,7 +480,7 @@ class PDFHandler:
480 with fitz.Document(self.path) as pdf: 480 with fitz.Document(self.path) as pdf:
481 for pno in range(pdf.pageCount): 481 for pno in range(pdf.pageCount):
482 page = pdf.loadPage(pno) 482 page = pdf.loadPage(pno)
483 self.page_to_png(page, is_big_img=True) 483 self.page_to_png(page)
484 self.img_count = len(self.img_path_list) 484 self.img_count = len(self.img_path_list)
485 485
486 def ebank_draw(self): 486 def ebank_draw(self):
......
...@@ -16,3 +16,4 @@ BASE_URL = https://sfocr-prod.bmwgroup.net ...@@ -16,3 +16,4 @@ BASE_URL = https://sfocr-prod.bmwgroup.net
16 16
17 DELAY_SECONDS = 60 17 DELAY_SECONDS = 60
18 18
19 BD_PRICE = 950
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -15,3 +15,5 @@ DEALER_CODE = ocr_situ_group ...@@ -15,3 +15,5 @@ DEALER_CODE = ocr_situ_group
15 BASE_URL = https://staging-bmw-ocr.situdata.com 15 BASE_URL = https://staging-bmw-ocr.situdata.com
16 16
17 DELAY_SECONDS = 60 17 DELAY_SECONDS = 60
18
19 BD_PRICE = 950
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -15,3 +15,5 @@ DEALER_CODE = ocr_situ_group ...@@ -15,3 +15,5 @@ DEALER_CODE = ocr_situ_group
15 BASE_URL = https://sfocr-uat.bmwgroup.net 15 BASE_URL = https://sfocr-uat.bmwgroup.net
16 16
17 DELAY_SECONDS = 60 17 DELAY_SECONDS = 60
18
19 BD_PRICE = 950
...\ No newline at end of file ...\ No newline at end of file
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!