Merge branch 'feature/CHINARPA-4962' into feature/uat-tmp
Showing 6 changed files with 28 additions and 12 deletions
| ... | @@ -1018,9 +1018,9 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1018,9 +1018,9 @@ class Command(BaseCommand, LoggerMixin): |
| 1018 | # 添加处理, | 1018 | # 添加处理, |
| 1019 | # [售后回租合同] - 如果 key 是 "承租人签字", 且内容中包含 签署日期:XXXX, 则将签署日期去除 | 1019 | # [售后回租合同] - 如果 key 是 "承租人签字", 且内容中包含 签署日期:XXXX, 则将签署日期去除 |
| 1020 | # [车辆租赁抵押合同] - 如果 key 是 "" | 1020 | # [车辆租赁抵押合同] - 如果 key 是 "" |
| 1021 | if key == '承租人签字' and '签署日期' in tmp_res: | 1021 | if key == '承租人签字' and tmp_res is not None and '签署日期' in tmp_res: |
| 1022 | res[key] = tmp_res.split('签署日期')[0] | 1022 | res[key] = tmp_res.split('签署日期')[0] |
| 1023 | if key == "抵押人签字" and "签署日期" in tmp_res: | 1023 | if key == "抵押人签字" and tmp_res is not None and "签署日期" in tmp_res: |
| 1024 | res[key] = tmp_res.split("签署日期")[0] | 1024 | res[key] = tmp_res.split("签署日期")[0] |
| 1025 | res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(img_pno), {}).get( | 1025 | res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(img_pno), {}).get( |
| 1026 | consts.IMG_PATH_KEY, '') | 1026 | consts.IMG_PATH_KEY, '') |
| ... | @@ -1624,7 +1624,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1624,7 +1624,7 @@ class Command(BaseCommand, LoggerMixin): |
| 1624 | try: | 1624 | try: |
| 1625 | channel, img_path, text_list = img_queue.get(block=False) | 1625 | channel, img_path, text_list = img_queue.get(block=False) |
| 1626 | except Exception as e: | 1626 | except Exception as e: |
| 1627 | # self.online_log.info('{0} [img_2_ocr_1] [queue empty]'.format(self.log_base)) | 1627 | self.online_log.info('{0} [img_2_ocr_1] [queue empty]'.format(self.log_base)) |
| 1628 | time.sleep(self.sleep_time_img_get) | 1628 | time.sleep(self.sleep_time_img_get) |
| 1629 | continue | 1629 | continue |
| 1630 | else: | 1630 | else: |
| ... | @@ -1653,6 +1653,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1653,6 +1653,7 @@ class Command(BaseCommand, LoggerMixin): |
| 1653 | '[error={4}]'.format(self.log_base, times, url, img_path, | 1653 | '[error={4}]'.format(self.log_base, times, url, img_path, |
| 1654 | traceback.format_exc())) | 1654 | traceback.format_exc())) |
| 1655 | else: | 1655 | else: |
| 1656 | self.online_log.info('{0} [ocr_1 start] [img={1}] [url={2}]'.format(self.log_base, img_path, url)) | ||
| 1656 | ocr_1_res = ocr_1_response.json() | 1657 | ocr_1_res = ocr_1_response.json() |
| 1657 | end_time = time.time() | 1658 | end_time = time.time() |
| 1658 | speed_time = int(end_time - start_time) | 1659 | speed_time = int(end_time - start_time) |
| ... | @@ -1699,8 +1700,9 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -1699,8 +1700,9 @@ class Command(BaseCommand, LoggerMixin): |
| 1699 | self.online_log.info('{0} [res_2_wb] [get task] [queue running] [finish_queue_size={1}]'.format(self.log_base, finish_queue.qsize())) | 1700 | self.online_log.info('{0} [res_2_wb] [get task] [queue running] [finish_queue_size={1}]'.format(self.log_base, finish_queue.qsize())) |
| 1700 | while len(error_list) == 0 or not img_queue.empty() or not finish_queue.empty(): | 1701 | while len(error_list) == 0 or not img_queue.empty() or not finish_queue.empty(): |
| 1701 | try: | 1702 | try: |
| 1703 | self.online_log.info('{0} [res_2_wb] [finish_queue.get1] [finish_queue_size={1}] [img_queue_size={2}]'.format(self.log_base, finish_queue.qsize(), img_queue.qsize())) | ||
| 1702 | task_str = finish_queue.get(block=False) | 1704 | task_str = finish_queue.get(block=False) |
| 1703 | self.online_log.info('{0} [res_2_wb] [finish_queue.get]'.format(self.log_base)) | 1705 | self.online_log.info('{0} [res_2_wb] [finish_queue.get2]'.format(self.log_base)) |
| 1704 | except Exception as e: | 1706 | except Exception as e: |
| 1705 | self.online_log.info('{0} [res_2_wb] [queue empty]'.format(self.log_base)) | 1707 | self.online_log.info('{0} [res_2_wb] [queue empty]'.format(self.log_base)) |
| 1706 | time.sleep(self.sleep_time_task_get) | 1708 | time.sleep(self.sleep_time_task_get) |
| ... | @@ -2463,6 +2465,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -2463,6 +2465,7 @@ class Command(BaseCommand, LoggerMixin): |
| 2463 | except Exception as e: | 2465 | except Exception as e: |
| 2464 | self.online_log.error('{0} [process error (pdf & img remove)] [task={1}] [error={2}]'.format( | 2466 | self.online_log.error('{0} [process error (pdf & img remove)] [task={1}] [error={2}]'.format( |
| 2465 | self.log_base, task_str, traceback.format_exc())) | 2467 | self.log_base, task_str, traceback.format_exc())) |
| 2468 | self.online_log.info('{0} [res_2_wb after while] [len(error_list)={1}] [img_queue={2}] [finish_queue={3}]'.format(self.log_base, len(error_list), img_queue.empty(), finish_queue.empty())) | ||
| 2466 | 2469 | ||
| 2467 | def handle(self, *args, **kwargs): | 2470 | def handle(self, *args, **kwargs): |
| 2468 | db.close_old_connections() | 2471 | db.close_old_connections() | ... | ... |
| ... | @@ -2458,7 +2458,7 @@ def se_bs_compare(license_en, ocr_res_dict, strip_list, is_auto, aa_type): | ... | @@ -2458,7 +2458,7 @@ def se_bs_compare(license_en, ocr_res_dict, strip_list, is_auto, aa_type): |
| 2458 | return result_field_list, field_img_path_dict | 2458 | return result_field_list, field_img_path_dict |
| 2459 | 2459 | ||
| 2460 | 2460 | ||
| 2461 | def se_compare_license(license_en, ocr_res_dict, field_list): | 2461 | def se_compare_license(license_en, ocr_res_dict, field_list, is_auto): |
| 2462 | ocr_field, compare_logic, special_expiry_date = consts.SE_COMPARE_FIELD[license_en] | 2462 | ocr_field, compare_logic, special_expiry_date = consts.SE_COMPARE_FIELD[license_en] |
| 2463 | 2463 | ||
| 2464 | is_find = False | 2464 | is_find = False |
| ... | @@ -2513,6 +2513,14 @@ def se_compare_license(license_en, ocr_res_dict, field_list): | ... | @@ -2513,6 +2513,14 @@ def se_compare_license(license_en, ocr_res_dict, field_list): |
| 2513 | ocr_res_list[res_idx].get(consts.LOWER_AMOUNT_FIELD, ''), | 2513 | ocr_res_list[res_idx].get(consts.LOWER_AMOUNT_FIELD, ''), |
| 2514 | ocr_res_list[res_idx].get(consts.UPPER_AMOUNT_FIELD, ''), | 2514 | ocr_res_list[res_idx].get(consts.UPPER_AMOUNT_FIELD, ''), |
| 2515 | ) | 2515 | ) |
| 2516 | # auto 保单 保险费合计 ocr结果需要加上一个基数,再与cms结果做比对 | ||
| 2517 | elif is_auto and ocr_field == consts.BD_FIELD and name == consts.SE_BD_FIELD[10]: | ||
| 2518 | ocr_str = ocr_res_list[res_idx].get(compare_logic[name][0]) | ||
| 2519 | compare_log.info('{0} [bd_4962_price] [ori ocr_str:{1}] '.format(log_base, ocr_str)) | ||
| 2520 | add_price = conf.BD_PRICE | ||
| 2521 | compare_log.info('{0} [bd_4962_price] [add_price:{1}] '.format(log_base, add_price)) | ||
| 2522 | ocr_str = float(ocr_str) + float(add_price) | ||
| 2523 | compare_log.info('{0} [bd_4962_price] [final ocr_str:{1}] '.format(log_base, ocr_str)) | ||
| 2516 | else: | 2524 | else: |
| 2517 | ocr_str = ocr_res_list[res_idx].get(compare_logic[name][0]) | 2525 | ocr_str = ocr_res_list[res_idx].get(compare_logic[name][0]) |
| 2518 | 2526 | ||
| ... | @@ -3287,7 +3295,7 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh, is_auto, id_res_list | ... | @@ -3287,7 +3295,7 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh, is_auto, id_res_list |
| 3287 | license_en, id_res_list, strip_list, is_auto) | 3295 | license_en, id_res_list, strip_list, is_auto) |
| 3288 | else: | 3296 | else: |
| 3289 | result_field_list, no_ocr_result, field_img_path_dict = se_compare_license( | 3297 | result_field_list, no_ocr_result, field_img_path_dict = se_compare_license( |
| 3290 | license_en, ocr_res_dict, strip_list) | 3298 | license_en, ocr_res_dict, strip_list, is_auto) |
| 3291 | each_license_failed_count = 0 | 3299 | each_license_failed_count = 0 |
| 3292 | for name, value, result, ocr_str, img_path, error_type, cn_reason in result_field_list: | 3300 | for name, value, result, ocr_str, img_path, error_type, cn_reason in result_field_list: |
| 3293 | if license_en not in consts.SKIP_CARD or not no_ocr_result: | 3301 | if license_en not in consts.SKIP_CARD or not no_ocr_result: |
| ... | @@ -3346,7 +3354,7 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh, is_auto, id_res_list | ... | @@ -3346,7 +3354,7 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh, is_auto, id_res_list |
| 3346 | elif license_en == consts.FS_EN: | 3354 | elif license_en == consts.FS_EN: |
| 3347 | result_field_list, field_img_path_dict = se_fs_compare(license_en, ocr_res_dict, strip_list) | 3355 | result_field_list, field_img_path_dict = se_fs_compare(license_en, ocr_res_dict, strip_list) |
| 3348 | else: | 3356 | else: |
| 3349 | result_field_list, _, field_img_path_dict = se_compare_license(license_en, ocr_res_dict, strip_list) | 3357 | result_field_list, _, field_img_path_dict = se_compare_license(license_en, ocr_res_dict, strip_list, is_auto) |
| 3350 | 3358 | ||
| 3351 | each_license_failed_count = 0 | 3359 | each_license_failed_count = 0 |
| 3352 | for name, value, result, ocr_str, img_path, error_type, cn_reason in result_field_list: | 3360 | for name, value, result, ocr_str, img_path, error_type, cn_reason in result_field_list: | ... | ... |
| ... | @@ -345,7 +345,7 @@ class PDFHandler: | ... | @@ -345,7 +345,7 @@ class PDFHandler: |
| 345 | # 3.3 碎图分组大于2、全过滤、含特殊filter,特殊处理:整个页面保存为png图片 | 345 | # 3.3 碎图分组大于2、全过滤、含特殊filter,特殊处理:整个页面保存为png图片 |
| 346 | if page_to_png: | 346 | if page_to_png: |
| 347 | page = pdf.loadPage(pno) | 347 | page = pdf.loadPage(pno) |
| 348 | self.page_to_png(page, is_big_img=True) | 348 | self.page_to_png(page) |
| 349 | 349 | ||
| 350 | def title_is_ebank(self, char): | 350 | def title_is_ebank(self, char): |
| 351 | new_char = normalize('NFKC', char) | 351 | new_char = normalize('NFKC', char) |
| ... | @@ -450,7 +450,7 @@ class PDFHandler: | ... | @@ -450,7 +450,7 @@ class PDFHandler: |
| 450 | # 1.页面图片对象数目为0时,保存整个页面为png图片 | 450 | # 1.页面图片对象数目为0时,保存整个页面为png图片 |
| 451 | if self.is_e_pdf or self.is_ebank or len(il) == 0: | 451 | if self.is_e_pdf or self.is_ebank or len(il) == 0: |
| 452 | page = pdf.loadPage(pno) | 452 | page = pdf.loadPage(pno) |
| 453 | self.page_to_png(page, is_big_img=True) | 453 | self.page_to_png(page) |
| 454 | # 2.页面图片对象数目为1时: | 454 | # 2.页面图片对象数目为1时: |
| 455 | # 小图(如电子账单的盖章):保存整个页面为png图片 | 455 | # 小图(如电子账单的盖章):保存整个页面为png图片 |
| 456 | # 大图:提取图片对象 | 456 | # 大图:提取图片对象 |
| ... | @@ -459,13 +459,13 @@ class PDFHandler: | ... | @@ -459,13 +459,13 @@ class PDFHandler: |
| 459 | # 小图 | 459 | # 小图 |
| 460 | if width < WH_COUPLE_1[0] and height < WH_COUPLE_1[1]: | 460 | if width < WH_COUPLE_1[0] and height < WH_COUPLE_1[1]: |
| 461 | page = pdf.loadPage(pno) | 461 | page = pdf.loadPage(pno) |
| 462 | self.page_to_png(page, is_big_img=True) | 462 | self.page_to_png(page) |
| 463 | # 大图 | 463 | # 大图 |
| 464 | elif width >= WH_COUPLE_6[0] or height >= WH_COUPLE_6[1]: | 464 | elif width >= WH_COUPLE_6[0] or height >= WH_COUPLE_6[1]: |
| 465 | self.is_new_modify = 1 | 465 | self.is_new_modify = 1 |
| 466 | is_big_img = (width < WH_COUPLE_7[0] and height < WH_COUPLE_7[1]) # 防止图片过大 | 466 | is_big_img = (width < WH_COUPLE_7[0] and height < WH_COUPLE_7[1]) # 防止图片过大 |
| 467 | page = pdf.loadPage(pno) | 467 | page = pdf.loadPage(pno) |
| 468 | self.page_to_png(page, is_big_img=True) | 468 | self.page_to_png(page, is_big_img=is_big_img) |
| 469 | elif xref not in self.xref_set: | 469 | elif xref not in self.xref_set: |
| 470 | self.extract_single_image(pdf, xref, smask, colorspace, pno) | 470 | self.extract_single_image(pdf, xref, smask, colorspace, pno) |
| 471 | # 3.页面图片对象数目大于1时,特殊处理 | 471 | # 3.页面图片对象数目大于1时,特殊处理 |
| ... | @@ -480,7 +480,7 @@ class PDFHandler: | ... | @@ -480,7 +480,7 @@ class PDFHandler: |
| 480 | with fitz.Document(self.path) as pdf: | 480 | with fitz.Document(self.path) as pdf: |
| 481 | for pno in range(pdf.pageCount): | 481 | for pno in range(pdf.pageCount): |
| 482 | page = pdf.loadPage(pno) | 482 | page = pdf.loadPage(pno) |
| 483 | self.page_to_png(page, is_big_img=True) | 483 | self.page_to_png(page) |
| 484 | self.img_count = len(self.img_path_list) | 484 | self.img_count = len(self.img_path_list) |
| 485 | 485 | ||
| 486 | def ebank_draw(self): | 486 | def ebank_draw(self): | ... | ... |
| ... | @@ -15,3 +15,5 @@ DEALER_CODE = ocr_situ_group | ... | @@ -15,3 +15,5 @@ DEALER_CODE = ocr_situ_group |
| 15 | BASE_URL = https://staging-bmw-ocr.situdata.com | 15 | BASE_URL = https://staging-bmw-ocr.situdata.com |
| 16 | 16 | ||
| 17 | DELAY_SECONDS = 60 | 17 | DELAY_SECONDS = 60 |
| 18 | |||
| 19 | BD_PRICE = 950 | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
| ... | @@ -15,3 +15,5 @@ DEALER_CODE = ocr_situ_group | ... | @@ -15,3 +15,5 @@ DEALER_CODE = ocr_situ_group |
| 15 | BASE_URL = https://sfocr-uat.bmwgroup.net | 15 | BASE_URL = https://sfocr-uat.bmwgroup.net |
| 16 | 16 | ||
| 17 | DELAY_SECONDS = 60 | 17 | DELAY_SECONDS = 60 |
| 18 | |||
| 19 | BD_PRICE = 950 | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or sign in to post a comment