Merge branch 'feature/CHINARPA-4962' into feature/uat-tmp

冯轩
Showing 6 changed files with 28 additions and 12 deletions
src/apps/doc/management/commands/ocr_process.py
src/celery_compare/tasks.py
src/common/tools/pdf_to_img.py
src/settings/conf/prd.ini
src/settings/conf/sit.ini
src/settings/conf/uat.ini
--- a/src/apps/doc/management/commands/ocr_process.py
View file @eb25938
+++ b/src/apps/doc/management/commands/ocr_process.py
View file @eb25938
@@ -1018,9 +1018,9 @@ class Command(BaseCommand, LoggerMixin):
                        # 添加处理, 
                        # [售后回租合同] - 如果 key 是 "承租人签字", 且内容中包含 签署日期:XXXX, 则将签署日期去除
                        # [车辆租赁抵押合同] - 如果 key 是 "抵押人签字", 且内容中包含 签署日期:XXXX, 则将签署日期去除
-                        if key == '承租人签字' and '签署日期' in tmp_res:
+                        if key == '承租人签字' and tmp_res is not None and '签署日期' in tmp_res:
                            res[key] = tmp_res.split('签署日期')[0]
-                        if key == "抵押人签字" and "签署日期" in tmp_res:
+                        if key == "抵押人签字" and tmp_res is not None and "签署日期" in tmp_res:
                            res[key] = tmp_res.split("签署日期")[0]
                        res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(img_pno), {}).get(
                            consts.IMG_PATH_KEY, '')
@@ -1624,7 +1624,7 @@ class Command(BaseCommand, LoggerMixin):
            try:
                channel, img_path, text_list = img_queue.get(block=False)
            except Exception as e:
-                # self.online_log.info('{0} [img_2_ocr_1] [queue empty]'.format(self.log_base))
+                self.online_log.info('{0} [img_2_ocr_1] [queue empty]'.format(self.log_base))
                time.sleep(self.sleep_time_img_get)
                continue
            else:
@@ -1653,6 +1653,7 @@ class Command(BaseCommand, LoggerMixin):
                                                  '[error={4}]'.format(self.log_base, times, url, img_path,
                                                                       traceback.format_exc()))
                        else:
+                            self.online_log.info('{0} [ocr_1 start] [img={1}] [url={2}]'.format(self.log_base, img_path, url))
                            ocr_1_res = ocr_1_response.json()
                            end_time = time.time()
                            speed_time = int(end_time - start_time)
@@ -1699,8 +1700,9 @@ class Command(BaseCommand, LoggerMixin):
        self.online_log.info('{0} [res_2_wb] [get task] [queue running] [finish_queue_size={1}]'.format(self.log_base, finish_queue.qsize()))
        while len(error_list) == 0 or not img_queue.empty() or not finish_queue.empty():
            try:
+                self.online_log.info('{0} [res_2_wb] [finish_queue.get1] [finish_queue_size={1}] [img_queue_size={2}]'.format(self.log_base, finish_queue.qsize(), img_queue.qsize()))
                task_str = finish_queue.get(block=False)
-                self.online_log.info('{0} [res_2_wb] [finish_queue.get]'.format(self.log_base))
+                self.online_log.info('{0} [res_2_wb] [finish_queue.get2]'.format(self.log_base))
            except Exception as e:
                self.online_log.info('{0} [res_2_wb] [queue empty]'.format(self.log_base))
                time.sleep(self.sleep_time_task_get)
@@ -2463,6 +2465,7 @@ class Command(BaseCommand, LoggerMixin):
                    except Exception as e:
                        self.online_log.error('{0} [process error (pdf & img remove)] [task={1}] [error={2}]'.format(
                            self.log_base, task_str, traceback.format_exc()))
+        self.online_log.info('{0} [res_2_wb after while] [len(error_list)={1}] [img_queue={2}] [finish_queue={3}]'.format(self.log_base, len(error_list), img_queue.empty(), finish_queue.empty()))
    def handle(self, *args, **kwargs):
        db.close_old_connections()
--- a/src/celery_compare/tasks.py
View file @eb25938
+++ b/src/celery_compare/tasks.py
View file @eb25938
@@ -2458,7 +2458,7 @@ def se_bs_compare(license_en, ocr_res_dict, strip_list, is_auto, aa_type):
    return result_field_list, field_img_path_dict
-def se_compare_license(license_en, ocr_res_dict, field_list):
+def se_compare_license(license_en, ocr_res_dict, field_list, is_auto):
    ocr_field, compare_logic, special_expiry_date = consts.SE_COMPARE_FIELD[license_en]
    is_find = False
@@ -2513,6 +2513,14 @@ def se_compare_license(license_en, ocr_res_dict, field_list):
                        ocr_res_list[res_idx].get(consts.LOWER_AMOUNT_FIELD, ''),
                        ocr_res_list[res_idx].get(consts.UPPER_AMOUNT_FIELD, ''),
                    )
+                # auto 保单 保险费合计 ocr结果需要加上一个基数，再与cms结果做比对
+                elif is_auto and ocr_field == consts.BD_FIELD and name == consts.SE_BD_FIELD[10]:
+                    ocr_str = ocr_res_list[res_idx].get(compare_logic[name][0])
+                    compare_log.info('{0} [bd_4962_price] [ori ocr_str:{1}] '.format(log_base, ocr_str))
+                    add_price = conf.BD_PRICE
+                    compare_log.info('{0} [bd_4962_price] [add_price:{1}] '.format(log_base, add_price))
+                    ocr_str = float(ocr_str) + float(add_price)
+                    compare_log.info('{0} [bd_4962_price] [final ocr_str:{1}] '.format(log_base, ocr_str))
                else:
                    ocr_str = ocr_res_list[res_idx].get(compare_logic[name][0])
@@ -3287,7 +3295,7 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh, is_auto, id_res_list
                                license_en, id_res_list, strip_list, is_auto)
                        else:
                            result_field_list, no_ocr_result, field_img_path_dict = se_compare_license(
-                                license_en, ocr_res_dict, strip_list)
+                                license_en, ocr_res_dict, strip_list, is_auto)
                        each_license_failed_count = 0
                        for name, value, result, ocr_str, img_path, error_type, cn_reason in result_field_list:
                            if license_en not in consts.SKIP_CARD or not no_ocr_result:
@@ -3346,7 +3354,7 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh, is_auto, id_res_list
                elif license_en == consts.FS_EN:
                    result_field_list, field_img_path_dict = se_fs_compare(license_en, ocr_res_dict, strip_list)
                else:
-                    result_field_list, _, field_img_path_dict = se_compare_license(license_en, ocr_res_dict, strip_list)
+                    result_field_list, _, field_img_path_dict = se_compare_license(license_en, ocr_res_dict, strip_list, is_auto)
                each_license_failed_count = 0
                for name, value, result, ocr_str, img_path, error_type, cn_reason in result_field_list:
--- a/src/common/tools/pdf_to_img.py
View file @eb25938
+++ b/src/common/tools/pdf_to_img.py
View file @eb25938
@@ -345,7 +345,7 @@ class PDFHandler:
        # 3.3 碎图分组大于2、全过滤、含特殊filter，特殊处理：整个页面保存为png图片
        if page_to_png:
            page = pdf.loadPage(pno)
-            self.page_to_png(page, is_big_img=True)
+            self.page_to_png(page)
    def title_is_ebank(self, char):
        new_char = normalize('NFKC', char)
@@ -450,7 +450,7 @@ class PDFHandler:
                    # 1.页面图片对象数目为0时，保存整个页面为png图片
                    if self.is_e_pdf or self.is_ebank or len(il) == 0:
                        page = pdf.loadPage(pno)
-                        self.page_to_png(page, is_big_img=True)
+                        self.page_to_png(page)
                    # 2.页面图片对象数目为1时：
                    # 小图(如电子账单的盖章)：保存整个页面为png图片
                    # 大图：提取图片对象
@@ -459,13 +459,13 @@ class PDFHandler:
                        # 小图
                        if width < WH_COUPLE_1[0] and height < WH_COUPLE_1[1]:
                            page = pdf.loadPage(pno)
-                            self.page_to_png(page, is_big_img=True)
+                            self.page_to_png(page)
                        # 大图
                        elif width >= WH_COUPLE_6[0] or height >= WH_COUPLE_6[1]:
                            self.is_new_modify = 1
                            is_big_img = (width < WH_COUPLE_7[0] and height < WH_COUPLE_7[1]) # 防止图片过大
                            page = pdf.loadPage(pno)
-                            self.page_to_png(page, is_big_img=True) 
+                            self.page_to_png(page, is_big_img=is_big_img) 
                        elif xref not in self.xref_set:
                            self.extract_single_image(pdf, xref, smask, colorspace, pno)
                    # 3.页面图片对象数目大于1时，特殊处理
@@ -480,7 +480,7 @@ class PDFHandler:
        with fitz.Document(self.path) as pdf:
            for pno in range(pdf.pageCount):
                page = pdf.loadPage(pno)
-                self.page_to_png(page, is_big_img=True)
+                self.page_to_png(page)
        self.img_count = len(self.img_path_list)
    def ebank_draw(self):
--- a/src/settings/conf/prd.ini
View file @eb25938
+++ b/src/settings/conf/prd.ini
View file @eb25938
@@ -16,3 +16,4 @@ BASE_URL = https://sfocr-prod.bmwgroup.net
 DELAY_SECONDS = 60
+BD_PRICE = 950
\ No newline at end of file
--- a/src/settings/conf/sit.ini
View file @eb25938
+++ b/src/settings/conf/sit.ini
View file @eb25938
@@ -15,3 +15,5 @@ DEALER_CODE = ocr_situ_group
 BASE_URL = https://staging-bmw-ocr.situdata.com
 DELAY_SECONDS = 60
+BD_PRICE = 950
\ No newline at end of file
--- a/src/settings/conf/uat.ini
View file @eb25938
+++ b/src/settings/conf/uat.ini
View file @eb25938
@@ -15,3 +15,5 @@ DEALER_CODE = ocr_situ_group
 BASE_URL = https://sfocr-uat.bmwgroup.net
 DELAY_SECONDS = 60
+BD_PRICE = 950
\ No newline at end of file