eb259387 by 冯轩

Merge branch 'feature/CHINARPA-4962' into feature/uat-tmp

2 parents 07007f09 2be87904
......@@ -1018,9 +1018,9 @@ class Command(BaseCommand, LoggerMixin):
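# Additional handling:
# [Sale-and-leaseback contract] - if key is "承租人签字" and the content contains "签署日期:XXXX", strip the signing date (keep only the text before "签署日期")
# [Vehicle lease mortgage contract] - if key is "抵押人签字" and the content contains "签署日期:XXXX", strip the signing date in the same way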
if key == '承租人签字' and '签署日期' in tmp_res:
if key == '承租人签字' and tmp_res is not None and '签署日期' in tmp_res:
res[key] = tmp_res.split('签署日期')[0]
if key == "抵押人签字" and "签署日期" in tmp_res:
if key == "抵押人签字" and tmp_res is not None and "签署日期" in tmp_res:
res[key] = tmp_res.split("签署日期")[0]
res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(img_pno), {}).get(
consts.IMG_PATH_KEY, '')
......@@ -1624,7 +1624,7 @@ class Command(BaseCommand, LoggerMixin):
try:
channel, img_path, text_list = img_queue.get(block=False)
except Exception as e:
# self.online_log.info('{0} [img_2_ocr_1] [queue empty]'.format(self.log_base))
self.online_log.info('{0} [img_2_ocr_1] [queue empty]'.format(self.log_base))
time.sleep(self.sleep_time_img_get)
continue
else:
......@@ -1653,6 +1653,7 @@ class Command(BaseCommand, LoggerMixin):
'[error={4}]'.format(self.log_base, times, url, img_path,
traceback.format_exc()))
else:
self.online_log.info('{0} [ocr_1 start] [img={1}] [url={2}]'.format(self.log_base, img_path, url))
ocr_1_res = ocr_1_response.json()
end_time = time.time()
speed_time = int(end_time - start_time)
......@@ -1699,8 +1700,9 @@ class Command(BaseCommand, LoggerMixin):
self.online_log.info('{0} [res_2_wb] [get task] [queue running] [finish_queue_size={1}]'.format(self.log_base, finish_queue.qsize()))
while len(error_list) == 0 or not img_queue.empty() or not finish_queue.empty():
try:
self.online_log.info('{0} [res_2_wb] [finish_queue.get1] [finish_queue_size={1}] [img_queue_size={2}]'.format(self.log_base, finish_queue.qsize(), img_queue.qsize()))
task_str = finish_queue.get(block=False)
self.online_log.info('{0} [res_2_wb] [finish_queue.get]'.format(self.log_base))
self.online_log.info('{0} [res_2_wb] [finish_queue.get2]'.format(self.log_base))
except Exception as e:
self.online_log.info('{0} [res_2_wb] [queue empty]'.format(self.log_base))
time.sleep(self.sleep_time_task_get)
......@@ -2463,6 +2465,7 @@ class Command(BaseCommand, LoggerMixin):
except Exception as e:
self.online_log.error('{0} [process error (pdf & img remove)] [task={1}] [error={2}]'.format(
self.log_base, task_str, traceback.format_exc()))
self.online_log.info('{0} [res_2_wb after while] [len(error_list)={1}] [img_queue={2}] [finish_queue={3}]'.format(self.log_base, len(error_list), img_queue.empty(), finish_queue.empty()))
def handle(self, *args, **kwargs):
db.close_old_connections()
......
......@@ -2458,7 +2458,7 @@ def se_bs_compare(license_en, ocr_res_dict, strip_list, is_auto, aa_type):
return result_field_list, field_img_path_dict
def se_compare_license(license_en, ocr_res_dict, field_list):
def se_compare_license(license_en, ocr_res_dict, field_list, is_auto):
ocr_field, compare_logic, special_expiry_date = consts.SE_COMPARE_FIELD[license_en]
is_find = False
......@@ -2513,6 +2513,14 @@ def se_compare_license(license_en, ocr_res_dict, field_list):
ocr_res_list[res_idx].get(consts.LOWER_AMOUNT_FIELD, ''),
ocr_res_list[res_idx].get(consts.UPPER_AMOUNT_FIELD, ''),
)
# auto 保单 保险费合计 ocr结果需要加上一个基数,再与cms结果做比对
elif is_auto and ocr_field == consts.BD_FIELD and name == consts.SE_BD_FIELD[10]:
ocr_str = ocr_res_list[res_idx].get(compare_logic[name][0])
compare_log.info('{0} [bd_4962_price] [ori ocr_str:{1}] '.format(log_base, ocr_str))
add_price = conf.BD_PRICE
compare_log.info('{0} [bd_4962_price] [add_price:{1}] '.format(log_base, add_price))
ocr_str = float(ocr_str) + float(add_price)
compare_log.info('{0} [bd_4962_price] [final ocr_str:{1}] '.format(log_base, ocr_str))
else:
ocr_str = ocr_res_list[res_idx].get(compare_logic[name][0])
......@@ -3287,7 +3295,7 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh, is_auto, id_res_list
license_en, id_res_list, strip_list, is_auto)
else:
result_field_list, no_ocr_result, field_img_path_dict = se_compare_license(
license_en, ocr_res_dict, strip_list)
license_en, ocr_res_dict, strip_list, is_auto)
each_license_failed_count = 0
for name, value, result, ocr_str, img_path, error_type, cn_reason in result_field_list:
if license_en not in consts.SKIP_CARD or not no_ocr_result:
......@@ -3346,7 +3354,7 @@ def se_compare_process(compare_info, ocr_res_dict, is_gsyh, is_auto, id_res_list
elif license_en == consts.FS_EN:
result_field_list, field_img_path_dict = se_fs_compare(license_en, ocr_res_dict, strip_list)
else:
result_field_list, _, field_img_path_dict = se_compare_license(license_en, ocr_res_dict, strip_list)
result_field_list, _, field_img_path_dict = se_compare_license(license_en, ocr_res_dict, strip_list, is_auto)
each_license_failed_count = 0
for name, value, result, ocr_str, img_path, error_type, cn_reason in result_field_list:
......
......@@ -345,7 +345,7 @@ class PDFHandler:
# 3.3 碎图分组大于2、全过滤、含特殊filter,特殊处理:整个页面保存为png图片
if page_to_png:
page = pdf.loadPage(pno)
self.page_to_png(page, is_big_img=True)
self.page_to_png(page)
def title_is_ebank(self, char):
new_char = normalize('NFKC', char)
......@@ -450,7 +450,7 @@ class PDFHandler:
# 1.页面图片对象数目为0时,保存整个页面为png图片
if self.is_e_pdf or self.is_ebank or len(il) == 0:
page = pdf.loadPage(pno)
self.page_to_png(page, is_big_img=True)
self.page_to_png(page)
# 2.页面图片对象数目为1时:
# 小图(如电子账单的盖章):保存整个页面为png图片
# 大图:提取图片对象
......@@ -459,13 +459,13 @@ class PDFHandler:
# 小图
if width < WH_COUPLE_1[0] and height < WH_COUPLE_1[1]:
page = pdf.loadPage(pno)
self.page_to_png(page, is_big_img=True)
self.page_to_png(page)
# 大图
elif width >= WH_COUPLE_6[0] or height >= WH_COUPLE_6[1]:
self.is_new_modify = 1
is_big_img = (width < WH_COUPLE_7[0] and height < WH_COUPLE_7[1]) # 防止图片过大
page = pdf.loadPage(pno)
self.page_to_png(page, is_big_img=True)
self.page_to_png(page, is_big_img=is_big_img)
elif xref not in self.xref_set:
self.extract_single_image(pdf, xref, smask, colorspace, pno)
# 3.页面图片对象数目大于1时,特殊处理
......@@ -480,7 +480,7 @@ class PDFHandler:
with fitz.Document(self.path) as pdf:
for pno in range(pdf.pageCount):
page = pdf.loadPage(pno)
self.page_to_png(page, is_big_img=True)
self.page_to_png(page)
self.img_count = len(self.img_path_list)
def ebank_draw(self):
......
......@@ -16,3 +16,4 @@ BASE_URL = https://sfocr-prod.bmwgroup.net
DELAY_SECONDS = 60
BD_PRICE = 950
\ No newline at end of file
......
......@@ -14,4 +14,6 @@ DEALER_CODE = ocr_situ_group
BASE_URL = https://staging-bmw-ocr.situdata.com
DELAY_SECONDS = 60
\ No newline at end of file
DELAY_SECONDS = 60
BD_PRICE = 950
\ No newline at end of file
......
......@@ -14,4 +14,6 @@ DEALER_CODE = ocr_situ_group
BASE_URL = https://sfocr-uat.bmwgroup.net
DELAY_SECONDS = 60
\ No newline at end of file
DELAY_SECONDS = 60
BD_PRICE = 950
\ No newline at end of file
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!