3d620b3b by 周伟奇

add slice

1 parent 84d79e06
...@@ -250,6 +250,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -250,6 +250,7 @@ class Command(BaseCommand, LoggerMixin):
250 page_num_only = page_num 250 page_num_only = page_num
251 rebuild_page_info = [] 251 rebuild_page_info = []
252 text_key = 'words' 252 text_key = 'words'
253 position_key = 'position'
253 for key, value in contract_dict.get('page_info', {}).items(): 254 for key, value in contract_dict.get('page_info', {}).items():
254 if value is None: 255 if value is None:
255 rebuild_page_info.append((key, )) 256 rebuild_page_info.append((key, ))
...@@ -279,11 +280,17 @@ class Command(BaseCommand, LoggerMixin): ...@@ -279,11 +280,17 @@ class Command(BaseCommand, LoggerMixin):
279 280
280 contract_result.setdefault(classify, dict()).setdefault(page_num_only, []).append(rebuild_page_info) 281 contract_result.setdefault(classify, dict()).setdefault(page_num_only, []).append(rebuild_page_info)
281 282
282 page_compare_dict = {} 283 page_compare_dict = {
284 consts.IMG_PATH_KEY: img_path,
285 consts.ALL_POSITION_KEY: {},
286 }
283 for key, value in contract_dict.get('page_info', {}).items(): 287 for key, value in contract_dict.get('page_info', {}).items():
284 if not isinstance(value, dict): 288 if not isinstance(value, dict):
285 continue 289 continue
286 elif text_key in value: 290 elif text_key in value:
291 position_list = value.get(position_key, [])
292 page_compare_dict[consts.ALL_POSITION_KEY][key] = position_list if isinstance(position_list, list) else []
293
287 if value[text_key] is None: 294 if value[text_key] is None:
288 page_compare_dict[key] = '' 295 page_compare_dict[key] = ''
289 elif isinstance(value[text_key], str): 296 elif isinstance(value[text_key], str):
...@@ -292,16 +299,47 @@ class Command(BaseCommand, LoggerMixin): ...@@ -292,16 +299,47 @@ class Command(BaseCommand, LoggerMixin):
292 page_compare_dict[key] = value[text_key] 299 page_compare_dict[key] = value[text_key]
293 else: 300 else:
294 page_compare_dict[key] = {} 301 page_compare_dict[key] = {}
302 page_compare_dict[consts.ALL_POSITION_KEY][key] = {}
295 for sub_key, sub_value in value.items(): 303 for sub_key, sub_value in value.items():
304 position_list = sub_value.get(position_key, [])
305 page_compare_dict[consts.ALL_POSITION_KEY][key][sub_key] = position_list if isinstance(
306 position_list, list) else []
307
296 if sub_value[text_key] is None: 308 if sub_value[text_key] is None:
297 page_compare_dict[key][sub_key] = '' 309 page_compare_dict[key][sub_key] = ''
298 elif isinstance(sub_value[text_key], str): 310 elif isinstance(sub_value[text_key], str):
299 page_compare_dict[key][sub_key] = sub_value[text_key] 311 page_compare_dict[key][sub_key] = sub_value[text_key]
300 312
301 page_compare_dict[consts.IMG_PATH_KEY] = img_path
302 contract_result_compare.setdefault(classify, dict())[consts.ASP_KEY] = contract_dict.get(consts.ASP_KEY, False) 313 contract_result_compare.setdefault(classify, dict())[consts.ASP_KEY] = contract_dict.get(consts.ASP_KEY, False)
314 # "position" = [xmin, ymin, xmax, ymax]
303 contract_result_compare.setdefault(classify, dict())[page_num_only] = page_compare_dict 315 contract_result_compare.setdefault(classify, dict())[page_num_only] = page_compare_dict
304 316
@staticmethod
def rebuild_position(src_position):
    """Convert an edge-based box into a left/top/width/height box.

    Args:
        src_position: Mapping with 'left', 'top', 'right' and 'bottom'
            entries, e.g.
            {'left': 470, 'top': 671, 'right': 542, 'bottom': 694}.
            Missing entries are treated as 0.

    Returns:
        A new dict with keys 'left', 'top', 'width' (right - left) and
        'height' (bottom - top). For the example above the result is
        {'left': 470, 'top': 671, 'width': 72, 'height': 23}.
        An all-zero box is returned when ``src_position`` has no ``get``
        method (e.g. ``None``) or when its values cannot be subtracted.
    """
    try:
        left = src_position.get('left', 0)
        top = src_position.get('top', 0)
        right = src_position.get('right', 0)
        bottom = src_position.get('bottom', 0)
        return {
            'left': left,
            'top': top,
            'width': right - left,
            'height': bottom - top,
        }
    except (AttributeError, TypeError):
        # Best-effort fallback: a non-mapping input raises AttributeError on
        # ``.get``; None or string coordinates raise TypeError on ``-``.
        return {
            'left': 0,
            'top': 0,
            'width': 0,
            'height': 0,
        }
342
305 def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, do_dda, 343 def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, do_dda,
306 dda_id_bc_mapping): 344 dda_id_bc_mapping):
307 # 类别:'0'身份证, '1'居住证 345 # 类别:'0'身份证, '1'居住证
...@@ -329,6 +367,7 @@ class Command(BaseCommand, LoggerMixin): ...@@ -329,6 +367,7 @@ class Command(BaseCommand, LoggerMixin):
329 # 保单 367 # 保单
330 if classify == consts.INSURANCE_CLASSIFY: 368 if classify == consts.INSURANCE_CLASSIFY:
331 product_result = ['', '', ''] 369 product_result = ['', '', '']
370 product_result_position = [dict(), dict(), dict()]
332 min_char_count_1 = 1000 371 min_char_count_1 = 1000
333 min_char_count_2 = 1000 372 min_char_count_2 = 1000
334 for product in license_data.get('result', {}).get('productList', []): 373 for product in license_data.get('result', {}).get('productList', []):
...@@ -338,10 +377,16 @@ class Command(BaseCommand, LoggerMixin): ...@@ -338,10 +377,16 @@ class Command(BaseCommand, LoggerMixin):
338 min_char_count_1 = len(name) 377 min_char_count_1 = len(name)
339 product_result[0] = product.get('coverage', {}).get('words', '') 378 product_result[0] = product.get('coverage', {}).get('words', '')
340 product_result[2] = product.get('deductible_franchise', {}).get('words', '') 379 product_result[2] = product.get('deductible_franchise', {}).get('words', '')
380 product_result_position[0] = self.rebuild_position(product.get('coverage', {}).get(
381 'position', {}))
382 product_result_position[2] = self.rebuild_position(product.get('deductible_franchise', {}).get(
383 'position', {}))
341 elif name.find('第三者责任') != -1: 384 elif name.find('第三者责任') != -1:
342 if len(name) < min_char_count_2: 385 if len(name) < min_char_count_2:
343 min_char_count_2 = len(name) 386 min_char_count_2 = len(name)
344 product_result[1] = product.get('coverage', {}).get('words', '') 387 product_result[1] = product.get('coverage', {}).get('words', '')
388 product_result_position[1] = self.rebuild_position(product.get('coverage', {}).get(
389 'position', {}))
345 390
346 special_str = license_data.get('result', {}).get('1stBeneficiary', {}).get('words', '') 391 special_str = license_data.get('result', {}).get('1stBeneficiary', {}).get('words', '')
347 special = '无' 392 special = '无'
...@@ -362,11 +407,29 @@ class Command(BaseCommand, LoggerMixin): ...@@ -362,11 +407,29 @@ class Command(BaseCommand, LoggerMixin):
362 consts.IMG_PATH_KEY: img_path, 407 consts.IMG_PATH_KEY: img_path,
363 consts.SECTION_IMG_PATH_KEY: section_img_path, 408 consts.SECTION_IMG_PATH_KEY: section_img_path,
364 } 409 }
365 # 'position': {'left': 470, 'top': 671, 'right': 542, 'bottom': 694} 410
366 # position_dict = { 411 position_dict = {
367 # '被保险人姓名': {consts.FIELD_POSITION_KEY: {}} 412 '被保险人姓名': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
368 # } 413 'insured', {}).get('name', {}).get('position', {}))},
369 # insurance_ocr_result[consts.ALL_POSITION_KEY] = position_dict 414 '被保险人证件号码': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
415 'insured', {}).get('certiCode', {}).get('position', {}))},
416 '车架号': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
417 'vehicle', {}).get('VIN', {}).get('position', {}))},
418 '机动车损失保险金额': {consts.FIELD_POSITION_KEY: product_result_position[0]},
419 '机动车第三者责任保险金额': {consts.FIELD_POSITION_KEY: product_result_position[1]},
420 '机动车损失保险绝对免赔率/绝对免赔额': {consts.FIELD_POSITION_KEY: product_result_position[2]},
421 '保险费合计': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
422 'premiumSum', {}).get('position', {}))},
423 '保险起始日期': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
424 'startDate', {}).get('position', {}))},
425 '保险截止日期': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
426 'endDate', {}).get('position', {}))},
427 '保单章': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
428 'seal', {}).get('position', {}))},
429 '特别约定第一受益人': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get(
430 '1stBeneficiary', {}).get('position', {}))},
431 }
432 insurance_ocr_result[consts.ALL_POSITION_KEY] = position_dict
370 license_summary.setdefault(classify, []).append(insurance_ocr_result) 433 license_summary.setdefault(classify, []).append(insurance_ocr_result)
371 # DDA 434 # DDA
372 elif classify == consts.DDA_CLASSIFY: 435 elif classify == consts.DDA_CLASSIFY:
...@@ -873,11 +936,24 @@ class Command(BaseCommand, LoggerMixin): ...@@ -873,11 +936,24 @@ class Command(BaseCommand, LoggerMixin):
873 res[key] = page_info_dict.get(str(pno), {}).get(key1, '') 936 res[key] = page_info_dict.get(str(pno), {}).get(key1, '')
874 res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get( 937 res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get(
875 consts.IMG_PATH_KEY, '') 938 consts.IMG_PATH_KEY, '')
939 res.setdefault(consts.ALL_POSITION_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get(
940 consts.ALL_POSITION_KEY, {}).get(key1, [])
876 else: 941 else:
877 res[key] = page_info_dict.get(str(pno), {}).get(key1, {}).get(key2, '') 942 res[key] = page_info_dict.get(str(pno), {}).get(key1, {}).get(key2, '')
878 res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get( 943 res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get(
879 consts.IMG_PATH_KEY, '') 944 consts.IMG_PATH_KEY, '')
880 945 res.setdefault(consts.ALL_POSITION_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get(
946 consts.ALL_POSITION_KEY, {}).get(key1, {}).get(key2, [])
947
948 # res = {
949 # 'key': 'list or str',
950 # 'uniq_img_path_key': {
951 # 'key': 'str',
952 # },
953 # 'uniq_all_position_key': {
954 # 'key': 'list'
955 # }
956 # }
881 license_summary[classify] = [res] 957 license_summary[classify] = [res]
882 else: 958 else:
883 res = {} 959 res = {}
......
...@@ -2405,11 +2405,13 @@ def se_contract_compare(license_en, ocr_res_dict, strip_list, is_gsyh): ...@@ -2405,11 +2405,13 @@ def se_contract_compare(license_en, ocr_res_dict, strip_list, is_gsyh):
2405 result_field_list = [] 2405 result_field_list = []
2406 field_img_path_dict = dict() 2406 field_img_path_dict = dict()
2407 2407
2408 ocr_res = dict()
2408 if ocr_res_str is not None: 2409 if ocr_res_str is not None:
2409 ocr_res_list = json.loads(ocr_res_str) 2410 ocr_res_list = json.loads(ocr_res_str)
2410 ocr_res = ocr_res_list.pop() 2411 ocr_res = ocr_res_list.pop()
2411 2412
2412 for name, value in strip_list: 2413 for name, value in strip_list:
2414 # 购置税校验
2413 if name == consts.SE_AFC_CON_FIELD[21]: 2415 if name == consts.SE_AFC_CON_FIELD[21]:
2414 if len(value) == 3: 2416 if len(value) == 3:
2415 reason = [] 2417 reason = []
...@@ -2471,6 +2473,29 @@ def se_contract_compare(license_en, ocr_res_dict, strip_list, is_gsyh): ...@@ -2471,6 +2473,29 @@ def se_contract_compare(license_en, ocr_res_dict, strip_list, is_gsyh):
2471 result_field_list.append((name, value, consts.RESULT_N, empty_str, empty_str, ErrorType.NF.value, 2473 result_field_list.append((name, value, consts.RESULT_N, empty_str, empty_str, ErrorType.NF.value,
2472 '{0}未找到'.format(license_en))) 2474 '{0}未找到'.format(license_en)))
2473 2475
2476 if ocr_res_str is not None:
2477 img_map = {}
2478 for name, _, result, _, img_path, _, _ in result_field_list:
2479 if result == consts.RESULT_N:
2480 img_map.setdefault(img_path, []).append(name)
2481 for path, field_list in img_map.items():
2482 if os.path.exists(path):
2483 pre, suf = os.path.splitext(path)
2484 last_img = cv2.imread(path)
2485 for field_idx, field in enumerate(field_list):
2486 try:
2487 save_path = '{0}_{1}{2}'.format(pre, str(field_idx), suf)
2488 section_position_list = ocr_res.get(consts.ALL_POSITION_KEY, {}).get(field, [])
2489 if isinstance(section_position_list, list) and len(section_position_list) == 4:
2490 field_img = last_img[section_position_list[1]: section_position_list[3],
2491 section_position_list[0]: section_position_list[2], :]
2492 cv2.imwrite(save_path, field_img)
2493 field_img_path_dict[field] = save_path
2494 else:
2495 field_img_path_dict[field] = path
2496 except Exception as e:
2497 field_img_path_dict[field] = path
2498
2474 return result_field_list, field_img_path_dict 2499 return result_field_list, field_img_path_dict
2475 2500
2476 2501
......
...@@ -788,6 +788,7 @@ class Finder: ...@@ -788,6 +788,7 @@ class Finder:
788 items = [] 788 items = []
789 start = False 789 start = False
790 page = None 790 page = None
791 greater_equal_v35 = False
791 for pno in self.pdf_info: 792 for pno in self.pdf_info:
792 condition = False 793 condition = False
793 for block in self.pdf_info[f'{pno}']['blocks']: 794 for block in self.pdf_info[f'{pno}']['blocks']:
...@@ -796,6 +797,8 @@ class Finder: ...@@ -796,6 +797,8 @@ class Finder:
796 for line in block['lines']: 797 for line in block['lines']:
797 for span in line['spans']: 798 for span in line['spans']:
798 bbox, text = span['bbox'], span['text'] 799 bbox, text = span['bbox'], span['text']
800 if text == '租赁利率':
801 greater_equal_v35 = True
799 if '总计' in text: 802 if '总计' in text:
800 start = True 803 start = True
801 if '注:出租人向承租人购买租赁车辆的对价' in text: 804 if '注:出租人向承租人购买租赁车辆的对价' in text:
...@@ -804,9 +807,14 @@ class Finder: ...@@ -804,9 +807,14 @@ class Finder:
804 if start == True: 807 if start == True:
805 items.append(text) 808 items.append(text)
806 lines = [['项目', '购买价格', '实际融资金额']] 809 lines = [['项目', '购买价格', '实际融资金额']]
807 for i in range(len(items) // 3): 810 if greater_equal_v35:
808 line = [items[2 + i * 3 + 0], items[2 + i * 3 + 1], items[2 + i * 3 + 2]] 811 for i in range(len(items) // 4):
809 lines.append(line) 812 line = [items[2 + i * 4 + 0], items[2 + i * 4 + 1], items[2 + i * 4 + 2]]
813 lines.append(line)
814 else:
815 for i in range(len(items) // 3):
816 line = [items[2 + i * 3 + 0], items[2 + i * 3 + 1], items[2 + i * 3 + 2]]
817 lines.append(line)
810 if len(items) > 0: 818 if len(items) > 0:
811 lines.append([items[0], '', items[1]]) 819 lines.append([items[0], '', items[1]])
812 820
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!