add slice
Showing
3 changed files
with
120 additions
and
11 deletions
| ... | @@ -250,6 +250,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -250,6 +250,7 @@ class Command(BaseCommand, LoggerMixin): |
| 250 | page_num_only = page_num | 250 | page_num_only = page_num |
| 251 | rebuild_page_info = [] | 251 | rebuild_page_info = [] |
| 252 | text_key = 'words' | 252 | text_key = 'words' |
| 253 | position_key = 'position' | ||
| 253 | for key, value in contract_dict.get('page_info', {}).items(): | 254 | for key, value in contract_dict.get('page_info', {}).items(): |
| 254 | if value is None: | 255 | if value is None: |
| 255 | rebuild_page_info.append((key, )) | 256 | rebuild_page_info.append((key, )) |
| ... | @@ -279,11 +280,17 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -279,11 +280,17 @@ class Command(BaseCommand, LoggerMixin): |
| 279 | 280 | ||
| 280 | contract_result.setdefault(classify, dict()).setdefault(page_num_only, []).append(rebuild_page_info) | 281 | contract_result.setdefault(classify, dict()).setdefault(page_num_only, []).append(rebuild_page_info) |
| 281 | 282 | ||
| 282 | page_compare_dict = {} | 283 | page_compare_dict = { |
| 284 | consts.IMG_PATH_KEY: img_path, | ||
| 285 | consts.ALL_POSITION_KEY: {}, | ||
| 286 | } | ||
| 283 | for key, value in contract_dict.get('page_info', {}).items(): | 287 | for key, value in contract_dict.get('page_info', {}).items(): |
| 284 | if not isinstance(value, dict): | 288 | if not isinstance(value, dict): |
| 285 | continue | 289 | continue |
| 286 | elif text_key in value: | 290 | elif text_key in value: |
| 291 | position_list = value.get(position_key, []) | ||
| 292 | page_compare_dict[consts.ALL_POSITION_KEY][key] = position_list if isinstance(position_list, list) else [] | ||
| 293 | |||
| 287 | if value[text_key] is None: | 294 | if value[text_key] is None: |
| 288 | page_compare_dict[key] = '' | 295 | page_compare_dict[key] = '' |
| 289 | elif isinstance(value[text_key], str): | 296 | elif isinstance(value[text_key], str): |
| ... | @@ -292,16 +299,47 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -292,16 +299,47 @@ class Command(BaseCommand, LoggerMixin): |
| 292 | page_compare_dict[key] = value[text_key] | 299 | page_compare_dict[key] = value[text_key] |
| 293 | else: | 300 | else: |
| 294 | page_compare_dict[key] = {} | 301 | page_compare_dict[key] = {} |
| 302 | page_compare_dict[consts.ALL_POSITION_KEY][key] = {} | ||
| 295 | for sub_key, sub_value in value.items(): | 303 | for sub_key, sub_value in value.items(): |
| 304 | position_list = sub_value.get(position_key, []) | ||
| 305 | page_compare_dict[consts.ALL_POSITION_KEY][key][sub_key] = position_list if isinstance( | ||
| 306 | position_list, list) else [] | ||
| 307 | |||
| 296 | if sub_value[text_key] is None: | 308 | if sub_value[text_key] is None: |
| 297 | page_compare_dict[key][sub_key] = '' | 309 | page_compare_dict[key][sub_key] = '' |
| 298 | elif isinstance(sub_value[text_key], str): | 310 | elif isinstance(sub_value[text_key], str): |
| 299 | page_compare_dict[key][sub_key] = sub_value[text_key] | 311 | page_compare_dict[key][sub_key] = sub_value[text_key] |
| 300 | 312 | ||
| 301 | page_compare_dict[consts.IMG_PATH_KEY] = img_path | ||
| 302 | contract_result_compare.setdefault(classify, dict())[consts.ASP_KEY] = contract_dict.get(consts.ASP_KEY, False) | 313 | contract_result_compare.setdefault(classify, dict())[consts.ASP_KEY] = contract_dict.get(consts.ASP_KEY, False) |
| 314 | # "position" = [xmin, ymin, xmax, ymax] | ||
| 303 | contract_result_compare.setdefault(classify, dict())[page_num_only] = page_compare_dict | 315 | contract_result_compare.setdefault(classify, dict())[page_num_only] = page_compare_dict |
| 304 | 316 | ||
| 317 | @staticmethod | ||
| 318 | def rebuild_position(src_position): | ||
| 319 | # 'position': {'left': 470, 'top': 671, 'right': 542, 'bottom': 694} | ||
| 320 | # 'width'='right-left', 'height'='bottom-top' | ||
| 321 | # 'position': {'left': 470, 'top': 671, 'width': 542, 'height': 694} | ||
| 322 | try: | ||
| 323 | left = src_position.get('left', 0) | ||
| 324 | top = src_position.get('top', 0) | ||
| 325 | right = src_position.get('right', 0) | ||
| 326 | bottom = src_position.get('bottom', 0) | ||
| 327 | width = right - left | ||
| 328 | height = bottom - top | ||
| 329 | return { | ||
| 330 | 'left': left, | ||
| 331 | 'top': top, | ||
| 332 | 'width': width, | ||
| 333 | 'height': height, | ||
| 334 | } | ||
| 335 | except Exception as e: | ||
| 336 | return { | ||
| 337 | 'left': 0, | ||
| 338 | 'top': 0, | ||
| 339 | 'width': 0, | ||
| 340 | 'height': 0, | ||
| 341 | } | ||
| 342 | |||
| 305 | def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, do_dda, | 343 | def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, do_dda, |
| 306 | dda_id_bc_mapping): | 344 | dda_id_bc_mapping): |
| 307 | # 类别:'0'身份证, '1'居住证 | 345 | # 类别:'0'身份证, '1'居住证 |
| ... | @@ -329,6 +367,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -329,6 +367,7 @@ class Command(BaseCommand, LoggerMixin): |
| 329 | # 保单 | 367 | # 保单 |
| 330 | if classify == consts.INSURANCE_CLASSIFY: | 368 | if classify == consts.INSURANCE_CLASSIFY: |
| 331 | product_result = ['', '', ''] | 369 | product_result = ['', '', ''] |
| 370 | product_result_position = [dict(), dict(), dict()] | ||
| 332 | min_char_count_1 = 1000 | 371 | min_char_count_1 = 1000 |
| 333 | min_char_count_2 = 1000 | 372 | min_char_count_2 = 1000 |
| 334 | for product in license_data.get('result', {}).get('productList', []): | 373 | for product in license_data.get('result', {}).get('productList', []): |
| ... | @@ -338,10 +377,16 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -338,10 +377,16 @@ class Command(BaseCommand, LoggerMixin): |
| 338 | min_char_count_1 = len(name) | 377 | min_char_count_1 = len(name) |
| 339 | product_result[0] = product.get('coverage', {}).get('words', '') | 378 | product_result[0] = product.get('coverage', {}).get('words', '') |
| 340 | product_result[2] = product.get('deductible_franchise', {}).get('words', '') | 379 | product_result[2] = product.get('deductible_franchise', {}).get('words', '') |
| 380 | product_result_position[0] = self.rebuild_position(product.get('coverage', {}).get( | ||
| 381 | 'position', {})) | ||
| 382 | product_result_position[2] = self.rebuild_position(product.get('deductible_franchise', {}).get( | ||
| 383 | 'position', {})) | ||
| 341 | elif name.find('第三者责任') != -1: | 384 | elif name.find('第三者责任') != -1: |
| 342 | if len(name) < min_char_count_2: | 385 | if len(name) < min_char_count_2: |
| 343 | min_char_count_2 = len(name) | 386 | min_char_count_2 = len(name) |
| 344 | product_result[1] = product.get('coverage', {}).get('words', '') | 387 | product_result[1] = product.get('coverage', {}).get('words', '') |
| 388 | product_result_position[1] = self.rebuild_position(product.get('coverage', {}).get( | ||
| 389 | 'position', {})) | ||
| 345 | 390 | ||
| 346 | special_str = license_data.get('result', {}).get('1stBeneficiary', {}).get('words', '') | 391 | special_str = license_data.get('result', {}).get('1stBeneficiary', {}).get('words', '') |
| 347 | special = '无' | 392 | special = '无' |
| ... | @@ -362,11 +407,29 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -362,11 +407,29 @@ class Command(BaseCommand, LoggerMixin): |
| 362 | consts.IMG_PATH_KEY: img_path, | 407 | consts.IMG_PATH_KEY: img_path, |
| 363 | consts.SECTION_IMG_PATH_KEY: section_img_path, | 408 | consts.SECTION_IMG_PATH_KEY: section_img_path, |
| 364 | } | 409 | } |
| 365 | # 'position': {'left': 470, 'top': 671, 'right': 542, 'bottom': 694} | 410 | |
| 366 | # position_dict = { | 411 | position_dict = { |
| 367 | # '被保险人姓名': {consts.FIELD_POSITION_KEY: {}} | 412 | '被保险人姓名': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( |
| 368 | # } | 413 | 'insured', {}).get('name', {}).get('position', {}))}, |
| 369 | # insurance_ocr_result[consts.ALL_POSITION_KEY] = position_dict | 414 | '被保险人证件号码': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( |
| 415 | 'insured', {}).get('certiCode', {}).get('position', {}))}, | ||
| 416 | '车架号': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
| 417 | 'vehicle', {}).get('VIN', {}).get('position', {}))}, | ||
| 418 | '机动车损失保险金额': {consts.FIELD_POSITION_KEY: product_result_position[0]}, | ||
| 419 | '机动车第三者责任保险金额': {consts.FIELD_POSITION_KEY: product_result_position[1]}, | ||
| 420 | '机动车损失保险绝对免赔率/绝对免赔额': {consts.FIELD_POSITION_KEY: product_result_position[2]}, | ||
| 421 | '保险费合计': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
| 422 | 'premiumSum', {}).get('position', {}))}, | ||
| 423 | '保险起始日期': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
| 424 | 'startDate', {}).get('position', {}))}, | ||
| 425 | '保险截止日期': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
| 426 | 'endDate', {}).get('position', {}))}, | ||
| 427 | '保单章': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
| 428 | 'seal', {}).get('position', {}))}, | ||
| 429 | '特别约定第一受益人': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
| 430 | '1stBeneficiary', {}).get('position', {}))}, | ||
| 431 | } | ||
| 432 | insurance_ocr_result[consts.ALL_POSITION_KEY] = position_dict | ||
| 370 | license_summary.setdefault(classify, []).append(insurance_ocr_result) | 433 | license_summary.setdefault(classify, []).append(insurance_ocr_result) |
| 371 | # DDA | 434 | # DDA |
| 372 | elif classify == consts.DDA_CLASSIFY: | 435 | elif classify == consts.DDA_CLASSIFY: |
| ... | @@ -873,11 +936,24 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -873,11 +936,24 @@ class Command(BaseCommand, LoggerMixin): |
| 873 | res[key] = page_info_dict.get(str(pno), {}).get(key1, '') | 936 | res[key] = page_info_dict.get(str(pno), {}).get(key1, '') |
| 874 | res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get( | 937 | res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get( |
| 875 | consts.IMG_PATH_KEY, '') | 938 | consts.IMG_PATH_KEY, '') |
| 939 | res.setdefault(consts.ALL_POSITION_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get( | ||
| 940 | consts.ALL_POSITION_KEY, {}).get(key1, []) | ||
| 876 | else: | 941 | else: |
| 877 | res[key] = page_info_dict.get(str(pno), {}).get(key1, {}).get(key2, '') | 942 | res[key] = page_info_dict.get(str(pno), {}).get(key1, {}).get(key2, '') |
| 878 | res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get( | 943 | res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get( |
| 879 | consts.IMG_PATH_KEY, '') | 944 | consts.IMG_PATH_KEY, '') |
| 880 | 945 | res.setdefault(consts.ALL_POSITION_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get( | |
| 946 | consts.ALL_POSITION_KEY, {}).get(key1, {}).get(key2, []) | ||
| 947 | |||
| 948 | # res = { | ||
| 949 | # 'key': 'list or str', | ||
| 950 | # 'uniq_img_path_key': { | ||
| 951 | # 'key': 'str', | ||
| 952 | # }, | ||
| 953 | # 'uniq_all_position_key': { | ||
| 954 | # 'key': 'list' | ||
| 955 | # } | ||
| 956 | # } | ||
| 881 | license_summary[classify] = [res] | 957 | license_summary[classify] = [res] |
| 882 | else: | 958 | else: |
| 883 | res = {} | 959 | res = {} | ... | ... |
| ... | @@ -2405,11 +2405,13 @@ def se_contract_compare(license_en, ocr_res_dict, strip_list, is_gsyh): | ... | @@ -2405,11 +2405,13 @@ def se_contract_compare(license_en, ocr_res_dict, strip_list, is_gsyh): |
| 2405 | result_field_list = [] | 2405 | result_field_list = [] |
| 2406 | field_img_path_dict = dict() | 2406 | field_img_path_dict = dict() |
| 2407 | 2407 | ||
| 2408 | ocr_res = dict() | ||
| 2408 | if ocr_res_str is not None: | 2409 | if ocr_res_str is not None: |
| 2409 | ocr_res_list = json.loads(ocr_res_str) | 2410 | ocr_res_list = json.loads(ocr_res_str) |
| 2410 | ocr_res = ocr_res_list.pop() | 2411 | ocr_res = ocr_res_list.pop() |
| 2411 | 2412 | ||
| 2412 | for name, value in strip_list: | 2413 | for name, value in strip_list: |
| 2414 | # 购置税校验 | ||
| 2413 | if name == consts.SE_AFC_CON_FIELD[21]: | 2415 | if name == consts.SE_AFC_CON_FIELD[21]: |
| 2414 | if len(value) == 3: | 2416 | if len(value) == 3: |
| 2415 | reason = [] | 2417 | reason = [] |
| ... | @@ -2471,6 +2473,29 @@ def se_contract_compare(license_en, ocr_res_dict, strip_list, is_gsyh): | ... | @@ -2471,6 +2473,29 @@ def se_contract_compare(license_en, ocr_res_dict, strip_list, is_gsyh): |
| 2471 | result_field_list.append((name, value, consts.RESULT_N, empty_str, empty_str, ErrorType.NF.value, | 2473 | result_field_list.append((name, value, consts.RESULT_N, empty_str, empty_str, ErrorType.NF.value, |
| 2472 | '{0}未找到'.format(license_en))) | 2474 | '{0}未找到'.format(license_en))) |
| 2473 | 2475 | ||
| 2476 | if ocr_res_str is not None: | ||
| 2477 | img_map = {} | ||
| 2478 | for name, _, result, _, img_path, _, _ in result_field_list: | ||
| 2479 | if result == consts.RESULT_N: | ||
| 2480 | img_map.setdefault(img_path, []).append(name) | ||
| 2481 | for path, field_list in img_map.items(): | ||
| 2482 | if os.path.exists(path): | ||
| 2483 | pre, suf = os.path.splitext(path) | ||
| 2484 | last_img = cv2.imread(path) | ||
| 2485 | for field_idx, field in enumerate(field_list): | ||
| 2486 | try: | ||
| 2487 | save_path = '{0}_{1}{2}'.format(pre, str(field_idx), suf) | ||
| 2488 | section_position_list = ocr_res.get(consts.ALL_POSITION_KEY, {}).get(field, []) | ||
| 2489 | if isinstance(section_position_list, list) and len(section_position_list) == 4: | ||
| 2490 | field_img = last_img[section_position_list[1]: section_position_list[3], | ||
| 2491 | section_position_list[0]: section_position_list[2], :] | ||
| 2492 | cv2.imwrite(save_path, field_img) | ||
| 2493 | field_img_path_dict[field] = save_path | ||
| 2494 | else: | ||
| 2495 | field_img_path_dict[field] = path | ||
| 2496 | except Exception as e: | ||
| 2497 | field_img_path_dict[field] = path | ||
| 2498 | |||
| 2474 | return result_field_list, field_img_path_dict | 2499 | return result_field_list, field_img_path_dict |
| 2475 | 2500 | ||
| 2476 | 2501 | ... | ... |
| ... | @@ -788,6 +788,7 @@ class Finder: | ... | @@ -788,6 +788,7 @@ class Finder: |
| 788 | items = [] | 788 | items = [] |
| 789 | start = False | 789 | start = False |
| 790 | page = None | 790 | page = None |
| 791 | greater_equal_v35 = False | ||
| 791 | for pno in self.pdf_info: | 792 | for pno in self.pdf_info: |
| 792 | condition = False | 793 | condition = False |
| 793 | for block in self.pdf_info[f'{pno}']['blocks']: | 794 | for block in self.pdf_info[f'{pno}']['blocks']: |
| ... | @@ -796,6 +797,8 @@ class Finder: | ... | @@ -796,6 +797,8 @@ class Finder: |
| 796 | for line in block['lines']: | 797 | for line in block['lines']: |
| 797 | for span in line['spans']: | 798 | for span in line['spans']: |
| 798 | bbox, text = span['bbox'], span['text'] | 799 | bbox, text = span['bbox'], span['text'] |
| 800 | if text == '租赁利率': | ||
| 801 | greater_equal_v35 = True | ||
| 799 | if '总计' in text: | 802 | if '总计' in text: |
| 800 | start = True | 803 | start = True |
| 801 | if '注:出租人向承租人购买租赁车辆的对价' in text: | 804 | if '注:出租人向承租人购买租赁车辆的对价' in text: |
| ... | @@ -804,9 +807,14 @@ class Finder: | ... | @@ -804,9 +807,14 @@ class Finder: |
| 804 | if start == True: | 807 | if start == True: |
| 805 | items.append(text) | 808 | items.append(text) |
| 806 | lines = [['项目', '购买价格', '实际融资金额']] | 809 | lines = [['项目', '购买价格', '实际融资金额']] |
| 807 | for i in range(len(items) // 3): | 810 | if greater_equal_v35: |
| 808 | line = [items[2 + i * 3 + 0], items[2 + i * 3 + 1], items[2 + i * 3 + 2]] | 811 | for i in range(len(items) // 4): |
| 809 | lines.append(line) | 812 | line = [items[2 + i * 4 + 0], items[2 + i * 4 + 1], items[2 + i * 4 + 2]] |
| 813 | lines.append(line) | ||
| 814 | else: | ||
| 815 | for i in range(len(items) // 3): | ||
| 816 | line = [items[2 + i * 3 + 0], items[2 + i * 3 + 1], items[2 + i * 3 + 2]] | ||
| 817 | lines.append(line) | ||
| 810 | if len(items) > 0: | 818 | if len(items) > 0: |
| 811 | lines.append([items[0], '', items[1]]) | 819 | lines.append([items[0], '', items[1]]) |
| 812 | 820 | ... | ... |
-
Please register or sign in to post a comment