add slice
Showing
3 changed files
with
120 additions
and
11 deletions
... | @@ -250,6 +250,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -250,6 +250,7 @@ class Command(BaseCommand, LoggerMixin): |
250 | page_num_only = page_num | 250 | page_num_only = page_num |
251 | rebuild_page_info = [] | 251 | rebuild_page_info = [] |
252 | text_key = 'words' | 252 | text_key = 'words' |
253 | position_key = 'position' | ||
253 | for key, value in contract_dict.get('page_info', {}).items(): | 254 | for key, value in contract_dict.get('page_info', {}).items(): |
254 | if value is None: | 255 | if value is None: |
255 | rebuild_page_info.append((key, )) | 256 | rebuild_page_info.append((key, )) |
... | @@ -279,11 +280,17 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -279,11 +280,17 @@ class Command(BaseCommand, LoggerMixin): |
279 | 280 | ||
280 | contract_result.setdefault(classify, dict()).setdefault(page_num_only, []).append(rebuild_page_info) | 281 | contract_result.setdefault(classify, dict()).setdefault(page_num_only, []).append(rebuild_page_info) |
281 | 282 | ||
282 | page_compare_dict = {} | 283 | page_compare_dict = { |
284 | consts.IMG_PATH_KEY: img_path, | ||
285 | consts.ALL_POSITION_KEY: {}, | ||
286 | } | ||
283 | for key, value in contract_dict.get('page_info', {}).items(): | 287 | for key, value in contract_dict.get('page_info', {}).items(): |
284 | if not isinstance(value, dict): | 288 | if not isinstance(value, dict): |
285 | continue | 289 | continue |
286 | elif text_key in value: | 290 | elif text_key in value: |
291 | position_list = value.get(position_key, []) | ||
292 | page_compare_dict[consts.ALL_POSITION_KEY][key] = position_list if isinstance(position_list, list) else [] | ||
293 | |||
287 | if value[text_key] is None: | 294 | if value[text_key] is None: |
288 | page_compare_dict[key] = '' | 295 | page_compare_dict[key] = '' |
289 | elif isinstance(value[text_key], str): | 296 | elif isinstance(value[text_key], str): |
... | @@ -292,16 +299,47 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -292,16 +299,47 @@ class Command(BaseCommand, LoggerMixin): |
292 | page_compare_dict[key] = value[text_key] | 299 | page_compare_dict[key] = value[text_key] |
293 | else: | 300 | else: |
294 | page_compare_dict[key] = {} | 301 | page_compare_dict[key] = {} |
302 | page_compare_dict[consts.ALL_POSITION_KEY][key] = {} | ||
295 | for sub_key, sub_value in value.items(): | 303 | for sub_key, sub_value in value.items(): |
304 | position_list = sub_value.get(position_key, []) | ||
305 | page_compare_dict[consts.ALL_POSITION_KEY][key][sub_key] = position_list if isinstance( | ||
306 | position_list, list) else [] | ||
307 | |||
296 | if sub_value[text_key] is None: | 308 | if sub_value[text_key] is None: |
297 | page_compare_dict[key][sub_key] = '' | 309 | page_compare_dict[key][sub_key] = '' |
298 | elif isinstance(sub_value[text_key], str): | 310 | elif isinstance(sub_value[text_key], str): |
299 | page_compare_dict[key][sub_key] = sub_value[text_key] | 311 | page_compare_dict[key][sub_key] = sub_value[text_key] |
300 | 312 | ||
301 | page_compare_dict[consts.IMG_PATH_KEY] = img_path | ||
302 | contract_result_compare.setdefault(classify, dict())[consts.ASP_KEY] = contract_dict.get(consts.ASP_KEY, False) | 313 | contract_result_compare.setdefault(classify, dict())[consts.ASP_KEY] = contract_dict.get(consts.ASP_KEY, False) |
314 | # "position" = [xmin, ymin, xmax, ymax] | ||
303 | contract_result_compare.setdefault(classify, dict())[page_num_only] = page_compare_dict | 315 | contract_result_compare.setdefault(classify, dict())[page_num_only] = page_compare_dict |
304 | 316 | ||
@staticmethod
def rebuild_position(src_position):
    """Convert an edge-based box into a left/top/width/height box.

    Args:
        src_position: Mapping that may hold 'left', 'top', 'right' and
            'bottom' entries, e.g.
            {'left': 470, 'top': 671, 'right': 542, 'bottom': 694}.
            Entries that are missing default to 0.

    Returns:
        dict: A new dict with the keys 'left', 'top', 'width' and 'height',
        where width = right - left and height = bottom - top. For the
        example above the result is
        {'left': 470, 'top': 671, 'width': 72, 'height': 23}.
        An all-zero box is returned when ``src_position`` has no ``get``
        method (e.g. ``None``) or when its values cannot be subtracted.
    """
    try:
        left = src_position.get('left', 0)
        top = src_position.get('top', 0)
        width = src_position.get('right', 0) - left
        height = src_position.get('bottom', 0) - top
    except (AttributeError, TypeError):
        # Best-effort conversion: a malformed position must not abort the
        # caller, so fall back to an empty (all-zero) box.
        return {
            'left': 0,
            'top': 0,
            'width': 0,
            'height': 0,
        }
    return {
        'left': left,
        'top': top,
        'width': width,
        'height': height,
    }
342 | |||
305 | def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, do_dda, | 343 | def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, do_dda, |
306 | dda_id_bc_mapping): | 344 | dda_id_bc_mapping): |
307 | # 类别:'0'身份证, '1'居住证 | 345 | # 类别:'0'身份证, '1'居住证 |
... | @@ -329,6 +367,7 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -329,6 +367,7 @@ class Command(BaseCommand, LoggerMixin): |
329 | # 保单 | 367 | # 保单 |
330 | if classify == consts.INSURANCE_CLASSIFY: | 368 | if classify == consts.INSURANCE_CLASSIFY: |
331 | product_result = ['', '', ''] | 369 | product_result = ['', '', ''] |
370 | product_result_position = [dict(), dict(), dict()] | ||
332 | min_char_count_1 = 1000 | 371 | min_char_count_1 = 1000 |
333 | min_char_count_2 = 1000 | 372 | min_char_count_2 = 1000 |
334 | for product in license_data.get('result', {}).get('productList', []): | 373 | for product in license_data.get('result', {}).get('productList', []): |
... | @@ -338,10 +377,16 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -338,10 +377,16 @@ class Command(BaseCommand, LoggerMixin): |
338 | min_char_count_1 = len(name) | 377 | min_char_count_1 = len(name) |
339 | product_result[0] = product.get('coverage', {}).get('words', '') | 378 | product_result[0] = product.get('coverage', {}).get('words', '') |
340 | product_result[2] = product.get('deductible_franchise', {}).get('words', '') | 379 | product_result[2] = product.get('deductible_franchise', {}).get('words', '') |
380 | product_result_position[0] = self.rebuild_position(product.get('coverage', {}).get( | ||
381 | 'position', {})) | ||
382 | product_result_position[2] = self.rebuild_position(product.get('deductible_franchise', {}).get( | ||
383 | 'position', {})) | ||
341 | elif name.find('第三者责任') != -1: | 384 | elif name.find('第三者责任') != -1: |
342 | if len(name) < min_char_count_2: | 385 | if len(name) < min_char_count_2: |
343 | min_char_count_2 = len(name) | 386 | min_char_count_2 = len(name) |
344 | product_result[1] = product.get('coverage', {}).get('words', '') | 387 | product_result[1] = product.get('coverage', {}).get('words', '') |
388 | product_result_position[1] = self.rebuild_position(product.get('coverage', {}).get( | ||
389 | 'position', {})) | ||
345 | 390 | ||
346 | special_str = license_data.get('result', {}).get('1stBeneficiary', {}).get('words', '') | 391 | special_str = license_data.get('result', {}).get('1stBeneficiary', {}).get('words', '') |
347 | special = '无' | 392 | special = '无' |
... | @@ -362,11 +407,29 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -362,11 +407,29 @@ class Command(BaseCommand, LoggerMixin): |
362 | consts.IMG_PATH_KEY: img_path, | 407 | consts.IMG_PATH_KEY: img_path, |
363 | consts.SECTION_IMG_PATH_KEY: section_img_path, | 408 | consts.SECTION_IMG_PATH_KEY: section_img_path, |
364 | } | 409 | } |
365 | # 'position': {'left': 470, 'top': 671, 'right': 542, 'bottom': 694} | 410 | |
366 | # position_dict = { | 411 | position_dict = { |
367 | # '被保险人姓名': {consts.FIELD_POSITION_KEY: {}} | 412 | '被保险人姓名': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( |
368 | # } | 413 | 'insured', {}).get('name', {}).get('position', {}))}, |
369 | # insurance_ocr_result[consts.ALL_POSITION_KEY] = position_dict | 414 | '被保险人证件号码': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( |
415 | 'insured', {}).get('certiCode', {}).get('position', {}))}, | ||
416 | '车架号': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
417 | 'vehicle', {}).get('VIN', {}).get('position', {}))}, | ||
418 | '机动车损失保险金额': {consts.FIELD_POSITION_KEY: product_result_position[0]}, | ||
419 | '机动车第三者责任保险金额': {consts.FIELD_POSITION_KEY: product_result_position[1]}, | ||
420 | '机动车损失保险绝对免赔率/绝对免赔额': {consts.FIELD_POSITION_KEY: product_result_position[2]}, | ||
421 | '保险费合计': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
422 | 'premiumSum', {}).get('position', {}))}, | ||
423 | '保险起始日期': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
424 | 'startDate', {}).get('position', {}))}, | ||
425 | '保险截止日期': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
426 | 'endDate', {}).get('position', {}))}, | ||
427 | '保单章': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
428 | 'seal', {}).get('position', {}))}, | ||
429 | '特别约定第一受益人': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
430 | '1stBeneficiary', {}).get('position', {}))}, | ||
431 | } | ||
432 | insurance_ocr_result[consts.ALL_POSITION_KEY] = position_dict | ||
370 | license_summary.setdefault(classify, []).append(insurance_ocr_result) | 433 | license_summary.setdefault(classify, []).append(insurance_ocr_result) |
371 | # DDA | 434 | # DDA |
372 | elif classify == consts.DDA_CLASSIFY: | 435 | elif classify == consts.DDA_CLASSIFY: |
... | @@ -873,11 +936,24 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -873,11 +936,24 @@ class Command(BaseCommand, LoggerMixin): |
873 | res[key] = page_info_dict.get(str(pno), {}).get(key1, '') | 936 | res[key] = page_info_dict.get(str(pno), {}).get(key1, '') |
874 | res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get( | 937 | res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get( |
875 | consts.IMG_PATH_KEY, '') | 938 | consts.IMG_PATH_KEY, '') |
939 | res.setdefault(consts.ALL_POSITION_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get( | ||
940 | consts.ALL_POSITION_KEY, {}).get(key1, []) | ||
876 | else: | 941 | else: |
877 | res[key] = page_info_dict.get(str(pno), {}).get(key1, {}).get(key2, '') | 942 | res[key] = page_info_dict.get(str(pno), {}).get(key1, {}).get(key2, '') |
878 | res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get( | 943 | res.setdefault(consts.IMG_PATH_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get( |
879 | consts.IMG_PATH_KEY, '') | 944 | consts.IMG_PATH_KEY, '') |
880 | 945 | res.setdefault(consts.ALL_POSITION_KEY, dict())[key] = page_info_dict.get(str(pno), {}).get( | |
946 | consts.ALL_POSITION_KEY, {}).get(key1, {}).get(key2, []) | ||
947 | |||
948 | # res = { | ||
949 | # 'key': 'list or str', | ||
950 | # 'uniq_img_path_key': { | ||
951 | # 'key': 'str', | ||
952 | # }, | ||
953 | # 'uniq_all_position_key': { | ||
954 | # 'key': 'list' | ||
955 | # } | ||
956 | # } | ||
881 | license_summary[classify] = [res] | 957 | license_summary[classify] = [res] |
882 | else: | 958 | else: |
883 | res = {} | 959 | res = {} | ... | ... |
... | @@ -2405,11 +2405,13 @@ def se_contract_compare(license_en, ocr_res_dict, strip_list, is_gsyh): | ... | @@ -2405,11 +2405,13 @@ def se_contract_compare(license_en, ocr_res_dict, strip_list, is_gsyh): |
2405 | result_field_list = [] | 2405 | result_field_list = [] |
2406 | field_img_path_dict = dict() | 2406 | field_img_path_dict = dict() |
2407 | 2407 | ||
2408 | ocr_res = dict() | ||
2408 | if ocr_res_str is not None: | 2409 | if ocr_res_str is not None: |
2409 | ocr_res_list = json.loads(ocr_res_str) | 2410 | ocr_res_list = json.loads(ocr_res_str) |
2410 | ocr_res = ocr_res_list.pop() | 2411 | ocr_res = ocr_res_list.pop() |
2411 | 2412 | ||
2412 | for name, value in strip_list: | 2413 | for name, value in strip_list: |
2414 | # 购置税校验 | ||
2413 | if name == consts.SE_AFC_CON_FIELD[21]: | 2415 | if name == consts.SE_AFC_CON_FIELD[21]: |
2414 | if len(value) == 3: | 2416 | if len(value) == 3: |
2415 | reason = [] | 2417 | reason = [] |
... | @@ -2471,6 +2473,29 @@ def se_contract_compare(license_en, ocr_res_dict, strip_list, is_gsyh): | ... | @@ -2471,6 +2473,29 @@ def se_contract_compare(license_en, ocr_res_dict, strip_list, is_gsyh): |
2471 | result_field_list.append((name, value, consts.RESULT_N, empty_str, empty_str, ErrorType.NF.value, | 2473 | result_field_list.append((name, value, consts.RESULT_N, empty_str, empty_str, ErrorType.NF.value, |
2472 | '{0}未找到'.format(license_en))) | 2474 | '{0}未找到'.format(license_en))) |
2473 | 2475 | ||
2476 | if ocr_res_str is not None: | ||
2477 | img_map = {} | ||
2478 | for name, _, result, _, img_path, _, _ in result_field_list: | ||
2479 | if result == consts.RESULT_N: | ||
2480 | img_map.setdefault(img_path, []).append(name) | ||
2481 | for path, field_list in img_map.items(): | ||
2482 | if os.path.exists(path): | ||
2483 | pre, suf = os.path.splitext(path) | ||
2484 | last_img = cv2.imread(path) | ||
2485 | for field_idx, field in enumerate(field_list): | ||
2486 | try: | ||
2487 | save_path = '{0}_{1}{2}'.format(pre, str(field_idx), suf) | ||
2488 | section_position_list = ocr_res.get(consts.ALL_POSITION_KEY, {}).get(field, []) | ||
2489 | if isinstance(section_position_list, list) and len(section_position_list) == 4: | ||
2490 | field_img = last_img[section_position_list[1]: section_position_list[3], | ||
2491 | section_position_list[0]: section_position_list[2], :] | ||
2492 | cv2.imwrite(save_path, field_img) | ||
2493 | field_img_path_dict[field] = save_path | ||
2494 | else: | ||
2495 | field_img_path_dict[field] = path | ||
2496 | except Exception as e: | ||
2497 | field_img_path_dict[field] = path | ||
2498 | |||
2474 | return result_field_list, field_img_path_dict | 2499 | return result_field_list, field_img_path_dict |
2475 | 2500 | ||
2476 | 2501 | ... | ... |
... | @@ -788,6 +788,7 @@ class Finder: | ... | @@ -788,6 +788,7 @@ class Finder: |
788 | items = [] | 788 | items = [] |
789 | start = False | 789 | start = False |
790 | page = None | 790 | page = None |
791 | greater_equal_v35 = False | ||
791 | for pno in self.pdf_info: | 792 | for pno in self.pdf_info: |
792 | condition = False | 793 | condition = False |
793 | for block in self.pdf_info[f'{pno}']['blocks']: | 794 | for block in self.pdf_info[f'{pno}']['blocks']: |
... | @@ -796,6 +797,8 @@ class Finder: | ... | @@ -796,6 +797,8 @@ class Finder: |
796 | for line in block['lines']: | 797 | for line in block['lines']: |
797 | for span in line['spans']: | 798 | for span in line['spans']: |
798 | bbox, text = span['bbox'], span['text'] | 799 | bbox, text = span['bbox'], span['text'] |
800 | if text == '租赁利率': | ||
801 | greater_equal_v35 = True | ||
799 | if '总计' in text: | 802 | if '总计' in text: |
800 | start = True | 803 | start = True |
801 | if '注:出租人向承租人购买租赁车辆的对价' in text: | 804 | if '注:出租人向承租人购买租赁车辆的对价' in text: |
... | @@ -804,9 +807,14 @@ class Finder: | ... | @@ -804,9 +807,14 @@ class Finder: |
804 | if start == True: | 807 | if start == True: |
805 | items.append(text) | 808 | items.append(text) |
806 | lines = [['项目', '购买价格', '实际融资金额']] | 809 | lines = [['项目', '购买价格', '实际融资金额']] |
807 | for i in range(len(items) // 3): | 810 | if greater_equal_v35: |
808 | line = [items[2 + i * 3 + 0], items[2 + i * 3 + 1], items[2 + i * 3 + 2]] | 811 | for i in range(len(items) // 4): |
809 | lines.append(line) | 812 | line = [items[2 + i * 4 + 0], items[2 + i * 4 + 1], items[2 + i * 4 + 2]] |
813 | lines.append(line) | ||
814 | else: | ||
815 | for i in range(len(items) // 3): | ||
816 | line = [items[2 + i * 3 + 0], items[2 + i * 3 + 1], items[2 + i * 3 + 2]] | ||
817 | lines.append(line) | ||
810 | if len(items) > 0: | 818 | if len(items) > 0: |
811 | lines.append([items[0], '', items[1]]) | 819 | lines.append([items[0], '', items[1]]) |
812 | 820 | ... | ... |
-
Please register or sign in to post a comment