ocr_process添加try-except处理前半部分
Showing 1 changed file with 640 additions and 605 deletions
| ... | @@ -178,171 +178,180 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -178,171 +178,180 @@ class Command(BaseCommand, LoggerMixin): |
| 178 | # self.online_log.info('{0} [edms download success] [pdf_path={1}]'.format(self.log_base, pdf_path)) | 178 | # self.online_log.info('{0} [edms download success] [pdf_path={1}]'.format(self.log_base, pdf_path)) |
| 179 | 179 | ||
| 180 | def bs_process(self, wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx): | 180 | def bs_process(self, wb, ocr_data, bs_summary, unknown_summary, classify, res_list, pno, ino, part_idx): |
| 181 | sheets = ocr_data.get('data', []) | 181 | # 添加 try-except 处理 |
| 182 | if not sheets: | 182 | try: |
| 183 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) | 183 | sheets = ocr_data.get('data', []) |
| 184 | return | 184 | if not sheets: |
| 185 | # confidence = ocr_data.get('confidence', 1) | 185 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) |
| 186 | img_name = 'page_{0}_img_{1}_{2}'.format(pno, ino, part_idx) | 186 | return |
| 187 | cells_exists = False | 187 | # confidence = ocr_data.get('confidence', 1) |
| 188 | for i, sheet in enumerate(sheets): | 188 | img_name = 'page_{0}_img_{1}_{2}'.format(pno, ino, part_idx) |
| 189 | cells = sheet.get('cells') | 189 | cells_exists = False |
| 190 | if not cells: | 190 | for i, sheet in enumerate(sheets): |
| 191 | continue | 191 | cells = sheet.get('cells') |
| 192 | cells_exists = True | 192 | if not cells: |
| 193 | sheet_name = '{0}_{1}'.format(img_name, i) | 193 | continue |
| 194 | ws = wb.create_sheet(sheet_name) | 194 | cells_exists = True |
| 195 | for cell in cells: | 195 | sheet_name = '{0}_{1}'.format(img_name, i) |
| 196 | c1 = cell.get('start_column') | 196 | ws = wb.create_sheet(sheet_name) |
| 197 | r1 = cell.get('start_row') | 197 | for cell in cells: |
| 198 | words = cell.get('words') | 198 | c1 = cell.get('start_column') |
| 199 | ws.cell(row=r1 + 1, column=c1 + 1, value=words) | 199 | r1 = cell.get('start_row') |
| 200 | 200 | words = cell.get('words') | |
| 201 | # 真伪 | 201 | ws.cell(row=r1 + 1, column=c1 + 1, value=words) |
| 202 | verify_info = [] | 202 | |
| 203 | verify_dict = sheet.get('verify', {}) | 203 | # 真伪 |
| 204 | if verify_dict.get('verify_res') == 'fake': | 204 | verify_info = [] |
| 205 | verify_info.extend(verify_dict.get('verify_info', [])) | 205 | verify_dict = sheet.get('verify', {}) |
| 206 | 206 | if verify_dict.get('verify_res') == 'fake': | |
| 207 | # ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'] | 207 | verify_info.extend(verify_dict.get('verify_info', [])) |
| 208 | summary = sheet.get('summary') | 208 | |
| 209 | card = summary[1] | 209 | # ['户名', '卡号', '页码', '回单验证码', '打印时间', '起始时间', '终止时间'] |
| 210 | if card is None: | 210 | summary = sheet.get('summary') |
| 211 | classify_dict = unknown_summary.setdefault(classify, {}) | 211 | card = summary[1] |
| 212 | role = consts.UNKNOWN_ROLE if summary[0] is None else summary[0] | 212 | if card is None: |
| 213 | role_dict = classify_dict.setdefault(role, {}) | 213 | classify_dict = unknown_summary.setdefault(classify, {}) |
| 214 | role_dict['classify'] = classify | 214 | role = consts.UNKNOWN_ROLE if summary[0] is None else summary[0] |
| 215 | role_dict['role'] = role | 215 | role_dict = classify_dict.setdefault(role, {}) |
| 216 | role_dict.setdefault('sheet', []).append(sheet_name) | 216 | role_dict['classify'] = classify |
| 217 | # role_dict.setdefault('confidence', []).append(confidence) | 217 | role_dict['role'] = role |
| 218 | code_list = role_dict.setdefault('code', []) | 218 | role_dict.setdefault('sheet', []).append(sheet_name) |
| 219 | pt_list = role_dict.setdefault('print_time', []) | 219 | # role_dict.setdefault('confidence', []).append(confidence) |
| 220 | sd_list = role_dict.setdefault('start_date', []) | 220 | code_list = role_dict.setdefault('code', []) |
| 221 | ed_list = role_dict.setdefault('end_date', []) | 221 | pt_list = role_dict.setdefault('print_time', []) |
| 222 | verify_list = role_dict.setdefault('verify', []) | 222 | sd_list = role_dict.setdefault('start_date', []) |
| 223 | if summary[3] is not None: | 223 | ed_list = role_dict.setdefault('end_date', []) |
| 224 | code_list.append((summary[2], summary[3])) | 224 | verify_list = role_dict.setdefault('verify', []) |
| 225 | if summary[4] is not None: | 225 | if summary[3] is not None: |
| 226 | pt_list.append(summary[4]) | 226 | code_list.append((summary[2], summary[3])) |
| 227 | if summary[5] is not None: | 227 | if summary[4] is not None: |
| 228 | sd_list.append(summary[5]) | 228 | pt_list.append(summary[4]) |
| 229 | if summary[6] is not None: | 229 | if summary[5] is not None: |
| 230 | ed_list.append(summary[6]) | 230 | sd_list.append(summary[5]) |
| 231 | if len(verify_info) > 0: | 231 | if summary[6] is not None: |
| 232 | verify_list.append( | 232 | ed_list.append(summary[6]) |
| 233 | (pno, ino, '、'.join(verify_info)) | 233 | if len(verify_info) > 0: |
| 234 | ) | 234 | verify_list.append( |
| 235 | (pno, ino, '、'.join(verify_info)) | ||
| 236 | ) | ||
| 237 | else: | ||
| 238 | card_dict = bs_summary.setdefault(card, {}) | ||
| 239 | card_dict['count'] = card_dict.get('count', 0) + 1 | ||
| 240 | card_dict.setdefault('classify', []).append(classify) | ||
| 241 | # card_dict.setdefault('confidence', []).append(confidence) | ||
| 242 | card_dict.setdefault('sheet', []).append(sheet_name) | ||
| 243 | role_list = card_dict.setdefault('role', []) | ||
| 244 | role_set = card_dict.setdefault('role_set', set()) | ||
| 245 | code_list = card_dict.setdefault('code', []) | ||
| 246 | pt_list = card_dict.setdefault('print_time', []) | ||
| 247 | sd_list = card_dict.setdefault('start_date', []) | ||
| 248 | ed_list = card_dict.setdefault('end_date', []) | ||
| 249 | verify_list = card_dict.setdefault('verify', []) | ||
| 250 | if summary[0] is not None: | ||
| 251 | role_list.append(summary[0]) | ||
| 252 | role_set.add(summary[0]) | ||
| 253 | if summary[3] is not None: | ||
| 254 | code_list.append((summary[2], summary[3])) | ||
| 255 | if summary[4] is not None: | ||
| 256 | pt_list.append(summary[4]) | ||
| 257 | if summary[5] is not None: | ||
| 258 | sd_list.append(summary[5]) | ||
| 259 | if summary[6] is not None: | ||
| 260 | ed_list.append(summary[6]) | ||
| 261 | if len(verify_info) > 0: | ||
| 262 | verify_list.append( | ||
| 263 | (pno, ino, '、'.join(verify_info)) | ||
| 264 | ) | ||
| 265 | |||
| 266 | if cells_exists: | ||
| 267 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS)) | ||
| 235 | else: | 268 | else: |
| 236 | card_dict = bs_summary.setdefault(card, {}) | 269 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) |
| 237 | card_dict['count'] = card_dict.get('count', 0) + 1 | 270 | except Exception as e: |
| 238 | card_dict.setdefault('classify', []).append(classify) | 271 | res_list.append((pno, ino, part_idx, consts.RES_FAILED)) |
| 239 | # card_dict.setdefault('confidence', []).append(confidence) | 272 | self.online_log.error('{0} [bs_process error] [error={1}]'.format(self.log_base, traceback.format_exc())) |
| 240 | card_dict.setdefault('sheet', []).append(sheet_name) | ||
| 241 | role_list = card_dict.setdefault('role', []) | ||
| 242 | role_set = card_dict.setdefault('role_set', set()) | ||
| 243 | code_list = card_dict.setdefault('code', []) | ||
| 244 | pt_list = card_dict.setdefault('print_time', []) | ||
| 245 | sd_list = card_dict.setdefault('start_date', []) | ||
| 246 | ed_list = card_dict.setdefault('end_date', []) | ||
| 247 | verify_list = card_dict.setdefault('verify', []) | ||
| 248 | if summary[0] is not None: | ||
| 249 | role_list.append(summary[0]) | ||
| 250 | role_set.add(summary[0]) | ||
| 251 | if summary[3] is not None: | ||
| 252 | code_list.append((summary[2], summary[3])) | ||
| 253 | if summary[4] is not None: | ||
| 254 | pt_list.append(summary[4]) | ||
| 255 | if summary[5] is not None: | ||
| 256 | sd_list.append(summary[5]) | ||
| 257 | if summary[6] is not None: | ||
| 258 | ed_list.append(summary[6]) | ||
| 259 | if len(verify_info) > 0: | ||
| 260 | verify_list.append( | ||
| 261 | (pno, ino, '、'.join(verify_info)) | ||
| 262 | ) | ||
| 263 | |||
| 264 | if cells_exists: | ||
| 265 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS)) | ||
| 266 | else: | ||
| 267 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) | ||
| 268 | 273 | ||
| 269 | def contract_process(self, classify, ocr_data, contract_result, res_list, pno, ino, part_idx, | 274 | def contract_process(self, classify, ocr_data, contract_result, res_list, pno, ino, part_idx, |
| 270 | img_path, contract_result_compare): | 275 | img_path, contract_result_compare): |
| 271 | contract_dict = ocr_data.get('data') | 276 | # 添加 try-except 处理 |
| 272 | if not contract_dict or contract_dict.get('page_num') is None or contract_dict.get('page_info') is None: | 277 | try: |
| 273 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) | 278 | contract_dict = ocr_data.get('data') |
| 274 | return | 279 | if not contract_dict or contract_dict.get('page_num') is None or contract_dict.get('page_info') is None: |
| 275 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS)) | 280 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) |
| 276 | page_num = contract_dict.get('page_num') | 281 | return |
| 277 | if page_num.startswith('page_'): | 282 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS)) |
| 278 | page_num_only = page_num.split('_')[-1] | 283 | page_num = contract_dict.get('page_num') |
| 279 | else: | 284 | if page_num.startswith('page_'): |
| 280 | page_num_only = page_num | 285 | page_num_only = page_num.split('_')[-1] |
| 281 | rebuild_page_info = [] | ||
| 282 | text_key = 'words' | ||
| 283 | position_key = 'position' | ||
| 284 | for key, value in contract_dict.get('page_info', {}).items(): | ||
| 285 | if value is None: | ||
| 286 | rebuild_page_info.append((key, )) | ||
| 287 | elif text_key in value: | ||
| 288 | if value[text_key] is None: | ||
| 289 | rebuild_page_info.append((key,)) | ||
| 290 | elif isinstance(value[text_key], str): | ||
| 291 | rebuild_page_info.append((key, value[text_key])) | ||
| 292 | elif isinstance(value[text_key], list): | ||
| 293 | rebuild_page_info.append((key,)) | ||
| 294 | for row_list in value[text_key]: | ||
| 295 | rebuild_page_info.append(row_list) | ||
| 296 | else: | 286 | else: |
| 297 | rebuild_page_info.append((key,)) | 287 | page_num_only = page_num |
| 298 | for sub_key, sub_value in value.items(): | 288 | rebuild_page_info = [] |
| 299 | if sub_value is None: | 289 | text_key = 'words' |
| 300 | rebuild_page_info.append((sub_key,)) | 290 | position_key = 'position' |
| 301 | elif text_key in sub_value: | 291 | for key, value in contract_dict.get('page_info', {}).items(): |
| 302 | if sub_value[text_key] is None: | 292 | if value is None: |
| 303 | rebuild_page_info.append((sub_key,)) | 293 | rebuild_page_info.append((key, )) |
| 304 | elif isinstance(sub_value[text_key], str): | 294 | elif text_key in value: |
| 305 | rebuild_page_info.append((sub_key, sub_value[text_key])) | 295 | if value[text_key] is None: |
| 306 | elif isinstance(sub_value[text_key], list): | 296 | rebuild_page_info.append((key,)) |
| 297 | elif isinstance(value[text_key], str): | ||
| 298 | rebuild_page_info.append((key, value[text_key])) | ||
| 299 | elif isinstance(value[text_key], list): | ||
| 300 | rebuild_page_info.append((key,)) | ||
| 301 | for row_list in value[text_key]: | ||
| 302 | rebuild_page_info.append(row_list) | ||
| 303 | else: | ||
| 304 | rebuild_page_info.append((key,)) | ||
| 305 | for sub_key, sub_value in value.items(): | ||
| 306 | if sub_value is None: | ||
| 307 | rebuild_page_info.append((sub_key,)) | 307 | rebuild_page_info.append((sub_key,)) |
| 308 | for row_list in sub_value[text_key]: | 308 | elif text_key in sub_value: |
| 309 | rebuild_page_info.append(row_list) | 309 | if sub_value[text_key] is None: |
| 310 | rebuild_page_info.append((sub_key,)) | ||
| 311 | elif isinstance(sub_value[text_key], str): | ||
| 312 | rebuild_page_info.append((sub_key, sub_value[text_key])) | ||
| 313 | elif isinstance(sub_value[text_key], list): | ||
| 314 | rebuild_page_info.append((sub_key,)) | ||
| 315 | for row_list in sub_value[text_key]: | ||
| 316 | rebuild_page_info.append(row_list) | ||
| 317 | |||
| 318 | contract_result.setdefault(classify, dict()).setdefault(page_num_only, []).append(rebuild_page_info) | ||
| 319 | |||
| 320 | page_compare_dict = { | ||
| 321 | consts.IMG_PATH_KEY: img_path, | ||
| 322 | consts.ALL_POSITION_KEY: {}, | ||
| 323 | } | ||
| 324 | for key, value in contract_dict.get('page_info', {}).items(): | ||
| 325 | if not isinstance(value, dict): | ||
| 326 | continue | ||
| 327 | elif text_key in value: | ||
| 328 | position_list = value.get(position_key, []) | ||
| 329 | page_compare_dict[consts.ALL_POSITION_KEY][key] = position_list if isinstance(position_list, list) else [] | ||
| 330 | |||
| 331 | if value[text_key] is None: | ||
| 332 | page_compare_dict[key] = '' | ||
| 333 | elif isinstance(value[text_key], str): | ||
| 334 | page_compare_dict[key] = value[text_key] | ||
| 335 | elif isinstance(value[text_key], list): | ||
| 336 | page_compare_dict[key] = value[text_key] | ||
| 337 | else: | ||
| 338 | page_compare_dict[key] = {} | ||
| 339 | page_compare_dict[consts.ALL_POSITION_KEY][key] = {} | ||
| 340 | for sub_key, sub_value in value.items(): | ||
| 341 | position_list = sub_value.get(position_key, []) | ||
| 342 | page_compare_dict[consts.ALL_POSITION_KEY][key][sub_key] = position_list if isinstance( | ||
| 343 | position_list, list) else [] | ||
| 310 | 344 | ||
| 311 | contract_result.setdefault(classify, dict()).setdefault(page_num_only, []).append(rebuild_page_info) | 345 | if sub_value[text_key] is None: |
| 346 | page_compare_dict[key][sub_key] = '' | ||
| 347 | elif isinstance(sub_value[text_key], str): | ||
| 348 | page_compare_dict[key][sub_key] = sub_value[text_key] | ||
| 312 | 349 | ||
| 313 | page_compare_dict = { | 350 | contract_result_compare.setdefault(classify, dict())[consts.ASP_KEY] = contract_dict.get(consts.ASP_KEY, False) |
| 314 | consts.IMG_PATH_KEY: img_path, | 351 | # "position" = [xmin, ymin, xmax, ymax] |
| 315 | consts.ALL_POSITION_KEY: {}, | 352 | contract_result_compare.setdefault(classify, dict())[page_num_only] = page_compare_dict |
| 316 | } | 353 | except Exception as e: |
| 317 | for key, value in contract_dict.get('page_info', {}).items(): | 354 | self.online_log.error('{0} [contract_process error] [error={1}]'.format(self.log_base, traceback.format_exc())) |
| 318 | if not isinstance(value, dict): | ||
| 319 | continue | ||
| 320 | elif text_key in value: | ||
| 321 | position_list = value.get(position_key, []) | ||
| 322 | page_compare_dict[consts.ALL_POSITION_KEY][key] = position_list if isinstance(position_list, list) else [] | ||
| 323 | |||
| 324 | if value[text_key] is None: | ||
| 325 | page_compare_dict[key] = '' | ||
| 326 | elif isinstance(value[text_key], str): | ||
| 327 | page_compare_dict[key] = value[text_key] | ||
| 328 | elif isinstance(value[text_key], list): | ||
| 329 | page_compare_dict[key] = value[text_key] | ||
| 330 | else: | ||
| 331 | page_compare_dict[key] = {} | ||
| 332 | page_compare_dict[consts.ALL_POSITION_KEY][key] = {} | ||
| 333 | for sub_key, sub_value in value.items(): | ||
| 334 | position_list = sub_value.get(position_key, []) | ||
| 335 | page_compare_dict[consts.ALL_POSITION_KEY][key][sub_key] = position_list if isinstance( | ||
| 336 | position_list, list) else [] | ||
| 337 | |||
| 338 | if sub_value[text_key] is None: | ||
| 339 | page_compare_dict[key][sub_key] = '' | ||
| 340 | elif isinstance(sub_value[text_key], str): | ||
| 341 | page_compare_dict[key][sub_key] = sub_value[text_key] | ||
| 342 | |||
| 343 | contract_result_compare.setdefault(classify, dict())[consts.ASP_KEY] = contract_dict.get(consts.ASP_KEY, False) | ||
| 344 | # "position" = [xmin, ymin, xmax, ymax] | ||
| 345 | contract_result_compare.setdefault(classify, dict())[page_num_only] = page_compare_dict | ||
| 346 | 355 | ||
| 347 | @staticmethod | 356 | @staticmethod |
| 348 | def rebuild_position(src_position): | 357 | def rebuild_position(src_position): |
| ... | @@ -372,499 +381,525 @@ class Command(BaseCommand, LoggerMixin): | ... | @@ -372,499 +381,525 @@ class Command(BaseCommand, LoggerMixin): |
| 372 | 381 | ||
| 373 | def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, do_dda, | 382 | def license1_process(self, ocr_data, license_summary, classify, res_list, pno, ino, part_idx, img_path, do_dda, |
| 374 | dda_id_bc_mapping): | 383 | dda_id_bc_mapping): |
| 375 | # 类别:'0'身份证, '1'居住证 | 384 | # 添加 try-except 处理 |
| 376 | license_data = ocr_data.get('data') | 385 | try: |
| 377 | if not license_data: | 386 | # 类别:'0'身份证, '1'居住证 |
| 378 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) | 387 | license_data = ocr_data.get('data') |
| 379 | return | 388 | if not license_data: |
| 380 | if isinstance(license_data, dict): | ||
| 381 | pre, suf = os.path.splitext(img_path) | ||
| 382 | base64_img = license_data.pop('base64_img', '') | ||
| 383 | is_save = True if len(base64_img) > 0 else False | ||
| 384 | section_img_path = '{0}_{1}{2}'.format(pre, part_idx, suf) if is_save else img_path | ||
| 385 | if is_save: | ||
| 386 | try: | ||
| 387 | with open(section_img_path, "wb") as fh: | ||
| 388 | fh.write(base64.b64decode(base64_img.encode())) | ||
| 389 | except Exception as e: | ||
| 390 | self.online_log.warn( | ||
| 391 | '{0} [section img save failed] [img_path={1}]' | ||
| 392 | ' [part_idx={2}]'.format(self.log_base, img_path, part_idx)) | ||
| 393 | else: | ||
| 394 | is_save = False | ||
| 395 | section_img_path = img_path | ||
| 396 | |||
| 397 | # 保单 | ||
| 398 | if classify == consts.INSURANCE_CLASSIFY: | ||
| 399 | product_result = ['', '', ''] | ||
| 400 | product_result_position = [dict(), dict(), dict()] | ||
| 401 | min_char_count_1 = 1000 | ||
| 402 | min_char_count_2 = 1000 | ||
| 403 | for product in license_data.get('result', {}).get('productList', []): | ||
| 404 | name = product.get('name', {}).get('words', '') | ||
| 405 | if name.find('机动车损失') != -1 or name.find('汽车损失') != -1 or name.find('车损险') != -1 or \ | ||
| 406 | name.find('车损失险') != -1 or name.find('车损失保险') != -1: | ||
| 407 | if len(name) < min_char_count_1: | ||
| 408 | min_char_count_1 = len(name) | ||
| 409 | product_result[0] = product.get('coverage', {}).get('words', '') | ||
| 410 | product_result[2] = product.get('deductible_franchise', {}).get('words', '') | ||
| 411 | product_result_position[0] = self.rebuild_position(product.get('coverage', {}).get( | ||
| 412 | 'position', {})) | ||
| 413 | product_result_position[2] = self.rebuild_position(product.get('deductible_franchise', {}).get( | ||
| 414 | 'position', {})) | ||
| 415 | elif name.find('第三者责任') != -1: | ||
| 416 | if len(name) < min_char_count_2: | ||
| 417 | min_char_count_2 = len(name) | ||
| 418 | product_result[1] = product.get('coverage', {}).get('words', '') | ||
| 419 | product_result_position[1] = self.rebuild_position(product.get('coverage', {}).get( | ||
| 420 | 'position', {})) | ||
| 421 | |||
| 422 | special_str = license_data.get('result', {}).get('1stBeneficiary', {}).get('words', '') | ||
| 423 | special = '无' | ||
| 424 | if special_str.find('宝马') != -1 or special_str.find('先锋国际融资租赁有限公司') != -1: | ||
| 425 | special = '有' | ||
| 426 | insurance_ocr_result = { | ||
| 427 | '被保险人姓名': license_data.get('result', {}).get('insured', {}).get('name', {}).get('words', ''), | ||
| 428 | '被保险人证件号码': license_data.get('result', {}).get('insured', {}).get('certiCode', {}).get('words', ''), | ||
| 429 | '车架号': license_data.get('result', {}).get('vehicle', {}).get('VIN', {}).get('words', ''), | ||
| 430 | '机动车损失保险金额': product_result[0], | ||
| 431 | '机动车第三者责任保险金额': product_result[1], | ||
| 432 | '机动车损失保险绝对免赔率/绝对免赔额': product_result[2], | ||
| 433 | '保险费合计': license_data.get('result', {}).get('premiumSum', {}).get('words', ''), | ||
| 434 | '保险起始日期': license_data.get('result', {}).get('startDate', {}).get('words', ''), | ||
| 435 | '保险截止日期': license_data.get('result', {}).get('endDate', {}).get('words', ''), | ||
| 436 | '保单章': license_data.get('result', {}).get('seal', {}).get('words', ''), | ||
| 437 | '特别约定第一受益人': special, | ||
| 438 | consts.IMG_PATH_KEY: img_path, | ||
| 439 | consts.SECTION_IMG_PATH_KEY: section_img_path, | ||
| 440 | } | ||
| 441 | |||
| 442 | position_dict = { | ||
| 443 | '被保险人姓名': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
| 444 | 'insured', {}).get('name', {}).get('position', {}))}, | ||
| 445 | '被保险人证件号码': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
| 446 | 'insured', {}).get('certiCode', {}).get('position', {}))}, | ||
| 447 | '车架号': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
| 448 | 'vehicle', {}).get('VIN', {}).get('position', {}))}, | ||
| 449 | '机动车损失保险金额': {consts.FIELD_POSITION_KEY: product_result_position[0]}, | ||
| 450 | '机动车第三者责任保险金额': {consts.FIELD_POSITION_KEY: product_result_position[1]}, | ||
| 451 | '机动车损失保险绝对免赔率/绝对免赔额': {consts.FIELD_POSITION_KEY: product_result_position[2]}, | ||
| 452 | '保险费合计': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
| 453 | 'premiumSum', {}).get('position', {}))}, | ||
| 454 | '保险起始日期': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
| 455 | 'startDate', {}).get('position', {}))}, | ||
| 456 | '保险截止日期': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
| 457 | 'endDate', {}).get('position', {}))}, | ||
| 458 | '保单章': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
| 459 | 'seal', {}).get('position', {}))}, | ||
| 460 | '特别约定第一受益人': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
| 461 | '1stBeneficiary', {}).get('position', {}))}, | ||
| 462 | } | ||
| 463 | insurance_ocr_result[consts.ALL_POSITION_KEY] = position_dict | ||
| 464 | license_summary.setdefault(classify, []).append(insurance_ocr_result) | ||
| 465 | # DDA | ||
| 466 | elif classify == consts.DDA_CLASSIFY: | ||
| 467 | pro = ocr_data.get('confidence', 0) | ||
| 468 | if pro < consts.DDA_PRO_MIN: | ||
| 469 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) | 389 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) |
| 470 | return | 390 | return |
| 471 | dda_ocr_result = {} | 391 | if isinstance(license_data, dict): |
| 472 | position_dict = {} | 392 | pre, suf = os.path.splitext(img_path) |
| 473 | for key, value in license_data.get('result', {}).items(): | 393 | base64_img = license_data.pop('base64_img', '') |
| 474 | dda_ocr_result[key] = value.get('words', '') | 394 | is_save = True if len(base64_img) > 0 else False |
| 475 | position_dict[key] = { | 395 | section_img_path = '{0}_{1}{2}'.format(pre, part_idx, suf) if is_save else img_path |
| 476 | consts.FIELD_POSITION_KEY: value.get('position', {}) | 396 | if is_save: |
| 397 | try: | ||
| 398 | with open(section_img_path, "wb") as fh: | ||
| 399 | fh.write(base64.b64decode(base64_img.encode())) | ||
| 400 | except Exception as e: | ||
| 401 | self.online_log.warn( | ||
| 402 | '{0} [section img save failed] [img_path={1}]' | ||
| 403 | ' [part_idx={2}]'.format(self.log_base, img_path, part_idx)) | ||
| 404 | else: | ||
| 405 | is_save = False | ||
| 406 | section_img_path = img_path | ||
| 407 | |||
| 408 | # 保单 | ||
| 409 | if classify == consts.INSURANCE_CLASSIFY: | ||
| 410 | product_result = ['', '', ''] | ||
| 411 | product_result_position = [dict(), dict(), dict()] | ||
| 412 | min_char_count_1 = 1000 | ||
| 413 | min_char_count_2 = 1000 | ||
| 414 | for product in license_data.get('result', {}).get('productList', []): | ||
| 415 | name = product.get('name', {}).get('words', '') | ||
| 416 | if name.find('机动车损失') != -1 or name.find('汽车损失') != -1 or name.find('车损险') != -1 or \ | ||
| 417 | name.find('车损失险') != -1 or name.find('车损失保险') != -1: | ||
| 418 | if len(name) < min_char_count_1: | ||
| 419 | min_char_count_1 = len(name) | ||
| 420 | product_result[0] = product.get('coverage', {}).get('words', '') | ||
| 421 | product_result[2] = product.get('deductible_franchise', {}).get('words', '') | ||
| 422 | product_result_position[0] = self.rebuild_position(product.get('coverage', {}).get( | ||
| 423 | 'position', {})) | ||
| 424 | product_result_position[2] = self.rebuild_position(product.get('deductible_franchise', {}).get( | ||
| 425 | 'position', {})) | ||
| 426 | elif name.find('第三者责任') != -1: | ||
| 427 | if len(name) < min_char_count_2: | ||
| 428 | min_char_count_2 = len(name) | ||
| 429 | product_result[1] = product.get('coverage', {}).get('words', '') | ||
| 430 | product_result_position[1] = self.rebuild_position(product.get('coverage', {}).get( | ||
| 431 | 'position', {})) | ||
| 432 | |||
| 433 | special_str = license_data.get('result', {}).get('1stBeneficiary', {}).get('words', '') | ||
| 434 | special = '无' | ||
| 435 | if special_str.find('宝马') != -1 or special_str.find('先锋国际融资租赁有限公司') != -1: | ||
| 436 | special = '有' | ||
| 437 | insurance_ocr_result = { | ||
| 438 | '被保险人姓名': license_data.get('result', {}).get('insured', {}).get('name', {}).get('words', ''), | ||
| 439 | '被保险人证件号码': license_data.get('result', {}).get('insured', {}).get('certiCode', {}).get('words', ''), | ||
| 440 | '车架号': license_data.get('result', {}).get('vehicle', {}).get('VIN', {}).get('words', ''), | ||
| 441 | '机动车损失保险金额': product_result[0], | ||
| 442 | '机动车第三者责任保险金额': product_result[1], | ||
| 443 | '机动车损失保险绝对免赔率/绝对免赔额': product_result[2], | ||
| 444 | '保险费合计': license_data.get('result', {}).get('premiumSum', {}).get('words', ''), | ||
| 445 | '保险起始日期': license_data.get('result', {}).get('startDate', {}).get('words', ''), | ||
| 446 | '保险截止日期': license_data.get('result', {}).get('endDate', {}).get('words', ''), | ||
| 447 | '保单章': license_data.get('result', {}).get('seal', {}).get('words', ''), | ||
| 448 | '特别约定第一受益人': special, | ||
| 449 | consts.IMG_PATH_KEY: img_path, | ||
| 450 | consts.SECTION_IMG_PATH_KEY: section_img_path, | ||
| 477 | } | 451 | } |
| 478 | dda_ocr_result[consts.DDA_IMG_PATH] = img_path | 452 | |
| 479 | dda_ocr_result[consts.DDA_PRO] = pro | 453 | position_dict = { |
| 480 | dda_ocr_result[consts.IMG_PATH_KEY] = img_path | 454 | '被保险人姓名': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( |
| 481 | dda_ocr_result[consts.SECTION_IMG_PATH_KEY] = section_img_path | 455 | 'insured', {}).get('name', {}).get('position', {}))}, |
| 482 | dda_ocr_result[consts.ALL_POSITION_KEY] = position_dict | 456 | '被保险人证件号码': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( |
| 483 | license_summary.setdefault(classify, []).append(dda_ocr_result) | 457 | 'insured', {}).get('certiCode', {}).get('position', {}))}, |
| 484 | # 抵押登记豁免函 | 458 | '车架号': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( |
| 485 | elif classify == consts.HMH_CLASSIFY: | 459 | 'vehicle', {}).get('VIN', {}).get('position', {}))}, |
| 486 | hmh_ocr_result = {} | 460 | '机动车损失保险金额': {consts.FIELD_POSITION_KEY: product_result_position[0]}, |
| 487 | position_dict = {} | 461 | '机动车第三者责任保险金额': {consts.FIELD_POSITION_KEY: product_result_position[1]}, |
| 488 | for key, value in license_data.get('words_result', {}).items(): | 462 | '机动车损失保险绝对免赔率/绝对免赔额': {consts.FIELD_POSITION_KEY: product_result_position[2]}, |
| 489 | hmh_ocr_result[key] = value.get('words', '') | 463 | '保险费合计': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( |
| 490 | location_list = value.get('location', [-1, -1, -1, -1]) | 464 | 'premiumSum', {}).get('position', {}))}, |
| 491 | if len(location_list) == 4: | 465 | '保险起始日期': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( |
| 466 | 'startDate', {}).get('position', {}))}, | ||
| 467 | '保险截止日期': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
| 468 | 'endDate', {}).get('position', {}))}, | ||
| 469 | '保单章': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
| 470 | 'seal', {}).get('position', {}))}, | ||
| 471 | '特别约定第一受益人': {consts.FIELD_POSITION_KEY: self.rebuild_position(license_data.get('result', {}).get( | ||
| 472 | '1stBeneficiary', {}).get('position', {}))}, | ||
| 473 | } | ||
| 474 | insurance_ocr_result[consts.ALL_POSITION_KEY] = position_dict | ||
| 475 | license_summary.setdefault(classify, []).append(insurance_ocr_result) | ||
| 476 | # DDA | ||
| 477 | elif classify == consts.DDA_CLASSIFY: | ||
| 478 | pro = ocr_data.get('confidence', 0) | ||
| 479 | if pro < consts.DDA_PRO_MIN: | ||
| 480 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS_EMPTY)) | ||
| 481 | return | ||
| 482 | dda_ocr_result = {} | ||
| 483 | position_dict = {} | ||
| 484 | for key, value in license_data.get('result', {}).items(): | ||
| 485 | dda_ocr_result[key] = value.get('words', '') | ||
| 492 | position_dict[key] = { | 486 | position_dict[key] = { |
| 493 | consts.FIELD_POSITION_KEY: { | 487 | consts.FIELD_POSITION_KEY: value.get('position', {}) |
| 494 | 'top': location_list[1], | ||
| 495 | 'left': location_list[0], | ||
| 496 | 'height': location_list[-1] - location_list[1], | ||
| 497 | 'width': location_list[2] - location_list[0] | ||
| 498 | } | ||
| 499 | } | 488 | } |
| 500 | hmh_ocr_result[consts.IMG_PATH_KEY] = img_path | 489 | dda_ocr_result[consts.DDA_IMG_PATH] = img_path |
| 501 | hmh_ocr_result[consts.SECTION_IMG_PATH_KEY] = section_img_path | 490 | dda_ocr_result[consts.DDA_PRO] = pro |
| 502 | hmh_ocr_result[consts.ALL_POSITION_KEY] = position_dict | 491 | dda_ocr_result[consts.IMG_PATH_KEY] = img_path |
| 503 | license_summary.setdefault(classify, []).append(hmh_ocr_result) | 492 | dda_ocr_result[consts.SECTION_IMG_PATH_KEY] = section_img_path |
| 504 | # 二手车交易凭证 | 493 | dda_ocr_result[consts.ALL_POSITION_KEY] = position_dict |
| 505 | elif classify == consts.JYPZ_CLASSIFY: | 494 | license_summary.setdefault(classify, []).append(dda_ocr_result) |
| 506 | jypz_ocr_result = {} | 495 | # 抵押登记豁免函 |
| 507 | position_dict = {} | 496 | elif classify == consts.HMH_CLASSIFY: |
| 508 | for key, value in license_data.get('result', {}).items(): | 497 | hmh_ocr_result = {} |
| 509 | jypz_ocr_result[key] = value.get('words', '') | 498 | position_dict = {} |
| 510 | position_dict[key] = { | 499 | for key, value in license_data.get('words_result', {}).items(): |
| 511 | consts.FIELD_POSITION_KEY: value.get('position', {}) | 500 | hmh_ocr_result[key] = value.get('words', '') |
| 512 | } | 501 | location_list = value.get('location', [-1, -1, -1, -1]) |
| 513 | jypz_ocr_result[consts.IMG_PATH_KEY] = img_path | 502 | if len(location_list) == 4: |
| 514 | jypz_ocr_result[consts.SECTION_IMG_PATH_KEY] = section_img_path | 503 | position_dict[key] = { |
| 515 | jypz_ocr_result[consts.ALL_POSITION_KEY] = position_dict | 504 | consts.FIELD_POSITION_KEY: { |
| 516 | license_summary.setdefault(classify, []).append(jypz_ocr_result) | 505 | 'top': location_list[1], |
| 517 | # 车辆登记证 3/4页结果整合 | 506 | 'left': location_list[0], |
| 518 | elif classify == consts.MVC_CLASSIFY: | 507 | 'height': location_list[-1] - location_list[1], |
| 519 | rebuild_data_dict = {} | 508 | 'width': location_list[2] - location_list[0] |
| 520 | position_dict = {} | 509 | } |
| 521 | mvc_page = license_data.pop('page', 'VehicleRCI') | ||
| 522 | mvc_res = license_data.pop('results', {}) | ||
| 523 | if mvc_page == 'VehicleRegArea': | ||
| 524 | rebuild_data_dict['机动车登记证书编号'] = mvc_res.get('机动车登记证书编号', {}).get('words', '') | ||
| 525 | code_position_list = mvc_res.get('机动车登记证书编号', {}).get('position', [0, 0, 0, 0]) | ||
| 526 | if len(code_position_list) == 4: | ||
| 527 | position_dict['机动车登记证书编号'] = { | ||
| 528 | consts.FIELD_POSITION_KEY: { | ||
| 529 | 'top': code_position_list[1], | ||
| 530 | 'left': code_position_list[0], | ||
| 531 | 'height': code_position_list[-1], | ||
| 532 | 'width': code_position_list[2], | ||
| 533 | } | 510 | } |
| 511 | hmh_ocr_result[consts.IMG_PATH_KEY] = img_path | ||
| 512 | hmh_ocr_result[consts.SECTION_IMG_PATH_KEY] = section_img_path | ||
| 513 | hmh_ocr_result[consts.ALL_POSITION_KEY] = position_dict | ||
| 514 | license_summary.setdefault(classify, []).append(hmh_ocr_result) | ||
| 515 | # 二手车交易凭证 | ||
| 516 | elif classify == consts.JYPZ_CLASSIFY: | ||
| 517 | jypz_ocr_result = {} | ||
| 518 | position_dict = {} | ||
| 519 | for key, value in license_data.get('result', {}).items(): | ||
| 520 | jypz_ocr_result[key] = value.get('words', '') | ||
| 521 | position_dict[key] = { | ||
| 522 | consts.FIELD_POSITION_KEY: value.get('position', {}) | ||
| 534 | } | 523 | } |
| 535 | for register_info in mvc_res.get('登记信息', []): | 524 | jypz_ocr_result[consts.IMG_PATH_KEY] = img_path |
| 536 | register_info.pop('register_type', None) | 525 | jypz_ocr_result[consts.SECTION_IMG_PATH_KEY] = section_img_path |
| 537 | register_info.pop('register_type_name', None) | 526 | jypz_ocr_result[consts.ALL_POSITION_KEY] = position_dict |
| 538 | for cn_key, detail_dict in register_info.items(): | 527 | license_summary.setdefault(classify, []).append(jypz_ocr_result) |
| 539 | rebuild_data_dict.setdefault(cn_key, []).append( | 528 | # 车辆登记证 3/4页结果整合 |
| 540 | detail_dict.get('words', '')) | 529 | elif classify == consts.MVC_CLASSIFY: |
| 541 | tmp_position_list = detail_dict.get('position', [0, 0, 0, 0]) | 530 | rebuild_data_dict = {} |
| 542 | if len(tmp_position_list) == 4: | 531 | position_dict = {} |
| 543 | position_dict.setdefault(cn_key, []).append( | 532 | mvc_page = license_data.pop('page', 'VehicleRCI') |
| 544 | { | 533 | mvc_res = license_data.pop('results', {}) |
| 545 | consts.FIELD_POSITION_KEY: { | 534 | if mvc_page == 'VehicleRegArea': |
| 546 | 'top': tmp_position_list[1], | 535 | rebuild_data_dict['机动车登记证书编号'] = mvc_res.get('机动车登记证书编号', {}).get('words', '') |
| 547 | 'left': tmp_position_list[0], | 536 | code_position_list = mvc_res.get('机动车登记证书编号', {}).get('position', [0, 0, 0, 0]) |
| 548 | 'height': tmp_position_list[-1], | 537 | if len(code_position_list) == 4: |
| 549 | 'width': tmp_position_list[2], | 538 | position_dict['机动车登记证书编号'] = { |
| 539 | consts.FIELD_POSITION_KEY: { | ||
| 540 | 'top': code_position_list[1], | ||
| 541 | 'left': code_position_list[0], | ||
| 542 | 'height': code_position_list[-1], | ||
| 543 | 'width': code_position_list[2], | ||
| 544 | } | ||
| 545 | } | ||
| 546 | for register_info in mvc_res.get('登记信息', []): | ||
| 547 | register_info.pop('register_type', None) | ||
| 548 | register_info.pop('register_type_name', None) | ||
| 549 | for cn_key, detail_dict in register_info.items(): | ||
| 550 | rebuild_data_dict.setdefault(cn_key, []).append( | ||
| 551 | detail_dict.get('words', '')) | ||
| 552 | tmp_position_list = detail_dict.get('position', [0, 0, 0, 0]) | ||
| 553 | if len(tmp_position_list) == 4: | ||
| 554 | position_dict.setdefault(cn_key, []).append( | ||
| 555 | { | ||
| 556 | consts.FIELD_POSITION_KEY: { | ||
| 557 | 'top': tmp_position_list[1], | ||
| 558 | 'left': tmp_position_list[0], | ||
| 559 | 'height': tmp_position_list[-1], | ||
| 560 | 'width': tmp_position_list[2], | ||
| 561 | } | ||
| 550 | } | 562 | } |
| 563 | ) | ||
| 564 | |||
| 565 | rebuild_data_dict[consts.ALL_POSITION_KEY_2] = position_dict | ||
| 566 | rebuild_data_dict[consts.IMG_PATH_KEY_2] = img_path | ||
| 567 | rebuild_data_dict[consts.SECTION_IMG_PATH_KEY_2] = section_img_path | ||
| 568 | else: | ||
| 569 | for cn_key, detail_dict in mvc_res.items(): | ||
| 570 | rebuild_data_dict[cn_key] = detail_dict.get('words', '') | ||
| 571 | position_list = detail_dict.get('position', [0, 0, 0, 0]) | ||
| 572 | if len(position_list) == 4: | ||
| 573 | position_dict[cn_key] = { | ||
| 574 | consts.FIELD_POSITION_KEY: { | ||
| 575 | 'top': position_list[1], | ||
| 576 | 'left': position_list[0], | ||
| 577 | 'height': position_list[-1], | ||
| 578 | 'width': position_list[2], | ||
| 551 | } | 579 | } |
| 552 | ) | 580 | } |
| 553 | 581 | rebuild_data_dict[consts.ALL_POSITION_KEY] = position_dict | |
| 554 | rebuild_data_dict[consts.ALL_POSITION_KEY_2] = position_dict | 582 | rebuild_data_dict[consts.IMG_PATH_KEY] = img_path |
| 555 | rebuild_data_dict[consts.IMG_PATH_KEY_2] = img_path | 583 | rebuild_data_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path |
| 556 | rebuild_data_dict[consts.SECTION_IMG_PATH_KEY_2] = section_img_path | 584 | del mvc_res |
| 557 | else: | 585 | license_summary.setdefault(classify, []).append(rebuild_data_dict) |
| 558 | for cn_key, detail_dict in mvc_res.items(): | 586 | |
| 559 | rebuild_data_dict[cn_key] = detail_dict.get('words', '') | 587 | |
| 560 | position_list = detail_dict.get('position', [0, 0, 0, 0]) | 588 | # for mvc_dict in license_data: |
| 561 | if len(position_list) == 4: | 589 | # mvc_dict[consts.IMG_PATH_KEY] = img_path |
| 562 | position_dict[cn_key] = { | 590 | # try: |
| 591 | # mvc_page = mvc_dict.pop('page') | ||
| 592 | # except Exception as e: | ||
| 593 | # pass | ||
| 594 | # else: | ||
| 595 | # if mvc_page == 'VehicleRegArea': | ||
| 596 | # mvc_res = mvc_dict.pop('results', {}) | ||
| 597 | # mvc_dict['机动车登记证书编号'] = mvc_res.get('register_no', {}).get('words', '') | ||
| 598 | # for register_info in mvc_res.get('register_info', []): | ||
| 599 | # for detail_dict in register_info.get('details', {}).values(): | ||
| 600 | # mvc_dict.setdefault(detail_dict.get('chinese_key', '未知'), []).append( | ||
| 601 | # detail_dict.get('words', '')) | ||
| 602 | # del mvc_res | ||
| 603 | # license_summary.setdefault(classify, []).extend(license_data) | ||
| 604 | |||
| 605 | # 身份证真伪 | ||
| 606 | elif classify == consts.IC_CLASSIFY: | ||
| 607 | id_card_dict = {} | ||
| 608 | position_dict = {} | ||
| 609 | card_type = license_data.get('type', '') | ||
| 610 | is_ic = card_type.startswith('身份证') | ||
| 611 | is_info_side = card_type.endswith('信息面') | ||
| 612 | id_card_dict['类别'] = '0' if is_ic else '1' | ||
| 613 | if is_ic: | ||
| 614 | field_map = consts.IC_MAP_0 if is_info_side else consts.IC_MAP_1 | ||
| 615 | else: | ||
| 616 | field_map = consts.RP_MAP_0 if is_info_side else consts.RP_MAP_1 | ||
| 617 | for write_field, search_field in field_map: | ||
| 618 | id_card_dict[write_field] = license_data.get('words_result', {}).get(search_field, {}).get('words', '') | ||
| 619 | location_list = license_data.get('words_result', {}).get(search_field, {}).get( | ||
| 620 | 'location', [-1, -1, -1, -1]) | ||
| 621 | if len(location_list) == 4: | ||
| 622 | position_dict[write_field] = { | ||
| 563 | consts.FIELD_POSITION_KEY: { | 623 | consts.FIELD_POSITION_KEY: { |
| 564 | 'top': position_list[1], | 624 | 'top': location_list[1], |
| 565 | 'left': position_list[0], | 625 | 'left': location_list[0], |
| 566 | 'height': position_list[-1], | 626 | 'height': location_list[-1] - location_list[1], |
| 567 | 'width': position_list[2], | 627 | 'width': location_list[2] - location_list[0] |
| 568 | } | 628 | } |
| 569 | } | 629 | } |
| 570 | rebuild_data_dict[consts.ALL_POSITION_KEY] = position_dict | 630 | if not is_info_side: |
| 571 | rebuild_data_dict[consts.IMG_PATH_KEY] = img_path | 631 | start_time = license_data.get('words_result', {}).get('签发日期', {}).get('words', '') |
| 572 | rebuild_data_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path | 632 | end_time = license_data.get('words_result', {}).get('失效日期', {}).get('words', '') |
| 573 | del mvc_res | 633 | id_card_dict['有效期限'] = '{0}-{1}'.format(start_time, end_time) |
| 574 | license_summary.setdefault(classify, []).append(rebuild_data_dict) | 634 | end_time_location_list = license_data.get('words_result', {}).get('失效日期', {}).get( |
| 575 | 635 | 'location', [-1, -1, -1, -1]) | |
| 576 | 636 | if len(end_time_location_list) == 4: | |
| 577 | # for mvc_dict in license_data: | 637 | position_dict['有效期限'] = { |
| 578 | # mvc_dict[consts.IMG_PATH_KEY] = img_path | 638 | consts.FIELD_POSITION_KEY: { |
| 579 | # try: | 639 | 'top': end_time_location_list[1], |
| 580 | # mvc_page = mvc_dict.pop('page') | 640 | 'left': end_time_location_list[0], |
| 581 | # except Exception as e: | 641 | 'height': end_time_location_list[-1] - end_time_location_list[1], |
| 582 | # pass | 642 | 'width': end_time_location_list[2] - end_time_location_list[0] |
| 583 | # else: | 643 | } |
| 584 | # if mvc_page == 'VehicleRegArea': | ||
| 585 | # mvc_res = mvc_dict.pop('results', {}) | ||
| 586 | # mvc_dict['机动车登记证书编号'] = mvc_res.get('register_no', {}).get('words', '') | ||
| 587 | # for register_info in mvc_res.get('register_info', []): | ||
| 588 | # for detail_dict in register_info.get('details', {}).values(): | ||
| 589 | # mvc_dict.setdefault(detail_dict.get('chinese_key', '未知'), []).append( | ||
| 590 | # detail_dict.get('words', '')) | ||
| 591 | # del mvc_res | ||
| 592 | # license_summary.setdefault(classify, []).extend(license_data) | ||
| 593 | |||
| 594 | # 身份证真伪 | ||
| 595 | elif classify == consts.IC_CLASSIFY: | ||
| 596 | id_card_dict = {} | ||
| 597 | position_dict = {} | ||
| 598 | card_type = license_data.get('type', '') | ||
| 599 | is_ic = card_type.startswith('身份证') | ||
| 600 | is_info_side = card_type.endswith('信息面') | ||
| 601 | id_card_dict['类别'] = '0' if is_ic else '1' | ||
| 602 | if is_ic: | ||
| 603 | field_map = consts.IC_MAP_0 if is_info_side else consts.IC_MAP_1 | ||
| 604 | else: | ||
| 605 | field_map = consts.RP_MAP_0 if is_info_side else consts.RP_MAP_1 | ||
| 606 | for write_field, search_field in field_map: | ||
| 607 | id_card_dict[write_field] = license_data.get('words_result', {}).get(search_field, {}).get('words', '') | ||
| 608 | location_list = license_data.get('words_result', {}).get(search_field, {}).get( | ||
| 609 | 'location', [-1, -1, -1, -1]) | ||
| 610 | if len(location_list) == 4: | ||
| 611 | position_dict[write_field] = { | ||
| 612 | consts.FIELD_POSITION_KEY: { | ||
| 613 | 'top': location_list[1], | ||
| 614 | 'left': location_list[0], | ||
| 615 | 'height': location_list[-1] - location_list[1], | ||
| 616 | 'width': location_list[2] - location_list[0] | ||
| 617 | } | ||
| 618 | } | ||
| 619 | if not is_info_side: | ||
| 620 | start_time = license_data.get('words_result', {}).get('签发日期', {}).get('words', '') | ||
| 621 | end_time = license_data.get('words_result', {}).get('失效日期', {}).get('words', '') | ||
| 622 | id_card_dict['有效期限'] = '{0}-{1}'.format(start_time, end_time) | ||
| 623 | end_time_location_list = license_data.get('words_result', {}).get('失效日期', {}).get( | ||
| 624 | 'location', [-1, -1, -1, -1]) | ||
| 625 | if len(end_time_location_list) == 4: | ||
| 626 | position_dict['有效期限'] = { | ||
| 627 | consts.FIELD_POSITION_KEY: { | ||
| 628 | 'top': end_time_location_list[1], | ||
| 629 | 'left': end_time_location_list[0], | ||
| 630 | 'height': end_time_location_list[-1] - end_time_location_list[1], | ||
| 631 | 'width': end_time_location_list[2] - end_time_location_list[0] | ||
| 632 | } | 644 | } |
| 633 | } | ||
| 634 | 645 | ||
| 635 | 646 | ||
| 636 | if not is_info_side: | 647 | if not is_info_side: |
| 637 | id_card_dict[consts.IMG_PATH_KEY_2] = img_path | 648 | id_card_dict[consts.IMG_PATH_KEY_2] = img_path |
| 638 | id_card_dict[consts.ALL_POSITION_KEY_2] = position_dict | 649 | id_card_dict[consts.ALL_POSITION_KEY_2] = position_dict |
| 639 | id_card_dict[consts.SECTION_IMG_PATH_KEY_2] = section_img_path | 650 | id_card_dict[consts.SECTION_IMG_PATH_KEY_2] = section_img_path |
| 640 | 651 | ||
| 641 | else: | ||
| 642 | id_card_dict[consts.ALL_POSITION_KEY] = position_dict | ||
| 643 | id_card_dict[consts.IMG_PATH_KEY] = img_path | ||
| 644 | id_card_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path | ||
| 645 | |||
| 646 | if is_ic and is_save: | ||
| 647 | card_type = -1 | ||
| 648 | json_data_4 = { | ||
| 649 | 'mode': 1, | ||
| 650 | 'user_info': { | ||
| 651 | 'image_content': base64_img, | ||
| 652 | }, | ||
| 653 | 'options': { | ||
| 654 | 'distinguish_type': 1, | ||
| 655 | 'auto_rotate': True, | ||
| 656 | }, | ||
| 657 | } | ||
| 658 | for times in range(consts.RETRY_TIMES): | ||
| 659 | try: | ||
| 660 | start_time = time.time() | ||
| 661 | ocr_4_response = requests.post(self.ocr_url_4, json=json_data_4) | ||
| 662 | if ocr_4_response.status_code != 200: | ||
| 663 | raise OCR4Exception('ocr_4 status code: {0}'.format(ocr_4_response.status_code)) | ||
| 664 | except Exception as e: | ||
| 665 | self.online_log.warn( | ||
| 666 | '{0} [ocr_4 failed] [times={1}] [img_path={2}] [error={3}]'.format( | ||
| 667 | self.log_base, times, img_path, traceback.format_exc())) | ||
| 668 | else: | ||
| 669 | ocr_4_res = ocr_4_response.json() | ||
| 670 | end_time = time.time() | ||
| 671 | speed_time = int(end_time - start_time) | ||
| 672 | |||
| 673 | if ocr_4_res.get('code') == 0 and ocr_4_res.get('result', {}).get('rtn') == 0: | ||
| 674 | card_type = ocr_4_res.get('result', {}).get( | ||
| 675 | 'idcard_distinguish_result', {}).get('result', -1) | ||
| 676 | |||
| 677 | self.online_log.info( | ||
| 678 | '{0} [ocr_4 success] [img_path={1}] [speed_time={2}]'.format( | ||
| 679 | self.log_base, img_path, speed_time)) | ||
| 680 | break | ||
| 681 | else: | 652 | else: |
| 682 | self.online_log.warn( | 653 | id_card_dict[consts.ALL_POSITION_KEY] = position_dict |
| 683 | '{0} [ocr_4 failed] [img_path={1}]'.format(self.log_base, img_path)) | 654 | id_card_dict[consts.IMG_PATH_KEY] = img_path |
| 684 | 655 | id_card_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path | |
| 685 | id_card_dict[consts.IC_TURE_OR_FALSE] = consts.IC_RES_MAPPING.get(card_type) | 656 | |
| 686 | 657 | if is_ic and is_save: | |
| 687 | if do_dda and isinstance(id_card_dict.get(consts.IC_KEY_FIELD[0]), str) and \ | 658 | card_type = -1 |
| 688 | isinstance(id_card_dict.get(consts.IC_KEY_FIELD[1]), str): | 659 | json_data_4 = { |
| 689 | ic_name = id_card_dict.get(consts.IC_KEY_FIELD[0], '').strip() | 660 | 'mode': 1, |
| 690 | ic_id = id_card_dict.get(consts.IC_KEY_FIELD[1], '').strip() | 661 | 'user_info': { |
| 691 | if len(ic_name) > 0 and len(ic_id) > 0: | 662 | 'image_content': base64_img, |
| 692 | dda_id_bc_mapping.setdefault(consts.IC_FIELD, []).append((ic_name, ic_id, img_path)) | 663 | }, |
| 693 | license_summary.setdefault(classify, []).append(id_card_dict) | 664 | 'options': { |
| 694 | # 购车发票 & 二手车发票 | 665 | 'distinguish_type': 1, |
| 695 | elif classify == consts.MVI_CLASSIFY or classify == consts.UCI_CLASSIFY: | 666 | 'auto_rotate': True, |
| 696 | rebuild_data_dict = {} | 667 | }, |
| 697 | position_dict = {} | 668 | } |
| 698 | mvi_res = license_data.pop('result', {}) | 669 | for times in range(consts.RETRY_TIMES): |
| 699 | for en_key, detail_dict in mvi_res.items(): | ||
| 700 | rebuild_data_dict[detail_dict.get('chinese_key', '')] = detail_dict.get('words', '') | ||
| 701 | position_dict[detail_dict.get('chinese_key', '')] = { | ||
| 702 | consts.FIELD_POSITION_KEY: detail_dict.get('position', {}) | ||
| 703 | } | ||
| 704 | rebuild_data_dict['新旧版式'] = license_data.get('layout', '') | ||
| 705 | rebuild_data_dict[consts.IMG_PATH_KEY] = img_path | ||
| 706 | rebuild_data_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path | ||
| 707 | rebuild_data_dict[consts.ALL_POSITION_KEY] = position_dict | ||
| 708 | license_summary.setdefault(classify, []).append(rebuild_data_dict) | ||
| 709 | # 其他 | ||
| 710 | else: | ||
| 711 | for res_dict in license_data: | ||
| 712 | res_dict[consts.IMG_PATH_KEY] = img_path | ||
| 713 | res_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path | ||
| 714 | license_summary.setdefault(classify, []).extend(license_data) | ||
| 715 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS)) | ||
| 716 | |||
| 717 | def license2_process(self, ocr_res_2, license_summary, pid, classify, res_list, pno, ino, part_idx, img_path, do_dda, dda_id_bc_mapping, file_data): | ||
| 718 | if ocr_res_2.get('ErrorCode') in consts.SUCCESS_CODE_SET: | ||
| 719 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS)) | ||
| 720 | if pid == consts.BC_PID: | ||
| 721 | # 银行卡 | ||
| 722 | # res_dict = {} | ||
| 723 | # for en_key, chn_key in consts.BC_FIELD: | ||
| 724 | # res_dict[chn_key] = ocr_res_2.get(en_key, '') | ||
| 725 | ocr_res_2[consts.IMG_PATH_KEY] = img_path | ||
| 726 | license_summary.setdefault(classify, []).append(ocr_res_2) | ||
| 727 | if do_dda and isinstance(ocr_res_2.get(consts.BC_KEY_FIELD), str): | ||
| 728 | bc_no = ocr_res_2[consts.BC_KEY_FIELD].strip() | ||
| 729 | if len(bc_no) > 0: | ||
| 730 | dda_id_bc_mapping.setdefault(consts.BC_FIELD, []).append((bc_no, img_path)) | ||
| 731 | else: | ||
| 732 | # 营业执照等 | ||
| 733 | pre, suf = os.path.splitext(img_path) | ||
| 734 | src_section_img_path = img_path if file_data is None else '{0}_{1}{2}'.format(pre, part_idx, suf) | ||
| 735 | |||
| 736 | is_save = False | ||
| 737 | for res_idx, result_dict in enumerate(ocr_res_2.get('ResultList', [])): | ||
| 738 | image_data = result_dict.get('image_data', '') | ||
| 739 | if len(image_data) > 0: | ||
| 740 | position = {} | ||
| 741 | angle = 0 | ||
| 742 | section_img_path = '{0}_{1}_{2}{3}'.format(pre, part_idx, res_idx, suf) | ||
| 743 | try: | 670 | try: |
| 744 | with open(section_img_path, "wb") as fh: | 671 | start_time = time.time() |
| 745 | fh.write(base64.b64decode(image_data.encode())) | 672 | ocr_4_response = requests.post(self.ocr_url_4, json=json_data_4) |
| 673 | if ocr_4_response.status_code != 200: | ||
| 674 | raise OCR4Exception('ocr_4 status code: {0}'.format(ocr_4_response.status_code)) | ||
| 746 | except Exception as e: | 675 | except Exception as e: |
| 747 | self.online_log.warn( | 676 | self.online_log.warn( |
| 748 | '{0} [section img save failed] [img_path={1}]' | 677 | '{0} [ocr_4 failed] [times={1}] [img_path={2}] [error={3}]'.format( |
| 749 | ' [part_idx={2}] [res_idx={3}]'.format(self.log_base, img_path, part_idx, res_idx)) | 678 | self.log_base, times, img_path, traceback.format_exc())) |
| 679 | else: | ||
| 680 | ocr_4_res = ocr_4_response.json() | ||
| 681 | end_time = time.time() | ||
| 682 | speed_time = int(end_time - start_time) | ||
| 683 | |||
| 684 | if ocr_4_res.get('code') == 0 and ocr_4_res.get('result', {}).get('rtn') == 0: | ||
| 685 | card_type = ocr_4_res.get('result', {}).get( | ||
| 686 | 'idcard_distinguish_result', {}).get('result', -1) | ||
| 687 | |||
| 688 | self.online_log.info( | ||
| 689 | '{0} [ocr_4 success] [img_path={1}] [speed_time={2}]'.format( | ||
| 690 | self.log_base, img_path, speed_time)) | ||
| 691 | break | ||
| 750 | else: | 692 | else: |
| 751 | is_save = True | 693 | self.online_log.warn( |
| 752 | section_img_path = src_section_img_path | 694 | '{0} [ocr_4 failed] [img_path={1}]'.format(self.log_base, img_path)) |
| 753 | position = result_dict.get('position', {}) | 695 | |
| 754 | angle = result_dict.get('angle', 0) | 696 | id_card_dict[consts.IC_TURE_OR_FALSE] = consts.IC_RES_MAPPING.get(card_type) |
| 755 | res_dict = {} | 697 | |
| 756 | position_dict = {} | 698 | if do_dda and isinstance(id_card_dict.get(consts.IC_KEY_FIELD[0]), str) and \ |
| 757 | for field_dict in result_dict.get('FieldList', []): | 699 | isinstance(id_card_dict.get(consts.IC_KEY_FIELD[1]), str): |
| 758 | res_dict[field_dict.get('chn_key', '')] = field_dict.get('value', '') | 700 | ic_name = id_card_dict.get(consts.IC_KEY_FIELD[0], '').strip() |
| 759 | position_dict[field_dict.get('chn_key', '')] = { | 701 | ic_id = id_card_dict.get(consts.IC_KEY_FIELD[1], '').strip() |
| 760 | consts.FIELD_POSITION_KEY: field_dict.get('position', {}), | 702 | if len(ic_name) > 0 and len(ic_id) > 0: |
| 761 | consts.FIELD_QUAD_KEY: field_dict.get('quad', []), | 703 | dda_id_bc_mapping.setdefault(consts.IC_FIELD, []).append((ic_name, ic_id, img_path)) |
| 762 | } | 704 | license_summary.setdefault(classify, []).append(id_card_dict) |
| 763 | position_dict[consts.POSITION_KEY] = position | 705 | # 购车发票 & 二手车发票 |
| 764 | position_dict[consts.ANGLE_KEY] = angle | 706 | elif classify == consts.MVI_CLASSIFY or classify == consts.UCI_CLASSIFY: |
| 707 | rebuild_data_dict = {} | ||
| 708 | position_dict = {} | ||
| 709 | mvi_res = license_data.pop('result', {}) | ||
| 710 | for en_key, detail_dict in mvi_res.items(): | ||
| 711 | rebuild_data_dict[detail_dict.get('chinese_key', '')] = detail_dict.get('words', '') | ||
| 712 | position_dict[detail_dict.get('chinese_key', '')] = { | ||
| 713 | consts.FIELD_POSITION_KEY: detail_dict.get('position', {}) | ||
| 714 | } | ||
| 715 | rebuild_data_dict['新旧版式'] = license_data.get('layout', '') | ||
| 716 | rebuild_data_dict[consts.IMG_PATH_KEY] = img_path | ||
| 717 | rebuild_data_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path | ||
| 718 | rebuild_data_dict[consts.ALL_POSITION_KEY] = position_dict | ||
| 719 | license_summary.setdefault(classify, []).append(rebuild_data_dict) | ||
| 720 | # 其他 | ||
| 721 | else: | ||
| 722 | for res_dict in license_data: | ||
| 765 | res_dict[consts.IMG_PATH_KEY] = img_path | 723 | res_dict[consts.IMG_PATH_KEY] = img_path |
| 766 | res_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path | 724 | res_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path |
| 767 | res_dict[consts.ALL_POSITION_KEY] = position_dict | 725 | license_summary.setdefault(classify, []).extend(license_data) |
| 768 | license_summary.setdefault(classify, []).append(res_dict) | 726 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS)) |
| 727 | except Exception as e: | ||
| 728 | res_list.append((pno, ino, part_idx, consts.RES_FAILED)) | ||
| 729 | self.online_log.error('{0} [license1_process error] [error={1}]'.format(self.log_base, traceback.format_exc())) | ||
| 769 | 730 | ||
| 770 | if is_save and file_data is not None: | 731 | def license2_process(self, ocr_res_2, license_summary, pid, classify, res_list, pno, ino, part_idx, img_path, do_dda, dda_id_bc_mapping, file_data): |
| 771 | try: | 732 | # 添加 try-except 处理 |
| 772 | with open(src_section_img_path, "wb") as fh: | 733 | try: |
| 773 | fh.write(base64.b64decode(file_data.encode())) | 734 | if ocr_res_2.get('ErrorCode') in consts.SUCCESS_CODE_SET: |
| 774 | except Exception as e: | 735 | res_list.append((pno, ino, part_idx, consts.RES_SUCCESS)) |
| 775 | self.online_log.warn( | 736 | if pid == consts.BC_PID: |
| 776 | '{0} [section img save failed] [img_path={1}]' | 737 | # 银行卡 |
| 777 | ' [part_idx={2}]'.format(self.log_base, img_path, part_idx)) | 738 | # res_dict = {} |
| 778 | else: | 739 | # for en_key, chn_key in consts.BC_FIELD: |
| 740 | # res_dict[chn_key] = ocr_res_2.get(en_key, '') | ||
| 741 | ocr_res_2[consts.IMG_PATH_KEY] = img_path | ||
| 742 | license_summary.setdefault(classify, []).append(ocr_res_2) | ||
| 743 | if do_dda and isinstance(ocr_res_2.get(consts.BC_KEY_FIELD), str): | ||
| 744 | bc_no = ocr_res_2[consts.BC_KEY_FIELD].strip() | ||
| 745 | if len(bc_no) > 0: | ||
| 746 | dda_id_bc_mapping.setdefault(consts.BC_FIELD, []).append((bc_no, img_path)) | ||
| 747 | else: | ||
| 748 | # 营业执照等 | ||
| 749 | pre, suf = os.path.splitext(img_path) | ||
| 750 | src_section_img_path = img_path if file_data is None else '{0}_{1}{2}'.format(pre, part_idx, suf) | ||
| 751 | |||
| 752 | is_save = False | ||
| 753 | for res_idx, result_dict in enumerate(ocr_res_2.get('ResultList', [])): | ||
| 754 | image_data = result_dict.get('image_data', '') | ||
| 755 | if len(image_data) > 0: | ||
| 756 | position = {} | ||
| 757 | angle = 0 | ||
| 758 | section_img_path = '{0}_{1}_{2}{3}'.format(pre, part_idx, res_idx, suf) | ||
| 759 | try: | ||
| 760 | with open(section_img_path, "wb") as fh: | ||
| 761 | fh.write(base64.b64decode(image_data.encode())) | ||
| 762 | except Exception as e: | ||
| 763 | self.online_log.warn( | ||
| 764 | '{0} [section img save failed] [img_path={1}]' | ||
| 765 | ' [part_idx={2}] [res_idx={3}]'.format(self.log_base, img_path, part_idx, res_idx)) | ||
| 766 | else: | ||
| 767 | is_save = True | ||
| 768 | section_img_path = src_section_img_path | ||
| 769 | position = result_dict.get('position', {}) | ||
| 770 | angle = result_dict.get('angle', 0) | ||
| 771 | res_dict = {} | ||
| 772 | position_dict = {} | ||
| 773 | for field_dict in result_dict.get('FieldList', []): | ||
| 774 | res_dict[field_dict.get('chn_key', '')] = field_dict.get('value', '') | ||
| 775 | position_dict[field_dict.get('chn_key', '')] = { | ||
| 776 | consts.FIELD_POSITION_KEY: field_dict.get('position', {}), | ||
| 777 | consts.FIELD_QUAD_KEY: field_dict.get('quad', []), | ||
| 778 | } | ||
| 779 | position_dict[consts.POSITION_KEY] = position | ||
| 780 | position_dict[consts.ANGLE_KEY] = angle | ||
| 781 | res_dict[consts.IMG_PATH_KEY] = img_path | ||
| 782 | res_dict[consts.SECTION_IMG_PATH_KEY] = section_img_path | ||
| 783 | res_dict[consts.ALL_POSITION_KEY] = position_dict | ||
| 784 | license_summary.setdefault(classify, []).append(res_dict) | ||
| 785 | |||
| 786 | if is_save and file_data is not None: | ||
| 787 | try: | ||
| 788 | with open(src_section_img_path, "wb") as fh: | ||
| 789 | fh.write(base64.b64decode(file_data.encode())) | ||
| 790 | except Exception as e: | ||
| 791 | self.online_log.warn( | ||
| 792 | '{0} [section img save failed] [img_path={1}]' | ||
| 793 | ' [part_idx={2}]'.format(self.log_base, img_path, part_idx)) | ||
| 794 | else: | ||
| 795 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_2)) | ||
| 796 | except Exception as e: | ||
| 779 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_2)) | 797 | res_list.append((pno, ino, part_idx, consts.RES_FAILED_2)) |
| 798 | self.online_log.error('{0} [license2_process error] [error={1}]'.format(self.log_base, traceback.format_exc())) | ||
| 780 | 799 | ||
| 781 | @staticmethod | 800 | @staticmethod |
| 782 | def license_rebuild(license_summary): | 801 | def license_rebuild(license_summary): |
| 783 | ic_merge = False | 802 | # 添加 try-except 处理 |
| 784 | rp_merge = False | 803 | try: |
| 804 | ic_merge = False | ||
| 805 | rp_merge = False | ||
| 785 | 806 | ||
| 786 | for classify in (consts.IC_CLASSIFY, consts.MVI_CLASSIFY, consts.MVC_CLASSIFY): | 807 | for classify in (consts.IC_CLASSIFY, consts.MVI_CLASSIFY, consts.MVC_CLASSIFY): |
| 787 | 808 | ||
| 788 | license_list = license_summary.get(classify) | 809 | license_list = license_summary.get(classify) |
| 789 | 810 | ||
| 790 | if not license_list: | 811 | if not license_list: |
| 791 | continue | 812 | continue |
| 792 | 813 | ||
| 793 | if classify == consts.IC_CLASSIFY: # 身份证、居住证分开,先正面,后反面 | 814 | if classify == consts.IC_CLASSIFY: # 身份证、居住证分开,先正面,后反面 |
| 794 | key, _, _ = consts.FIELD_ORDER_MAP.get(classify) | 815 | key, _, _ = consts.FIELD_ORDER_MAP.get(classify) |
| 795 | ic_side1_list = [] | 816 | ic_side1_list = [] |
| 796 | ic_side2_list = [] | 817 | ic_side2_list = [] |
| 797 | rp_side1_list = [] | 818 | rp_side1_list = [] |
| 798 | rp_side2_list = [] | 819 | rp_side2_list = [] |
| 799 | for license_dict in license_list: | 820 | for license_dict in license_list: |
| 800 | is_rp = license_dict.pop('类别', '0') | 821 | is_rp = license_dict.pop('类别', '0') |
| 801 | if key in license_dict: | 822 | if key in license_dict: |
| 802 | if is_rp == '1': | 823 | if is_rp == '1': |
| 803 | rp_side2_list.append(license_dict) | 824 | rp_side2_list.append(license_dict) |
| 825 | else: | ||
| 826 | ic_side2_list.append(license_dict) | ||
| 827 | elif is_rp == '1': | ||
| 828 | rp_side1_list.append(license_dict) | ||
| 804 | else: | 829 | else: |
| 805 | ic_side2_list.append(license_dict) | 830 | ic_side1_list.append(license_dict) |
| 806 | elif is_rp == '1': | ||
| 807 | rp_side1_list.append(license_dict) | ||
| 808 | else: | ||
| 809 | ic_side1_list.append(license_dict) | ||
| 810 | 831 | ||
| 811 | ic_merge = len(ic_side1_list) == len(ic_side2_list) == 1 | 832 | ic_merge = len(ic_side1_list) == len(ic_side2_list) == 1 |
| 812 | rp_merge = len(rp_side1_list) == len(rp_side2_list) == 1 | 833 | rp_merge = len(rp_side1_list) == len(rp_side2_list) == 1 |
| 813 | 834 | ||
| 814 | ic_side1_list.extend(ic_side2_list) | 835 | ic_side1_list.extend(ic_side2_list) |
| 815 | rp_side1_list.extend(rp_side2_list) | 836 | rp_side1_list.extend(rp_side2_list) |
| 816 | 837 | ||
| 817 | if ic_side1_list: | 838 | if ic_side1_list: |
| 818 | # license_list = ic_side1_list | 839 | # license_list = ic_side1_list |
| 819 | license_summary[classify] = ic_side1_list | 840 | license_summary[classify] = ic_side1_list |
| 820 | else: | 841 | else: |
| 821 | license_summary.pop(classify, None) | 842 | license_summary.pop(classify, None) |
| 822 | 843 | ||
| 823 | if rp_side1_list: | 844 | if rp_side1_list: |
| 824 | license_summary[consts.RP_CLASSIFY] = rp_side1_list | 845 | license_summary[consts.RP_CLASSIFY] = rp_side1_list |
| 825 | 846 | ||
| 826 | ic_side1_list = ic_side2_list = rp_side1_list = rp_side2_list = None | 847 | ic_side1_list = ic_side2_list = rp_side1_list = rp_side2_list = None |
| 827 | 848 | ||
| 828 | if classify == consts.MVI_CLASSIFY: # 机动车销售统一发票, 增加不含税价(逻辑计算) | 849 | if classify == consts.MVI_CLASSIFY: # 机动车销售统一发票, 增加不含税价(逻辑计算) |
| 829 | for license_dict in license_list: | 850 | for license_dict in license_list: |
| 830 | price = '' | 851 | price = '' |
| 831 | rate_str = license_dict.get('增值税税率') | 852 | rate_str = license_dict.get('增值税税率') |
| 832 | price_total_str = license_dict.get('价税合计小写') | 853 | price_total_str = license_dict.get('价税合计小写') |
| 833 | if rate_str is not None and price_total_str is not None: | 854 | if rate_str is not None and price_total_str is not None: |
| 834 | try: | 855 | try: |
| 835 | rate = int(rate_str.rstrip('%')) | 856 | rate = int(rate_str.rstrip('%')) |
| 836 | price_total = float(price_total_str) | 857 | price_total = float(price_total_str) |
| 837 | except Exception as e: | 858 | except Exception as e: |
| 838 | pass | 859 | pass |
| 860 | else: | ||
| 861 | price = round(price_total * 100 / (rate + 100), 2) | ||
| 862 | license_dict['不含税价(逻辑计算)'] = price | ||
| 863 | |||
| 864 | if classify == consts.MVC_CLASSIFY: # 机动车登记证先1/2页,后3/4页 | ||
| 865 | key, _, _ = consts.FIELD_ORDER_MAP.get(classify) | ||
| 866 | page_1_2 = [] | ||
| 867 | page_3_4 = [] | ||
| 868 | for license_dict in license_list: | ||
| 869 | if key in license_dict: | ||
| 870 | page_3_4.append(license_dict) | ||
| 839 | else: | 871 | else: |
| 840 | price = round(price_total * 100 / (rate + 100), 2) | 872 | page_1_2.append(license_dict) |
| 841 | license_dict['不含税价(逻辑计算)'] = price | 873 | page_1_2.extend(page_3_4) |
| 842 | 874 | license_summary[classify] = page_1_2 | |
| 843 | if classify == consts.MVC_CLASSIFY: # 机动车登记证先1/2页,后3/4页 | 875 | page_1_2 = page_3_4 = None |
| 844 | key, _, _ = consts.FIELD_ORDER_MAP.get(classify) | ||
| 845 | page_1_2 = [] | ||
| 846 | page_3_4 = [] | ||
| 847 | for license_dict in license_list: | ||
| 848 | if key in license_dict: | ||
| 849 | page_3_4.append(license_dict) | ||
| 850 | else: | ||
| 851 | page_1_2.append(license_dict) | ||
| 852 | page_1_2.extend(page_3_4) | ||
| 853 | license_summary[classify] = page_1_2 | ||
| 854 | page_1_2 = page_3_4 = None | ||
| 855 | 876 | ||
| 856 | return ic_merge, rp_merge | 877 | return ic_merge, rp_merge |
| 878 | except Exception as e: | ||
| 879 | print("license_rebuild error") | ||
| 880 | print(traceback.format_exc()) | ||
| 881 | return False, False | ||
| 857 | 882 | ||
| 858 | def parse_img_path(self, img_path): | 883 | def parse_img_path(self, img_path): |
| 859 | img_name, _ = os.path.splitext(os.path.basename(img_path)) | 884 | # 添加 try-except 处理 |
| 860 | part_list = img_name.split('_') | 885 | try: |
| 861 | # page_7_img_11_0 | 886 | img_name, _ = os.path.splitext(os.path.basename(img_path)) |
| 862 | return int(part_list[1])+1, int(part_list[3])+1 | 887 | part_list = img_name.split('_') |
| 888 | # page_7_img_11_0 | ||
| 889 | return int(part_list[1])+1, int(part_list[3])+1 | ||
| 890 | except Exception as e: | ||
| 891 | self.online_log.error('{0} [parse_img_path error] [error={1}]'.format(self.log_base, traceback.format_exc())) | ||
| 892 | return 0, 0 | ||
| 863 | 893 | ||
| 864 | def get_most(self, value_list): | 894 | def get_most(self, value_list): |
| 865 | if value_list: | 895 | # 添加 try-except 处理 |
| 866 | most_common = Counter(value_list).most_common(1) | 896 | try: |
| 867 | return most_common[0][0] if most_common else None | 897 | if value_list: |
| 898 | most_common = Counter(value_list).most_common(1) | ||
| 899 | return most_common[0][0] if most_common else None | ||
| 900 | except Exception as e: | ||
| 901 | self.online_log.error('{0} [get_most error] [error={1}]'.format(self.log_base, traceback.format_exc())) | ||
| 902 | return None | ||
| 868 | 903 | ||
| 869 | def date_format(self, date_str, format_str): | 904 | def date_format(self, date_str, format_str): |
| 870 | try: | 905 | try: | ... | ... |
-
Please register or sign in to post a comment