"""Sample a fraction of images from several dataset folders into one folder.

Each source directory is listed, filtered down to image files, and the first
``ratio`` share of that listing is copied into ``overall_root`` under a
``<prefix>_<index>.<ext>`` name.
"""
import os
import shutil

# Source directories, one per dataset.
dongfeng_root = '/home/mly/data/datasets/text_recognition/from_nas/东风/合照/'
baodan_root = '/home/mly/data/datasets/text_recognition/from_nas/全线表格(保单合同)数据集/img/'
zhongguobank_root = '/home/mly/data/datasets/text_recognition/from_nas/全线银行流水数据集/img/中国银行/'
beijingbank_root = '/home/mly/data/datasets/text_recognition/from_nas/全线银行流水数据集/img/北京银行/'
gongshangbank_root = '/home/mly/data/datasets/text_recognition/from_nas/全线银行流水数据集/img/工商银行/'
jianshebank_root = '/home/mly/data/datasets/text_recognition/from_nas/全线银行流水数据集/img/建设银行/'
mohu_root = '/home/mly/data/datasets/text_recognition/from_nas/模糊图片/模糊图片_未分类/'
# Original note ("let,lxy,tx-- dir--jpg"): presumably this root holds
# sub-directories (let, lxy, tx) that in turn hold the .jpg files, which is
# why it is read with get_gouchefapiao_img_path_list() instead.
gouchefapiao_root = '/home/mly/data/datasets/text_recognition/from_nas/购车发票 2116张/购车发票/'
wild_200_train_root = '/home/mly/data/datasets/text_recognition/from_nas/通用场景文字检测测试集-wild200/最新整理过的数据集,请使用该文件夹下的数据/wild_200/train/image/'
wild_200_test_root = '/home/mly/data/datasets/text_recognition/from_nas/通用场景文字检测测试集-wild200/最新整理过的数据集,请使用该文件夹下的数据/wild_200/test/image/'
jiashizheng_root = '/home/mly/data/datasets/text_recognition/from_nas/通用/驾驶证/'
jiehunzheng_root = '/home/mly/data/datasets/text_recognition/from_nas/通用/结婚证/'
baoma_root = '/home/mly/data/datasets/text_recognition/from_nas/宝马/AFC_申请表_个人/'

# Destination directory that receives every sampled image.
overall_root = '/home/mly/data/datasets/text_recognition/from_nas/overall/'

# File-name suffixes treated as images (compared case-insensitively).
_IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png')


def _is_image_name(name):
    """Return True when *name* ends with a known image suffix."""
    # The previous test (`name[-1] == 'g'`) also matched ".log"/".cfg" and
    # missed upper-case extensions such as ".JPG".
    return name.lower().endswith(_IMAGE_SUFFIXES)


def get_img_path_list(root):
    """Return the image paths found directly inside *root*.

    Args:
        root: Directory to list (not searched recursively).

    Returns:
        ``os.path.join(root, name)`` for every image name in *root*, ordered
        by sorted file name.

    Raises:
        OSError: If *root* cannot be listed (propagated from ``os.listdir``).
    """
    return [
        os.path.join(root, img_name)
        for img_name in sorted(os.listdir(root))
        if _is_image_name(img_name)
    ]


def get_gouchefapiao_img_path_list(root):
    """Return the image paths found one directory level below *root*.

    Args:
        root: Directory whose immediate sub-directories contain the images.

    Returns:
        Image paths grouped by sub-directory. Sub-directories and the files
        within them are both visited in sorted order so the result is
        reproducible across runs and file systems.

    Raises:
        OSError: If *root* or one of its sub-directories cannot be listed.
    """
    img_list = []
    for bn in sorted(os.listdir(root)):
        sub_dir = os.path.join(root, bn)
        # Stray plain files next to the sub-directories are ignored instead
        # of making os.listdir() fail on them.
        if not os.path.isdir(sub_dir):
            continue
        img_list.extend(get_img_path_list(sub_dir))
    return img_list


def copy(img_list, prefix, gen_root, ratio=0.1):
    """Copy the leading share of *img_list* into *gen_root*.

    Files are written as ``<prefix>_<index><ext>`` where ``index`` is a
    zero-padded, four-digit running number starting at 0 and ``ext`` is the
    source file's extension.

    Args:
        img_list: Source image paths, in the order they should be sampled.
        prefix: Name prefix for the copied files.
        gen_root: Destination directory; created if it does not exist.
        ratio: Fraction of *img_list* to copy. The quota is
            ``int(len(img_list) * ratio)``, never less than 0.

    Raises:
        OSError: If a file cannot be copied or *gen_root* cannot be created.
    """
    # Clamp so a negative ratio copies nothing instead of producing a
    # negative slice bound.
    max_lth = max(0, int(len(img_list) * ratio))
    print(f'processing {prefix}, max lth: {max_lth}')

    os.makedirs(gen_root, exist_ok=True)

    # Slicing copies exactly `max_lth` files; the previous increment-then-
    # compare loop copied one file more than the quota.
    for cnt, img in enumerate(img_list[:max_lth]):
        # splitext only looks at the final path component, so dots in
        # directory names cannot be mistaken for the extension separator.
        extension = os.path.splitext(img)[1]
        shutil.copy(src=img, dst=os.path.join(gen_root, f'{prefix}_{cnt:04d}{extension}'))


def main():
    """Collect every dataset's images, then copy each dataset's sample."""
    # (collector, source root, output prefix, ratio) in processing order.
    datasets = (
        (get_img_path_list, dongfeng_root, 'dongfeng', 0.6),
        (get_img_path_list, baodan_root, 'baodan', 0.1),
        (get_img_path_list, zhongguobank_root, 'zhongguobank', 0.1),
        (get_img_path_list, beijingbank_root, 'beijingbank', 0.1),
        (get_img_path_list, gongshangbank_root, 'gongshangbank', 0.1),
        (get_img_path_list, jianshebank_root, 'jianshebank', 0.1),
        (get_img_path_list, mohu_root, 'mohu', 0.1),
        (get_img_path_list, wild_200_train_root, 'wild_200_train', 0.1),
        (get_img_path_list, wild_200_test_root, 'wild_200_test', 0.1),
        (get_gouchefapiao_img_path_list, gouchefapiao_root, 'gouchefapiao', 0.1),
        (get_img_path_list, jiehunzheng_root, 'jiehunzheng', 1.0),
        (get_img_path_list, jiashizheng_root, 'jiashizheng', 0.5),
        (get_img_path_list, baoma_root, 'baoma', 0.1),
    )

    # List every source first so a missing directory aborts the run before
    # anything has been written to the destination.
    collected = [
        (collect(root), prefix, ratio)
        for collect, root, prefix, ratio in datasets
    ]

    for img_list, prefix, ratio in collected:
        copy(img_list, prefix, overall_root, ratio)


if __name__ == '__main__':
    main()