# create_test.py 4.13 KB
import os
import shutil

# Common parent directory of every dataset referenced below.
_NAS_ROOT = '/home/mly/data/datasets/text_recognition/from_nas/'
# Parent directories shared by several of the datasets.
_BANK_ROOT = _NAS_ROOT + '全线银行流水数据集/img/'
_WILD_200_ROOT = _NAS_ROOT + '通用场景文字检测测试集-wild200/最新整理过的数据集,请使用该文件夹下的数据/wild_200/'

# Source directories, one per dataset; each value keeps its trailing slash.
dongfeng_root = _NAS_ROOT + '东风/合照/'
baodan_root = _NAS_ROOT + '全线表格(保单合同)数据集/img/'
zhongguobank_root = _BANK_ROOT + '中国银行/'
beijingbank_root = _BANK_ROOT + '北京银行/'
gongshangbank_root = _BANK_ROOT + '工商银行/'
jianshebank_root = _BANK_ROOT + '建设银行/'
mohu_root = _NAS_ROOT + '模糊图片/模糊图片_未分类/'
# Original author's layout note: let, lxy, tx -- dir -- jpg
# (presumably sub-directories that each hold jpg files; confirm on disk).
gouchefapiao_root = _NAS_ROOT + '购车发票 2116张/购车发票/'
wild_200_train_root = _WILD_200_ROOT + 'train/image/'
wild_200_test_root = _WILD_200_ROOT + 'test/image/'

jiashizheng_root = _NAS_ROOT + '通用/驾驶证/'
jiehunzheng_root = _NAS_ROOT + '通用/结婚证/'
baoma_root = _NAS_ROOT + '宝马/AFC_申请表_个人/'

# Destination directory that receives the sampled copies.
overall_root = _NAS_ROOT + 'overall/'


def get_img_path_list(root):
    """Return the paths of the image files located directly inside ``root``.

    An entry counts as an image when it is a regular file whose name ends
    with ``.jpg``, ``.jpeg`` or ``.png`` (compared case-insensitively).
    Checking the real extension instead of just the last character keeps
    unrelated entries that merely end in "g" (``.log`` files, a
    sub-directory named ``img``, ...) out of the result.

    Args:
        root: Directory to scan; sub-directories are not descended into.

    Returns:
        A list of ``os.path.join(root, name)`` paths, ordered by file name.

    Raises:
        OSError: If ``root`` cannot be listed (propagated from ``os.listdir``).
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    img_list = []
    # Sorting makes the result independent of the filesystem's listing order.
    for img_name in sorted(os.listdir(root)):
        img_path = os.path.join(root, img_name)
        if img_name.lower().endswith(image_suffixes) and os.path.isfile(img_path):
            img_list.append(img_path)

    return img_list


def get_gouchefapiao_img_path_list(root):
    """Return the image paths found one directory level below ``root``.

    Every sub-directory of ``root`` is scanned for regular files whose name
    ends with ``.jpg``, ``.jpeg`` or ``.png`` (compared case-insensitively).
    Entries of ``root`` that are not directories are skipped rather than
    passed to ``os.listdir``, which would raise on them.

    Args:
        root: Directory whose immediate sub-directories hold the images.

    Returns:
        A list of ``os.path.join(root, sub_dir, name)`` paths, ordered by
        sub-directory name and then by file name, so the result does not
        depend on the filesystem's listing order.

    Raises:
        OSError: If ``root`` or one of its sub-directories cannot be listed.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    img_list = []
    for bn in sorted(os.listdir(root)):
        sub_dir = os.path.join(root, bn)
        if not os.path.isdir(sub_dir):
            # Stray files next to the sub-directories are not datasets.
            continue
        for img_name in sorted(os.listdir(sub_dir)):
            img_path = os.path.join(sub_dir, img_name)
            if img_name.lower().endswith(image_suffixes) and os.path.isfile(img_path):
                img_list.append(img_path)

    return img_list


def copy(img_list, prefix, gen_root, ratio=0.1):
    """Copy the leading ``ratio`` share of ``img_list`` into ``gen_root``.

    At most ``int(len(img_list) * ratio)`` files are copied, taken from the
    front of ``img_list``. Each copy is named ``<prefix>_<NNNN><ext>``, where
    ``NNNN`` is a zero-padded running index starting at 0 and ``<ext>`` is the
    source file's extension (empty when the source name has none).

    Args:
        img_list: Sized iterable of source file paths.
        prefix: Name prefix for the copied files; also printed as progress.
        gen_root: Destination directory; created if it does not exist.
        ratio: Fraction of ``img_list`` to copy.

    Raises:
        OSError: If a source cannot be read or the destination is not writable.
    """
    # A negative ratio must not turn into a negative limit.
    max_lth = max(int(len(img_list) * ratio), 0)
    print(f'processing {prefix}, max lth: {max_lth}')
    os.makedirs(gen_root, exist_ok=True)
    for cnt, img in enumerate(img_list):
        # Test the limit before copying so exactly max_lth files are written.
        if cnt >= max_lth:
            break
        # splitext only looks at the file name, so dots in parent
        # directories cannot leak into the extension.
        extension = os.path.splitext(img)[1]
        target = os.path.join(gen_root, prefix + "_{:>04d}{}".format(cnt, extension))
        shutil.copy(src=img, dst=target)


def main():
    """Collect the image list of every dataset, then copy a share of each.

    All source directories are listed first; only afterwards are the files
    copied into ``overall_root``, one dataset at a time, each under its own
    file-name prefix and with its own sampling ratio.
    """
    # One row per dataset: (listing function, source root, prefix, ratio).
    jobs = (
        (get_img_path_list, dongfeng_root, 'dongfeng', 0.6),
        (get_img_path_list, baodan_root, 'baodan', 0.1),
        (get_img_path_list, zhongguobank_root, 'zhongguobank', 0.1),
        (get_img_path_list, beijingbank_root, 'beijingbank', 0.1),
        (get_img_path_list, gongshangbank_root, 'gongshangbank', 0.1),
        (get_img_path_list, jianshebank_root, 'jianshebank', 0.1),
        (get_img_path_list, mohu_root, 'mohu', 0.1),
        (get_img_path_list, wild_200_train_root, 'wild_200_train', 0.1),
        (get_img_path_list, wild_200_test_root, 'wild_200_test', 0.1),
        (get_gouchefapiao_img_path_list, gouchefapiao_root, 'gouchefapiao', 0.1),
        (get_img_path_list, jiehunzheng_root, 'jiehunzheng', 1.0),
        (get_img_path_list, jiashizheng_root, 'jiashizheng', 0.5),
        (get_img_path_list, baoma_root, 'baoma', 0.1),
    )

    # Phase 1: list every source directory before anything is copied, so an
    # unreadable directory aborts the run with nothing written yet.
    collected = [
        (lister(source_root), prefix, ratio)
        for lister, source_root, prefix, ratio in jobs
    ]

    # Phase 2: copy, in the same dataset order as the table above.
    for paths, prefix, ratio in collected:
        copy(paths, prefix, overall_root, ratio)


# Run the sampling job only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()