# create_test.py - copies a fraction of the images from several source dataset folders into one output folder.
import os
import shutil
# Source dataset locations. Every path ends with a slash and lives under the
# same NAS export directory, so the shared prefixes are spelled out only once.
_NAS_ROOT = '/home/mly/data/datasets/text_recognition/from_nas/'
_BANK_ROOT = _NAS_ROOT + '全线银行流水数据集/img/'
_WILD_200_ROOT = (
    _NAS_ROOT
    + '通用场景文字检测测试集-wild200/最新整理过的数据集,请使用该文件夹下的数据/wild_200/'
)

dongfeng_root = _NAS_ROOT + '东风/合照/'
baodan_root = _NAS_ROOT + '全线表格(保单合同)数据集/img/'

# Bank statement images, one folder per bank.
zhongguobank_root = _BANK_ROOT + '中国银行/'
beijingbank_root = _BANK_ROOT + '北京银行/'
gongshangbank_root = _BANK_ROOT + '工商银行/'
jianshebank_root = _BANK_ROOT + '建设银行/'

mohu_root = _NAS_ROOT + '模糊图片/模糊图片_未分类/'

# Per the original note: sub-directories (let, lxy, tx) that contain the jpg files.
gouchefapiao_root = _NAS_ROOT + '购车发票 2116张/购车发票/'

wild_200_train_root = _WILD_200_ROOT + 'train/image/'
wild_200_test_root = _WILD_200_ROOT + 'test/image/'

jiashizheng_root = _NAS_ROOT + '通用/驾驶证/'
jiehunzheng_root = _NAS_ROOT + '通用/结婚证/'
baoma_root = _NAS_ROOT + '宝马/AFC_申请表_个人/'

# Destination folder that receives the sampled copies.
overall_root = _NAS_ROOT + 'overall/'
def get_img_path_list(root):
    """Return the sorted full paths of the image files directly inside ``root``.

    Args:
        root: Directory to scan. Sub-directories are not descended into.

    Returns:
        A list of ``os.path.join(root, name)`` for every entry whose name ends
        in ``.jpg``, ``.jpeg`` or ``.png``, ordered by file name.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``root`` cannot be read.
    """
    # Match whole extensions rather than a trailing 'g' only, so entries such
    # as ``run.log`` or a sub-directory called ``img`` are no longer returned.
    # Matching stays case-sensitive, so nothing that was skipped before is
    # picked up now.
    image_suffixes = ('.jpg', '.jpeg', '.png')
    return [
        os.path.join(root, img_name)
        for img_name in sorted(os.listdir(root))
        if img_name.endswith(image_suffixes)
    ]
def get_gouchefapiao_img_path_list(root):
    """Return the image paths found exactly one directory level below ``root``.

    Each sub-directory of ``root`` is scanned for files ending in ``.jpg``,
    ``.jpeg`` or ``.png``; files sitting directly in ``root`` are ignored.

    Args:
        root: Directory whose sub-directories hold the images.

    Returns:
        A list of ``os.path.join(root, sub_dir, name)`` paths, ordered by
        sub-directory name and then by file name.

    Raises:
        OSError: Propagated from ``os.listdir`` when a directory cannot be read.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    img_list = []
    # os.listdir returns entries in arbitrary order; sorting both levels makes
    # the result (and therefore any subset taken from it) reproducible.
    for bn in sorted(os.listdir(root)):
        sub_dir = os.path.join(root, bn)
        if not os.path.isdir(sub_dir):
            # A stray regular file next to the sub-directories would otherwise
            # make the inner os.listdir call raise NotADirectoryError.
            continue
        for img_name in sorted(os.listdir(sub_dir)):
            # Whole-extension match: a bare trailing 'g' also hits e.g. '.log'.
            if img_name.endswith(image_suffixes):
                img_list.append(os.path.join(sub_dir, img_name))
    return img_list
def copy(img_list, prefix, gen_root, ratio=0.1):
    """Copy the leading ``ratio`` share of ``img_list`` into ``gen_root``.

    Each copied file is renamed to ``<prefix>_<4-digit index><extension>``,
    where the index counts from 0 and the extension is taken from the source
    file name.

    Args:
        img_list: Source file paths, in the order they should be taken.
        prefix: Name prefix for the copied files.
        gen_root: Destination directory; created if it does not exist yet.
        ratio: Fraction of ``img_list`` to copy. The number of files is
            ``int(len(img_list) * ratio)``, never less than zero.

    Raises:
        OSError: Propagated from ``os.makedirs`` / ``shutil.copy``.
    """
    # Clamp at zero: a negative slice bound would select "all but the last n".
    max_lth = max(0, int(len(img_list) * ratio))
    print(f'processing {prefix}, max lth: {max_lth}')
    os.makedirs(gen_root, exist_ok=True)
    # Slicing copies exactly max_lth files; the previous counter check ran
    # after the increment and therefore let one extra file through.
    for cnt, img in enumerate(img_list[:max_lth]):
        # splitext only looks at the final path component, so a dot in a
        # directory name is not mistaken for the start of the extension.
        ext = os.path.splitext(img)[1]
        shutil.copy(src=img, dst=os.path.join(gen_root, f'{prefix}_{cnt:04d}{ext}'))
def main():
    """Sample every source dataset into ``overall_root``.

    All source directories are listed first; only afterwards are the files
    copied, one dataset at a time, each with its own prefix and ratio.
    """
    # One row per dataset: (file prefix, source root, listing function, ratio).
    plan = (
        ('dongfeng', dongfeng_root, get_img_path_list, 0.6),
        ('baodan', baodan_root, get_img_path_list, 0.1),
        ('zhongguobank', zhongguobank_root, get_img_path_list, 0.1),
        ('beijingbank', beijingbank_root, get_img_path_list, 0.1),
        ('gongshangbank', gongshangbank_root, get_img_path_list, 0.1),
        ('jianshebank', jianshebank_root, get_img_path_list, 0.1),
        ('mohu', mohu_root, get_img_path_list, 0.1),
        ('wild_200_train', wild_200_train_root, get_img_path_list, 0.1),
        ('wild_200_test', wild_200_test_root, get_img_path_list, 0.1),
        ('gouchefapiao', gouchefapiao_root, get_gouchefapiao_img_path_list, 0.1),
        ('jiehunzheng', jiehunzheng_root, get_img_path_list, 1.0),
        ('jiashizheng', jiashizheng_root, get_img_path_list, 0.5),
        ('baoma', baoma_root, get_img_path_list, 0.1),
    )

    # Phase 1: list every source up front, so an unreadable directory aborts
    # the run before a single file has been copied.
    gathered = [
        (tag, lister(src_root), share)
        for tag, src_root, lister, share in plan
    ]

    # Phase 2: copy the requested share of each listing.
    for tag, paths, share in gathered:
        copy(paths, tag, overall_root, share)


if __name__ == '__main__':
    main()