# get_csv.py (original listing: 475 bytes)
import pandas as pd
import os
import random

# Dataset directory: images are read from `<root>/img` and the generated
# `train.csv` / `test.csv` files are written next to that folder.
root = '/home/mly/data/datasets/text_recognition/finetune/src/'


def split_file_names(names, train_size=1000, seed=None):
    """Shuffle *names* and split them into a train list and a test list.

    Args:
        names: Iterable of file names (e.g. the result of ``os.listdir``).
            The input is not modified.
        train_size: Number of shuffled names that go to the train list.
            If there are fewer names than this, every name lands in the
            train list and the test list is empty.
        seed: Optional seed for the shuffle. ``None`` (the default) gives a
            different split on every run; any other value makes the split
            reproducible.

    Returns:
        A ``(train_names, test_names)`` tuple of lists.

    Raises:
        ValueError: If *train_size* is negative.
    """
    if train_size < 0:
        raise ValueError(f"train_size must be non-negative, got {train_size}")

    # os.listdir() returns entries in arbitrary order, so sort first: the same
    # seed then yields the same split on every machine / filesystem.
    shuffled = sorted(names)
    random.Random(seed).shuffle(shuffled)
    return shuffled[:train_size], shuffled[train_size:]


def build_split_frames(names, train_size=1000, seed=None):
    """Return ``(train_df, test_df)``, each with a single ``path`` column.

    The arguments are forwarded to :func:`split_file_names`.
    """
    train_names, test_names = split_file_names(names, train_size, seed)
    # Build the frames directly instead of assigning through `df.path = ...`,
    # which only works when the column already exists and its name does not
    # collide with a DataFrame attribute.
    train_df = pd.DataFrame({'path': train_names}, columns=['path'])
    test_df = pd.DataFrame({'path': test_names}, columns=['path'])
    return train_df, test_df


def main():
    """List ``<root>/img``, split it and write ``train.csv`` / ``test.csv``."""
    img_list = os.listdir(os.path.join(root, 'img'))
    train_df, test_df = build_split_frames(img_list)

    # The index column is written as well, matching the original file layout.
    train_df.to_csv(os.path.join(root, 'train.csv'))
    test_df.to_csv(os.path.join(root, 'test.csv'))


if __name__ == '__main__':
    main()