# mp_create_aug.py (2.95 KB)
import os
import cv2
import random
import numpy as np

import imgaug as ia
import imgaug.augmenters as iaa

import multiprocessing as mp


def mkdir(path):
    """Create directory ``path`` together with any missing parents.

    Calling this for a directory that already exists is a no-op. The
    existence check and the creation happen in a single ``os.makedirs``
    call, so two processes creating the same directory at the same time
    cannot make each other fail.

    Args:
        path: Directory to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

# Input locations: images live under src_root, their masks under mask_root,
# each with a 'train' and a 'val' sub-directory.
src_root = '/home/mly/data/datasets/text_recognition/CDLA/CDLA_DATASET_SRC/'
mask_root = '/home/mly/data/datasets/text_recognition/CDLA/CDLA_DATASET_SEG_ANNOTATIONS_FULL/'
train_img_root, val_img_root = [
    os.path.join(src_root, split) for split in ('train', 'val')
]
train_mask_root, val_mask_root = [
    os.path.join(mask_root, split) for split in ('train', 'val')
]

# Output locations: <gen_root>/<split>/img and <gen_root>/<split>/mask.
gen_root = '/home/mly/data/datasets/text_recognition/CDLA/gen/'
gen_train_root, gen_val_root = [
    os.path.join(gen_root, split) for split in ('train', 'val')
]
gen_train_img_root, gen_train_mask_root = [
    os.path.join(gen_train_root, kind) for kind in ('img', 'mask')
]
gen_val_img_root, gen_val_mask_root = [
    os.path.join(gen_val_root, kind) for kind in ('img', 'mask')
]

# Make sure every output directory exists before anything is written.
for _out_dir in (
    gen_train_img_root,
    gen_train_mask_root,
    gen_val_img_root,
    gen_val_mask_root,
):
    mkdir(_out_dir)
del _out_dir  # do not leave the loop variable behind as a module attribute


# Augmentation pipeline used for every generated sample. The child augmenters
# listed below are applied in a randomly chosen order (random_order=True).
# Several steps use keep_size=False, so the output size can differ from the
# input size.
transform = iaa.Sequential([
        # Horizontal flip with probability 0.5.
        iaa.Fliplr(0.5),
        # Vertical flip with probability 0.2.
        iaa.Flipud(0.2),
        # With probability 0.3: crop (negative) or pad (positive) by up to 10%,
        # without resizing back to the original size.
        iaa.Sometimes(0.3, iaa.CropAndPad(percent=(-0.1, 0.1), keep_size=False)),
        # With probability 0.5: Gaussian blur with sigma drawn from [0, 2.0].
        iaa.Sometimes(0.5, iaa.GaussianBlur((0, 2.0))),
        # Random perspective distortion, scale drawn from [0.05, 0.20].
        iaa.PerspectiveTransform(scale=(0.05, 0.20), keep_size=False),
        # Local elastic distortion, alpha drawn from [0, 3.0], sigma fixed at 0.25.
        iaa.ElasticTransformation(alpha=(0, 3.0), sigma=0.25),
        # Exactly one rotation variant per application:
        iaa.OneOf([
            # 1 to 3 quarter turns, output size not forced back to the input size.
            iaa.Rot90((1, 3), keep_size=False),
            # Free rotation by an angle drawn from [-30, 30] degrees.
            iaa.Rotate((-30, 30))
        ]),
    ], random_order=True)


def gen(i, name_list, lth, img_root, mask_root, generate_img_root, generate_mask_root, need,):
    """Create one augmented image/mask pair and write both to disk.

    A file name is drawn at random from ``name_list``; the image and the mask
    stored under that same name are loaded, passed together through the
    module-level ``transform`` pipeline (image as ``images``, mask as
    ``segmentation_maps``) and saved as ``<i, zero-padded to 5 digits>.jpg``.

    Args:
        i: Index of the generated sample; determines the output file name.
        name_list: File names that exist in both ``img_root`` and ``mask_root``.
        lth: Sampling range; an index in ``[0, lth - 1]`` is drawn. Callers
            pass ``len(name_list)``.
        img_root: Directory containing the source images.
        mask_root: Directory containing the source masks.
        generate_img_root: Directory receiving the augmented image.
        generate_mask_root: Directory receiving the augmented mask.
        need: Total number of requested samples. Not used here; kept so the
            call signature stays unchanged.

    Raises:
        ValueError: If ``lth`` is not positive (raised by ``random.randint``).
        OSError: If the image or mask cannot be read, or an output file
            cannot be written.
    """
    name = name_list[random.randint(0, lth - 1)]
    img_path = os.path.join(img_root, name)
    mask_path = os.path.join(mask_root, name)

    # cv2.imread signals failure by returning None instead of raising, so a
    # missing or undecodable file has to be detected explicitly.
    img = cv2.imread(img_path)
    if img is None:
        raise OSError("cannot read image: {}".format(img_path))
    mask = cv2.imread(mask_path)
    if mask is None:
        raise OSError("cannot read mask: {}".format(mask_path))

    # The pipeline is called with batches: add a leading axis of size 1.
    img_batch = np.asarray(img, dtype=np.uint8)[np.newaxis]
    mask_batch = np.asarray(mask, dtype=np.uint8)[np.newaxis]
    aug_imgs, aug_masks = transform(images=img_batch, segmentation_maps=mask_batch)

    out_name = "{:>05d}.jpg".format(i)
    out_img_path = os.path.join(generate_img_root, out_name)
    out_mask_path = os.path.join(generate_mask_root, out_name)

    # cv2.imwrite reports failure through its return value; do not let a
    # failed write pass unnoticed.
    if not cv2.imwrite(out_img_path, aug_imgs[0]):
        raise OSError("cannot write image: {}".format(out_img_path))
    # NOTE(review): the mask is stored as JPEG, which is lossy. If the mask
    # encodes classes as exact pixel values, compression can alter them;
    # PNG would avoid that. The name is left unchanged so existing consumers
    # of the generated data keep working - confirm before switching.
    if not cv2.imwrite(out_mask_path, aug_masks[0]):
        raise OSError("cannot write mask: {}".format(out_mask_path))


def generate(img_root, mask_root, generate_img_root, generate_mask_root, need):
    """Write ``need`` augmented image/mask pairs sampled from ``img_root``.

    The (sorted) directory listing of ``img_root`` is the pool of source file
    names; ``gen`` is called once per output index ``0 .. need - 1`` and picks
    a random name from that pool each time.

    Args:
        img_root: Directory containing the source images.
        mask_root: Directory containing the masks, looked up by the same
            file names as the images.
        generate_img_root: Directory receiving the augmented images.
        generate_mask_root: Directory receiving the augmented masks.
        need: Number of augmented pairs to produce.

    Raises:
        ValueError: If samples are requested but ``img_root`` has no entries.
    """
    name_list = sorted(os.listdir(img_root))
    lth = len(name_list)

    # Fail early with a clear message; otherwise the first gen() call would
    # die on an "empty range" error from the random index draw.
    if need > 0 and lth == 0:
        raise ValueError("no source files found in {!r}".format(img_root))

    # NOTE: a multiprocessing.Pool variant (pool.apply_async(gen, ...)) was
    # sketched here previously but disabled; samples are produced sequentially
    # in this process.
    for i in range(need):
        gen(i, name_list, lth, img_root, mask_root, generate_img_root, generate_mask_root, need)
        
def main():
    """Generate the augmented validation split (8000 image/mask pairs)."""
    # The training split is currently disabled. To re-enable it:
    #   print('processing train')
    #   generate(train_img_root, train_mask_root,
    #            gen_train_img_root, gen_train_mask_root, 20000)
    print('processing val')
    generate(
        img_root=val_img_root,
        mask_root=val_mask_root,
        generate_img_root=gen_val_img_root,
        generate_mask_root=gen_val_mask_root,
        need=8000,
    )

# Start the generation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()