1ff97d6a by 刘晓龙

1st commit

# Document Segmentation
## Data
From the document segmentation annotation data.
## Model
ResNet50-Unet
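
A minimal sketch of the model named above, assuming `segmentation_models_pytorch` (imported as `smp` by the solvers below) is installed. Note the fine-tuning solver in this commit actually instantiates `smp.DeepLabV3Plus` with a resnet50 encoder, so this is illustrative only:

```python
import torch
import segmentation_models_pytorch as smp

# ResNet50 encoder + U-Net decoder with a single-channel document mask output.
model = smp.Unet(
    encoder_name='resnet50',
    encoder_weights='imagenet',   # ImageNet-pretrained encoder
    in_channels=3,                # RGB input
    classes=1,                    # one foreground class: the document page
    activation='sigmoid',         # probabilities in [0, 1]
)

with torch.no_grad():
    out = model(torch.randn(1, 3, 256, 256))  # matches the 256x256 training crops
print(out.shape)  # torch.Size([1, 1, 256, 256])
```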
from torch.utils.data import Dataset, DataLoader
import albumentations as A
import pandas as pd
import os
import cv2
import torch
import numpy as np
from PIL import Image
train_img_root = '/home/mly/data/datasets/text_recognition/CDLA/syn/train/img/'
val_img_root = '/home/mly/data/datasets/text_recognition/CDLA/syn/val/img'
train_anno_root = '/home/mly/data/datasets/text_recognition/CDLA/syn/train/mask/'
val_anno_root = '/home/mly/data/datasets/text_recognition/CDLA/syn/val/mask/'
train_csv_path = '/home/mly/data/datasets/text_recognition/CDLA/syn/train.csv'
val_csv_path = '/home/mly/data/datasets/text_recognition/CDLA/syn/val.csv'
def img_aug(img, mask):
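    # Training augmentation: random 256x256 crop plus flips/rotations/photometric
    # jitter, then ImageNet normalization; spatial transforms are applied jointly
    # to image and mask.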
transform = A.Compose([
A.RandomResizedCrop(height=256, width=256),
A.GaussNoise(p=0.3),
A.HorizontalFlip(p=0.5),
A.Rotate(limit=20, p=0.3),
A.RandomRotate90(p=0.5),
A.RandomBrightnessContrast(p=0.5),
A.Affine(rotate=(-90, 90), shear=(-45, 45), p=0.5),
A.RandomShadow(p=0.5),
A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
augmented = transform(image=img, mask=mask)
img = augmented['image']
mask = augmented['mask']
return img, mask
def val_tran(img, mask):
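    # Validation transform: deterministic 256x256 resize and ImageNet normalization only.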
transform = A.Compose([
A.Resize(256, 256),
A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
augmented = transform(image=img, mask=mask)
img = augmented['image']
mask = augmented['mask']
return img, mask
class CDLA(Dataset):
def __init__(self, img_root, anno_root, csv, is_training=True):
self.df = pd.read_csv(csv, index_col=0)
self.img_list = self.df.path.tolist()
self.img_root = img_root
self.anno_root = anno_root
self.is_training = is_training
def __getitem__(self, idx):
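        # Load the RGB image and its grayscale mask (same filename under both roots),
        # augment, convert HWC -> CHW, and binarize the mask (any non-zero -> 1).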
img_path = os.path.join(self.img_root, self.img_list[idx])
mask_path = os.path.join(self.anno_root, self.img_list[idx])
img = cv2.imread(img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
mask = cv2.imread(mask_path, 0)
if self.is_training:
img, mask = img_aug(img, mask)
else:
img, mask = val_tran(img, mask)
img = np.transpose(img, axes=(2, 0, 1))
mask[mask > 0] = 1
img = torch.tensor(img.copy(), dtype=torch.float32)
mask = torch.tensor(mask.copy(), dtype=torch.float32)
return img, mask
def __len__(self):
return len(self.img_list)
def get_loader():
CDLA_train_data = CDLA(img_root=train_img_root, anno_root=train_anno_root, csv=train_csv_path, is_training=True)
CDLA_val_data = CDLA(img_root=val_img_root, anno_root=val_anno_root, csv=val_csv_path, is_training=False)
train_loader = DataLoader(CDLA_train_data, batch_size=128, num_workers=8, shuffle=True, pin_memory=True, drop_last=True)
val_loader = DataLoader(CDLA_val_data, batch_size=128, num_workers=8, shuffle=False, pin_memory=True, drop_last=True)
return train_loader, val_loader
from .CDLA_loader import *
from .edge_loader import *
from .finetune_loader import *
import os
import cv2
import random
import numpy as np
import imgaug as ia
import imgaug.augmenters as iaa
from tqdm import tqdm
def mkdir(path):
if not os.path.exists(path):
os.makedirs(path)
src_root = '/home/mly/data/datasets/text_recognition/CDLA/CDLA_DATASET_SRC/'
mask_root = '/home/mly/data/datasets/text_recognition/CDLA/CDLA_DATASET_SEG_ANNOTATIONS_FULL/'
train_img_root = os.path.join(src_root, 'train')
train_mask_root = os.path.join(mask_root, 'train')
val_img_root = os.path.join(src_root, 'val')
val_mask_root = os.path.join(mask_root, 'val')
gen_root = '/home/mly/data/datasets/text_recognition/CDLA/gen/'
gen_train_root = os.path.join(gen_root, 'train')
gen_val_root = os.path.join(gen_root, 'val')
gen_train_img_root = os.path.join(gen_train_root, 'img')
gen_train_mask_root = os.path.join(gen_train_root, 'mask')
gen_val_img_root = os.path.join(gen_val_root, 'img')
gen_val_mask_root = os.path.join(gen_val_root, 'mask')
mkdir(gen_train_img_root)
mkdir(gen_train_mask_root)
mkdir(gen_val_img_root)
mkdir(gen_val_mask_root)
transform = iaa.Sequential([
iaa.Fliplr(0.5),
iaa.Flipud(0.2),
iaa.Sometimes(0.3, iaa.CropAndPad(percent=(-0.1, 0.1), keep_size=False)),
iaa.Sometimes(0.5, iaa.GaussianBlur((0, 2.0))),
iaa.PerspectiveTransform(scale=(0.05, 0.20), keep_size=False),
iaa.ElasticTransformation(alpha=(0, 3.0), sigma=0.25),
iaa.OneOf([
iaa.Rot90((1, 3), keep_size=False),
iaa.Rotate((-30, 30))
]),
], random_order=True)
def generate(img_root, mask_root, generate_img_root, generate_mask_root, need):
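    # Draw `need` random samples, augment image and mask jointly with the imgaug
    # pipeline above, and save them as zero-padded JPEG pairs.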
name_list = sorted(os.listdir(img_root))
lth = len(name_list)
for i in tqdm(range(need)):
idx = random.randint(0, lth - 1)
img = cv2.imread(os.path.join(img_root, name_list[idx]))
mask = cv2.imread(os.path.join(mask_root, name_list[idx]))
img = np.asarray(img, dtype=np.uint8)
mask = np.asarray(mask, dtype=np.uint8)
img = img[np.newaxis, :]
mask = mask[np.newaxis, :]
aug_img, aug_mask = transform(images=img, segmentation_maps=mask)
aug_img = aug_img[0]
aug_mask = aug_mask[0]
cv2.imwrite(os.path.join(generate_img_root, "{:>05d}.jpg".format(i)), aug_img)
cv2.imwrite(os.path.join(generate_mask_root, "{:>05d}.jpg".format(i)), aug_mask)
def main():
generate(train_img_root, train_mask_root, gen_train_img_root, gen_train_mask_root, 20000)
generate(val_img_root, val_mask_root, gen_val_img_root, gen_val_mask_root, 8000)
if __name__ == '__main__':
main()
import os
import cv2
import numpy as np
from PIL import Image
# import albumentations as A
import imgaug as ia
import imgaug.augmenters as iaa
from tqdm.contrib import tzip
src_root = '/home/mly/data/datasets/text_recognition/CDLA/gen/'
seg_root = '/home/mly/data/datasets/text_recognition/CDLA/CDLA_DATASET_SEG_ANNOTATIONS_FULL'
train_seg_root = os.path.join(seg_root, 'train')
val_seg_root = os.path.join(seg_root, 'val')
bg_root = '/home/mly/data/datasets/humanMatting/bg'
train_root = os.path.join(src_root, 'train')
val_root = os.path.join(src_root, 'val')
gen_root = '/home/mly/data/datasets/text_recognition/CDLA/CDLA_DATASET_SYN/'
if not os.path.exists(os.path.join(gen_root, 'img')):
os.mkdir(os.path.join(gen_root, 'img'))
if not os.path.exists(os.path.join(gen_root, 'mask')):
os.mkdir(os.path.join(gen_root, 'mask'))
if not os.path.exists(os.path.join(gen_root, 'edge')):
os.mkdir(os.path.join(gen_root, 'edge'))
gen_img_root = os.path.join(gen_root, 'img')
gen_mask_root = os.path.join(gen_root, 'mask')
gen_edge_root = os.path.join(gen_root, 'edge')
def get_img_mask_list(root):
file_list = os.listdir(root)
img_list = list()
for file in file_list:
if file[-1] == 'g':
img_list.append(file)
return img_list
def get_img_mask_full_path_list(img_list, img_root, mask_root):
img_full_path_list = list()
mask_full_path_list = list()
for img_name in img_list:
img_full_path_list.append(os.path.join(img_root, img_name))
mask_full_path_list.append(os.path.join(mask_root, img_name))
return img_full_path_list, mask_full_path_list
def read_bg_list(root):
img_list = os.listdir(root)
needed = list()
for img_name in img_list:
needed.append(os.path.join(root, img_name))
return needed
def img_aug(img, mask):
# transform = A.Compose([
# A.GaussNoise(p=0.5),
# A.Rotate(limit=20, p=0.3),
# A.RandomRotate90(p=0.5),
# A.RandomBrightnessContrast(p=0.5),
# A.Affine(p=0.5),
# A.ElasticTransform(p=0.5)
# ])
# augmented = transform(image=img, mask=mask)
# img = augmented['image']
# mask = augmented['mask']
# return img, mask
# transform = iaa.Sequential(
# iaa.Fliplr(0.5),
# iaa.Flipud(0.2),
# iaa.Sometimes(0.4, iaa.CropAndPad(percent=(-0.3, 0.3), pad_mode=ia.ALL, pad_cval=(0, 255), keep_size=False)),
# iaa.OneOf(
# iaa.GaussianBlur((0, 3.0)),
# iaa.AverageBlur(k=(2, 7)),
# iaa.MedianBlur(k=(3, 11)),
# ),
# iaa.Sometimes(0.2, iaa.Dropout(p=(0, 0.1), per_channel=0.5)),
# iaa.PerspectiveTransform(scale=(0.01, 0.3), keep_size=False),
# iaa.ElasticTransformation(alpha=(0, 5.0), sigma=0.25),
# iaa.OneOf(
# iaa.Rot90((1, 3), keep_size=False),
# iaa.Rotate((-45, 45), keep_size=False)
# )
# )
# img, mask = transform(images=img, segmentation_maps=mask)
return img, mask
def paste_img_and_mask(img, mask, bg):
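    # Resize the document to 512x512, paste it at a random offset on a 2048x2048
    # background, and derive a full-size mask plus a Canny edge map.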
img = cv2.resize(img, (512, 512))
mask = cv2.resize(mask, (512, 512), cv2.INTER_NEAREST)
y_max = 2048 - img.shape[1]
x_max = 2048 - img.shape[0]
x = int(np.random.randint(0, x_max, 1))
y = int(np.random.randint(0, y_max, 1))
point = (x, y)
bg = cv2.imread(bg)
bg = cv2.resize(bg, (2048, 2048))
bg_mask = np.zeros_like(bg)
bg[point[0]: point[0] + img.shape[0], point[1]: point[1] + img.shape[1], :] = img
bg_mask[point[0]: point[0] + img.shape[0], point[1]: point[1] + img.shape[1], :] = mask
edge = np.asarray(bg_mask.copy())
edge = cv2.Canny(edge, 50, 150)
return bg, bg_mask, edge
def generate(img_list, mask_list, gen_iter=4):
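    # For each iteration, augment every image/mask pair, pick a random background
    # (resampling while the file is <= 100 bytes, i.e. broken), and composite.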
bg_list = read_bg_list(bg_root)
len_img_list = len(img_list)
lth = len(bg_list)
for it in range(gen_iter):
print(f'processing iteration: {it}')
cnt = 0
for img, mask in tzip(img_list, mask_list):
print(img)
print(mask)
img_path = img
name = img_path.split('/')[-1].split('.')[0]
cnt += 1
bn = img_path.split('/')[-2]
img = cv2.imread(img)
mask = cv2.imread(mask)
img, mask = img_aug(img, mask)
idx = int(np.random.randint(0, lth, size=1))
while os.path.getsize(bg_list[idx]) <= 100:
idx = int(np.random.randint(0, lth, size=1))
img, mask, edge = paste_img_and_mask(img, mask, bg_list[idx])
if not os.path.exists(os.path.join(gen_img_root, bn)):
os.mkdir(os.path.join(gen_img_root, bn))
if not os.path.exists(os.path.join(gen_mask_root, bn)):
os.mkdir(os.path.join(gen_mask_root, bn))
if not os.path.exists(os.path.join(gen_edge_root, bn)):
os.mkdir(os.path.join(gen_edge_root, bn))
cv2.imwrite(os.path.join(gen_img_root, bn, name + '_' + str(it) + '.jpg'), img)
cv2.imwrite(os.path.join(gen_mask_root, bn, name + '_' + str(it) + '.jpg'), mask)
cv2.imwrite(os.path.join(gen_edge_root, bn, name + '_' + str(it) + '.jpg'), edge)
# def demo():
# paper_img = Image.open('/home/mly/data/datasets/text_recognition/CDLA/CDLA_DATASET/train/train_5000.jpg')
# print(paper_img.size)
# bg_img = Image.open('/home/mly/data/datasets/humanMatting/bg/办公桌_4057.jpg')
# bg_img = bg_img.resize((2048, 2048))
# bg_img.paste(paper_img, (0, 0))
# bg_img.save('./paste.jpg')
def main():
train_img_mask_list = get_img_mask_list(root=train_root)
val_img_mask_list = get_img_mask_list(root=val_root)
train_img_full_path, train_mask_full_path = get_img_mask_full_path_list(train_img_mask_list, train_root, train_seg_root)
val_img_full_path, val_mask_full_path = get_img_mask_full_path_list(val_img_mask_list, val_root, val_seg_root)
generate(train_img_full_path, train_mask_full_path, gen_iter=2)
generate(val_img_full_path, val_mask_full_path, gen_iter=1)
if __name__ == '__main__':
main()
import os
import shutil
dongfeng_root = '/home/mly/data/datasets/text_recognition/from_nas/东风/合照/'
baodan_root = '/home/mly/data/datasets/text_recognition/from_nas/全线表格(保单合同)数据集/img/'
zhongguobank_root = '/home/mly/data/datasets/text_recognition/from_nas/全线银行流水数据集/img/中国银行/'
beijingbank_root = '/home/mly/data/datasets/text_recognition/from_nas/全线银行流水数据集/img/北京银行/'
gongshangbank_root = '/home/mly/data/datasets/text_recognition/from_nas/全线银行流水数据集/img/工商银行/'
jianshebank_root = '/home/mly/data/datasets/text_recognition/from_nas/全线银行流水数据集/img/建设银行/'
mohu_root = '/home/mly/data/datasets/text_recognition/from_nas/模糊图片/模糊图片_未分类/'
gouchefapiao_root = '/home/mly/data/datasets/text_recognition/from_nas/购车发票 2116张/购车发票/'  # subdirectories (let, lxy, tx), each containing .jpg files
wild_200_train_root = '/home/mly/data/datasets/text_recognition/from_nas/通用场景文字检测测试集-wild200/最新整理过的数据集,请使用该文件夹下的数据/wild_200/train/image/'
wild_200_test_root = '/home/mly/data/datasets/text_recognition/from_nas/通用场景文字检测测试集-wild200/最新整理过的数据集,请使用该文件夹下的数据/wild_200/test/image/'
jiashizheng_root = '/home/mly/data/datasets/text_recognition/from_nas/通用/驾驶证/'
jiehunzheng_root = '/home/mly/data/datasets/text_recognition/from_nas/通用/结婚证/'
baoma_root = '/home/mly/data/datasets/text_recognition/from_nas/宝马/AFC_申请表_个人/'
overall_root = '/home/mly/data/datasets/text_recognition/from_nas/overall/'
def get_img_path_list(root):
img_name_list = sorted(os.listdir(root))
img_list = list()
for img_name in img_name_list:
if img_name[-1] == 'g':
img_list.append(os.path.join(root, img_name))
return img_list
def get_gouchefapiao_img_path_list(root):
img_list = list()
bn_list = os.listdir(root)
for bn in bn_list:
img_name_list = os.listdir(os.path.join(root, bn))
for img_name in img_name_list:
if img_name[-1] == 'g':
img_list.append(os.path.join(root, bn, img_name))
return img_list
def copy(img_list, prefix, gen_root, ratio=0.1):
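    # Copy roughly `ratio` of img_list into gen_root, renaming each file to
    # '<prefix>_<counter>.<original extension>'.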
cnt = 0
max_lth = int(len(img_list) * ratio)
print(f'processing {prefix}, max lth: {max_lth}')
for img in img_list:
endpoint = img.split('.')[-1]
shutil.copy(src=img, dst=os.path.join(gen_root, prefix + "_{:>04d}.{}".format(cnt, endpoint)))
cnt += 1
if cnt > max_lth:
break
def main():
dongfeng_list = get_img_path_list(dongfeng_root)
baodan_list = get_img_path_list(baodan_root)
zhongguobank_list = get_img_path_list(zhongguobank_root)
beijingbank_list = get_img_path_list(beijingbank_root)
gongshangbank_list = get_img_path_list(gongshangbank_root)
jianshebank_list = get_img_path_list(jianshebank_root)
mohu_list = get_img_path_list(mohu_root)
wild_200_train_list = get_img_path_list(wild_200_train_root)
wild_200_test_list = get_img_path_list(wild_200_test_root)
gouchefapiao_list = get_gouchefapiao_img_path_list(gouchefapiao_root)
jiehunzheng_list = get_img_path_list(jiehunzheng_root)
jiashizheng_list = get_img_path_list(jiashizheng_root)
baoma_list = get_img_path_list(baoma_root)
copy(dongfeng_list, 'dongfeng', overall_root, 0.6)
copy(baodan_list, 'baodan', overall_root, 0.1)
copy(zhongguobank_list, 'zhongguobank', overall_root, 0.1)
copy(beijingbank_list, 'beijingbank', overall_root, 0.1)
copy(gongshangbank_list, 'gongshangbank', overall_root, 0.1)
copy(jianshebank_list, 'jianshebank', overall_root, 0.1)
copy(mohu_list, 'mohu', overall_root, 0.1)
copy(wild_200_train_list, 'wild_200_train', overall_root, 0.1)
copy(wild_200_test_list, 'wild_200_test', overall_root, 0.1)
copy(gouchefapiao_list, 'gouchefapiao', overall_root, 0.1)
copy(jiehunzheng_list, 'jiehunzheng', overall_root, 1.0)
copy(jiashizheng_list, 'jiashizheng', overall_root, 0.5)
copy(baoma_list, 'baoma', overall_root, 0.1)
if __name__ == '__main__':
main()
import os
from PIL import Image
root = '/home/mly/data/datasets/text_recognition/finetune/src/'
img_root = os.path.join(root, 'img')
mask_root = os.path.join(root, 'mask')
crop_root = os.path.join(root, 'crop')
blend_root = os.path.join(root, 'blend')
img_list = os.listdir(img_root)
if not os.path.exists(crop_root):
os.mkdir(crop_root)
for name in img_list:
img = Image.open(os.path.join(img_root, name)).convert('RGB')
    mask = Image.open(os.path.join(mask_root, name)).convert('L')
    crop = Image.new('RGB', img.size)
    crop.paste(img, mask=mask)  # mask acts as the paste alpha
    crop.save(os.path.join(crop_root, name))
import os
import cv2
import numpy as np
from PIL import Image
from tqdm.contrib import tzip
import threading
src_root = '/home/mly/data/datasets/text_recognition/CDLA/CDLA_DATASET'
seg_root = '/home/mly/data/datasets/text_recognition/CDLA/CDLA_DATASET_SEG_ANNOTATIONS_FULL'
train_seg_root = os.path.join(seg_root, 'train')
val_seg_root = os.path.join(seg_root, 'val')
bg_root = '/home/mly/data/datasets/humanMatting/bg'
train_root = os.path.join(src_root, 'train')
val_root = os.path.join(src_root, 'val')
# gen_root = '/home/mly/data/datasets/text_recognition/CDLA/CDLA_DATASET_SYN/'
gen_root = '/home/mly/data/datasets/text_recognition/CDLA/new_syn'
if not os.path.exists(gen_root):
os.mkdir(gen_root)
if not os.path.exists(os.path.join(gen_root, 'img')):
os.mkdir(os.path.join(gen_root, 'img'))
if not os.path.exists(os.path.join(gen_root, 'mask')):
os.mkdir(os.path.join(gen_root, 'mask'))
if not os.path.exists(os.path.join(gen_root, 'edge')):
os.mkdir(os.path.join(gen_root, 'edge'))
gen_img_root = os.path.join(gen_root, 'img')
gen_mask_root = os.path.join(gen_root, 'mask')
gen_edge_root = os.path.join(gen_root, 'edge')
def get_img_mask_list(root):
file_list = os.listdir(root)
img_list = list()
for file in file_list:
if file[-1] == 'g':
img_list.append(file)
return img_list
def get_img_mask_full_path_list(img_list, img_root, mask_root):
img_full_path_list = list()
mask_full_path_list = list()
for img_name in img_list:
img_full_path_list.append(os.path.join(img_root, img_name))
mask_full_path_list.append(os.path.join(mask_root, img_name))
return img_full_path_list, mask_full_path_list
def read_bg_list(root):
img_list = os.listdir(root)
needed = list()
for img_name in img_list:
if img_name[-1] == 'g':
needed.append(os.path.join(root, img_name))
return needed
def paste_img_and_mask(img, mask, bg):
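    # Resize the document to a random size in [448, 2048) (note cv2.resize takes
    # (width, height)), paste it at a random offset on a 2048x2048 background, and
    # build an all-white mask plus a Canny edge map.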
img = cv2.imread(img)
# mask = cv2.imread(mask)
# img = cv2.resize(img, (512, 512))
# mask = cv2.resize(mask, (512, 512), cv2.INTER_NEAREST)
resize_h = int(np.random.randint(448, 2048, 1))
resize_w = int(np.random.randint(448, 2048, 1))
img = cv2.resize(img, (resize_h, resize_w))
mask = np.ones(img.shape) * 255
y_max = 2048 - img.shape[1]
x_max = 2048 - img.shape[0]
x = int(np.random.randint(0, x_max, 1))
y = int(np.random.randint(0, y_max, 1))
point = (x, y)
try:
bg = cv2.imread(bg)
bg = cv2.resize(bg, (2048, 2048))
except BaseException:
        print('background unreadable; falling back to 办公桌_4257.jpg')
bg = cv2.imread('/home/mly/data/datasets/humanMatting/bg/办公桌_4257.jpg')
bg = cv2.resize(bg, (2048, 2048))
bg_mask = np.zeros_like(bg)
# img[img == 0] = 100
bg[point[0]: point[0] + img.shape[0], point[1]: point[1] + img.shape[1], :] = img
bg_mask[point[0]: point[0] + img.shape[0], point[1]: point[1] + img.shape[1], :] = mask
edge = np.asarray(bg_mask.copy())
edge = cv2.Canny(edge, 50, 255)
return bg, bg_mask, edge
def generate(img_list, mask_list, gen_iter=5):
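    # Composite every document `gen_iter` times onto random backgrounds, skipping
    # background files smaller than 100 bytes.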
# img_list = img_list[:10]
bg_list = read_bg_list(bg_root)
len_bg_list = len(bg_list)
len_img_list = len(img_list)
for it in range(gen_iter):
for img, mask in tzip(img_list, mask_list):
idx = int(np.random.randint(0, len_bg_list, 1))
while os.path.getsize(bg_list[idx]) < 100:
idx = int(np.random.randint(0, len_bg_list, 1))
bn = img.split('/')[-2]
name = img.split('/')[-1].split('.')[0]
img, mask, edge = paste_img_and_mask(img, mask, bg_list[idx])
if not os.path.exists(os.path.join(gen_img_root, bn)):
os.mkdir(os.path.join(gen_img_root, bn))
cv2.imwrite(os.path.join(gen_img_root, bn, name + '_' + str(it) + '.jpg'), img)
if not os.path.exists(os.path.join(gen_mask_root, bn)):
os.mkdir(os.path.join(gen_mask_root, bn))
cv2.imwrite(os.path.join(gen_mask_root, bn, name + '_' + str(it) + '.jpg'), mask)
if not os.path.exists(os.path.join(gen_edge_root, bn)):
os.mkdir(os.path.join(gen_edge_root, bn))
cv2.imwrite(os.path.join(gen_edge_root, bn, name + '_' + str(it) + '.jpg'), edge)
def main():
train_img_mask_list = get_img_mask_list(root=train_root)
val_img_mask_list = get_img_mask_list(root=val_root)
train_img_full_path, train_mask_full_path = get_img_mask_full_path_list(train_img_mask_list, train_root,
train_seg_root)
val_img_full_path, val_mask_full_path = get_img_mask_full_path_list(val_img_mask_list, val_root, val_seg_root)
print('processing train!')
generate(train_img_full_path, train_mask_full_path, gen_iter=6)
print('processing val!')
generate(val_img_full_path, val_mask_full_path, gen_iter=4)
if __name__ == '__main__':
main()
# t1 = threading.Thread(target=main)
# t2 = threading.Thread(target=main)
# t3 = threading.Thread(target=main)
# t4 = threading.Thread(target=main)
# t1.start()
# t2.start()
# t3.start()
# t4.start()
# t1.join()
# t2.join()
# t3.join()
# t4.join()
from torch.utils.data import DataLoader, Dataset
import os
import pandas as pd
import cv2
syn_root = '/home/mly/data/datasets/text_recognition/CDLA/CDLA_DATASET_SYN'
syn_img_root = os.path.join(syn_root, 'img')
syn_edge_root = os.path.join(syn_root, 'edge')
syn_mask_root = os.path.join(syn_root, 'mask')
def img_aug(img, edge):
    # Placeholder transform: return the pair unchanged (augmentation not implemented yet).
    return img, edge
class EdgeData(Dataset):
def __init__(self, csv, transform):
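        # The csv stem ('train' or 'val') selects the matching img/edge subdirectories.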
phase = csv.split('/')[-1].split('.')[0]
df = pd.read_csv(csv, index_col=0)
self.img_list = df.path.tolist()
self.img_root = os.path.join(syn_img_root, phase)
self.edge_root = os.path.join(syn_edge_root, phase)
self.transform = transform
def __len__(self):
return len(self.img_list)
def __getitem__(self, idx):
img = cv2.imread(os.path.join(self.img_root, self.img_list[idx]))
edge = cv2.imread(os.path.join(self.edge_root, self.img_list[idx]))
img, edge = self.transform(img, edge)
return img, edge
def get_loader():
edge_train_data = EdgeData(csv=os.path.join(syn_root, 'train.csv'), transform=img_aug)
edge_val_data = EdgeData(csv=os.path.join(syn_root, 'val.csv'), transform=img_aug)
train_loader = DataLoader(edge_train_data, batch_size=8, num_workers=4, pin_memory=True, drop_last=True)
val_loader = DataLoader(edge_val_data, batch_size=8, num_workers=4, pin_memory=True, drop_last=True)
return train_loader, val_loader
import os
import torch
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import transforms as Ap
import cv2
import pandas as pd
root = '/home/mly/data/datasets/text_recognition/finetune/src'
img_root = os.path.join(root, 'img')
label_root = os.path.join(root, 'mask')
train_csv_path = os.path.join(root, 'train.csv')
val_csv_path = os.path.join(root, 'test.csv')
SIZE = 512
def train_aug(img, mask):
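    # Fine-tuning augmentation: resize to 768, take a random 512 crop, apply
    # flips/rotations/photometric jitter, normalize, and convert to tensors.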
transform = A.Compose([
# A.RandomResizedCrop(height=224, width=224),
# A.Resize(SIZE, SIZE),
A.Resize(768, 768),
A.RandomCrop(SIZE, SIZE),
A.GaussNoise(p=0.3),
A.HorizontalFlip(p=0.5),
A.Rotate(limit=20, p=0.3),
A.RandomRotate90(p=0.5),
A.RandomBrightnessContrast(p=0.5),
A.Affine(rotate=(-90, 90), shear=(-45, 45), p=0.5),
A.RandomShadow(p=0.5),
A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
Ap.ToTensorV2(),
])
augmented = transform(image=img, mask=mask)
img = augmented['image']
mask = augmented['mask']
return img, mask
def val_aug(img, mask):
transform = A.Compose([
        A.Resize(SIZE, SIZE),
A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
Ap.ToTensorV2(),
])
augmented = transform(image=img, mask=mask)
img = augmented['image']
mask = augmented['mask']
return img, mask
class FineTuneData(Dataset):
def __init__(self, img_root, anno_root, csv, is_training=True):
self.df = pd.read_csv(csv, index_col=0)
self.img_list = self.df.path.tolist()
self.img_root = img_root
self.anno_root = anno_root
self.is_training = is_training
def __getitem__(self, idx):
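        # Images are stored as .jpg; masks share the stem but are stored as .png.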
img_path = os.path.join(self.img_root, self.img_list[idx])
mask_path = os.path.join(self.anno_root, self.img_list[idx].split('.')[0] + '.png')
img = cv2.imread(img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
mask = cv2.imread(mask_path, 0)
if self.is_training:
img, mask = train_aug(img, mask)
else:
img, mask = val_aug(img, mask)
mask[mask > 0] = 1
img = img.to(torch.float32)
mask = mask.to(torch.float32)
return img, mask
def __len__(self):
return len(self.img_list)
def get_loader():
bs = 16
finetune_train_data = FineTuneData(img_root=img_root, anno_root=label_root, csv=train_csv_path, is_training=True)
finetune_val_data = FineTuneData(img_root=img_root, anno_root=label_root, csv=val_csv_path, is_training=False)
train_loader = DataLoader(finetune_train_data, batch_size=bs, num_workers=4, shuffle=True, pin_memory=True, drop_last=True)
val_loader = DataLoader(finetune_val_data, batch_size=bs, num_workers=4, shuffle=False, pin_memory=True, drop_last=True)
return train_loader, val_loader
import pandas as pd
import os
img_root = '/home/mly/data/datasets/text_recognition/CDLA/syn/'
train_img_root = os.path.join(img_root, 'train', 'img')
val_img_root = os.path.join(img_root, 'val', 'img')
gen_root = '/home/mly/data/datasets/text_recognition/CDLA/syn/'
train_img_list = sorted(os.listdir(train_img_root))
train_df = pd.DataFrame(columns=['path'])
train_df.path = train_img_list
val_img_list = sorted(os.listdir(val_img_root))
val_df = pd.DataFrame(columns=['path'])
val_df.path = val_img_list
train_df.to_csv(os.path.join(gen_root, 'train.csv'))
val_df.to_csv(os.path.join(gen_root, 'val.csv'))
print(f'saved train.csv and val.csv to {gen_root}')
import pandas as pd
import os
import random
root = '/home/mly/data/datasets/text_recognition/finetune/src/'
train_df = pd.DataFrame(columns=['path'])
test_df = pd.DataFrame(columns=['path'])
img_list = os.listdir(os.path.join(root, 'img'))
random.shuffle(img_list)
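# First 1000 shuffled images go to train, the rest to test.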
train_list = img_list[:1000]
test_list = img_list[1000:]
train_df.path = train_list
test_df.path = test_list
train_df.to_csv(os.path.join(root, 'train.csv'))
test_df.to_csv(os.path.join(root, 'test.csv'))
import os
import numpy as np
import cv2
import json
from tqdm import tqdm
from math import ceil
import pandas as pd
src_root = '/home/mly/data/datasets/text_recognition/CDLA/CDLA_DATASET'
anno_root = '/home/mly/data/datasets/text_recognition/CDLA/CDLA_DATASET_SEG_ANNOTATIONS_FULL'
csv_root = '/home/mly/data/datasets/text_recognition/CDLA/CDLA_DATASET_SEG_ANNOTATIONS_FULL/csv'
if not os.path.exists(anno_root):
os.mkdir(anno_root)
if not os.path.exists(os.path.join(anno_root, 'train')):
os.mkdir(os.path.join(anno_root, 'train'))
if not os.path.exists(os.path.join(anno_root, 'val')):
os.mkdir(os.path.join(anno_root, 'val'))
def read_list(root):
all_list = sorted(os.listdir(root))
name_list = list()
for all_name in all_list:
name_list.append(all_name.split('.')[0])
name_list = list(set(name_list))
return name_list
def read_dict(file):
    with open(file) as f:
        anno = json.load(f)
    return anno['shapes']
def parse_shapes_dict(shape_dict_list):
points_list = list()
for shape_dict in shape_dict_list:
points = shape_dict['points']
points_list = points_list + points
return points_list
def get_seg_points(points_list):
x_list = list()
y_list = list()
for points in points_list:
x_list.append(points[0])
y_list.append(points[1])
max_x = max(x_list)
min_x = min(x_list)
max_y = max(y_list)
min_y = min(y_list)
return [int(min_x), int(min_y), ceil(max_x), ceil(max_y)]
def gen_mask(h, w, c):
# min_x, min_y, max_x, max_y = points
mask = 255 * np.ones((h, w, c))
return mask.astype(np.uint8)
def process(root):
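    # For every annotated image, write an all-white mask of the same size (the
    # full page is foreground); empty .json files are skipped.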
sub_dir = root.split('/')[-1]
name_list = read_list(root)
for name in tqdm(name_list):
# for name in name_list:
img = cv2.imread(os.path.join(root, name+'.jpg'))
h, w, c = img.shape
json_path = os.path.join(root, name+'.json')
if os.path.getsize(json_path) != 0:
# shape_dict_list = read_dict(json_path)
# points_list = parse_shapes_dict(shape_dict_list)
# points = get_seg_points(points_list)
# mask = gen_mask(points, h, w, c)
mask = gen_mask(h, w, c)
if not os.path.exists(os.path.join(anno_root, sub_dir)):
os.mkdir(os.path.join(anno_root, sub_dir))
cv2.imwrite(os.path.join(anno_root, sub_dir, name+'.jpg'), mask)
else:
print(f'{name}.json size is 0!')
def gen_csv(root):
sub_dir = root.split('/')[-1]
img_name_list = sorted(os.listdir(root))
df = pd.DataFrame(columns=['path'])
df.path = img_name_list
df.to_csv(os.path.join(csv_root, sub_dir+'.csv'))
def main():
print('processing train...')
process(os.path.join(src_root, 'train'))
# gen_csv(os.path.join(anno_root, 'train'))
print('processing val...')
process(os.path.join(src_root, 'val'))
# gen_csv(os.path.join(anno_root, 'val'))
if __name__ == '__main__':
main()
import os
from PIL import Image
import random
from tqdm import tqdm
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import multiprocessing as mp
def mkdir(path):
if not os.path.exists(path):
os.makedirs(path)
syn_root = '/home/mly/data/datasets/text_recognition/CDLA/syn'
src_root = '/home/mly/data/datasets/text_recognition/CDLA/gen/'
bg_root = '/home/mly/data/datasets/humanMatting/bg/'
src_train_root = os.path.join(src_root, 'train')
src_val_root = os.path.join(src_root, 'val')
img_train_root = os.path.join(src_train_root, 'img')
mask_train_root = os.path.join(src_train_root, 'mask')
img_val_root = os.path.join(src_val_root, 'img')
mask_val_root = os.path.join(src_val_root, 'mask')
img_train_syn_root = os.path.join(syn_root, 'train', 'img')
mask_train_syn_root = os.path.join(syn_root, 'train', 'mask')
img_val_syn_root = os.path.join(syn_root, 'val', 'img')
mask_val_syn_root = os.path.join(syn_root, 'val', 'mask')
mkdir(img_train_syn_root)
mkdir(mask_train_syn_root)
mkdir(img_val_syn_root)
mkdir(mask_val_syn_root)
def process(img_root, mask_root, gen_img_root, gen_mask_root, bg_list):
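    # Paste each document at a random size and position onto a random 2048x2048
    # background, using the mask as the paste alpha, then save the composite and
    # the realigned mask.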
bg_lth = len(bg_list)
name_list = os.listdir(img_root)
for name in name_list:
img = Image.open(os.path.join(img_root, name)).convert('RGB')
mask = Image.open(os.path.join(mask_root, name)).convert('L')
bg_idx = random.randint(0, bg_lth - 1)
while os.path.getsize(bg_list[bg_idx]) <= 200:
bg_idx = random.randint(0, bg_lth - 1)
bg = Image.open(bg_list[bg_idx]).convert('RGB')
bg = bg.resize((2048, 2048))
width = random.randint(512, 2040)
height = random.randint(512, 2040)
img = img.resize((width, height))
mask = mask.resize((width, height))
x = random.randint(0, (2048 - width) - 1)
y = random.randint(0, (2048 - height) - 1)
bg.paste(img, (x, y), mask)
new_mask = Image.new('L', bg.size, 0)
new_mask.paste(mask, (x, y), mask)
bg.save(os.path.join(gen_img_root, name))
new_mask.save(os.path.join(gen_mask_root, name))
def main():
bg_list = [os.path.join(bg_root, bg) for bg in os.listdir(bg_root)]
process(img_train_root, mask_train_root, img_train_syn_root, mask_train_syn_root, bg_list)
process(img_val_root, mask_val_root, img_val_syn_root, mask_val_syn_root, bg_list)
if __name__ == '__main__':
main()
import os
import cv2
import random
import numpy as np
import imgaug as ia
import imgaug.augmenters as iaa
import multiprocessing as mp
def mkdir(path):
if not os.path.exists(path):
os.makedirs(path)
src_root = '/home/mly/data/datasets/text_recognition/CDLA/CDLA_DATASET_SRC/'
mask_root = '/home/mly/data/datasets/text_recognition/CDLA/CDLA_DATASET_SEG_ANNOTATIONS_FULL/'
train_img_root = os.path.join(src_root, 'train')
train_mask_root = os.path.join(mask_root, 'train')
val_img_root = os.path.join(src_root, 'val')
val_mask_root = os.path.join(mask_root, 'val')
gen_root = '/home/mly/data/datasets/text_recognition/CDLA/gen/'
gen_train_root = os.path.join(gen_root, 'train')
gen_val_root = os.path.join(gen_root, 'val')
gen_train_img_root = os.path.join(gen_train_root, 'img')
gen_train_mask_root = os.path.join(gen_train_root, 'mask')
gen_val_img_root = os.path.join(gen_val_root, 'img')
gen_val_mask_root = os.path.join(gen_val_root, 'mask')
mkdir(gen_train_img_root)
mkdir(gen_train_mask_root)
mkdir(gen_val_img_root)
mkdir(gen_val_mask_root)
transform = iaa.Sequential([
iaa.Fliplr(0.5),
iaa.Flipud(0.2),
iaa.Sometimes(0.3, iaa.CropAndPad(percent=(-0.1, 0.1), keep_size=False)),
iaa.Sometimes(0.5, iaa.GaussianBlur((0, 2.0))),
iaa.PerspectiveTransform(scale=(0.05, 0.20), keep_size=False),
iaa.ElasticTransformation(alpha=(0, 3.0), sigma=0.25),
iaa.OneOf([
iaa.Rot90((1, 3), keep_size=False),
iaa.Rotate((-30, 30))
]),
], random_order=True)
def gen(i, name_list, lth, img_root, mask_root, generate_img_root, generate_mask_root, need):
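    # One synthesis job (written so it can be dispatched via the mp pool below):
    # sample a random image/mask pair, augment jointly, and save the result.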
idx = random.randint(0, lth - 1)
img = cv2.imread(os.path.join(img_root, name_list[idx]))
mask = cv2.imread(os.path.join(mask_root, name_list[idx]))
img = np.asarray(img, dtype=np.uint8)
mask = np.asarray(mask, dtype=np.uint8)
img = img[np.newaxis, :]
mask = mask[np.newaxis, :]
aug_img, aug_mask = transform(images=img, segmentation_maps=mask)
aug_img = aug_img[0]
aug_mask = aug_mask[0]
cv2.imwrite(os.path.join(generate_img_root, "{:>05d}.jpg".format(i)), aug_img)
cv2.imwrite(os.path.join(generate_mask_root, "{:>05d}.jpg".format(i)), aug_mask)
def generate(img_root, mask_root, generate_img_root, generate_mask_root, need):
# pool = mp.Pool(processes=4)
name_list = sorted(os.listdir(img_root))
lth = len(name_list)
for i in range(need):
# pool.apply_async(gen, (i, name_list, lth, img_root, mask_root, generate_img_root, generate_mask_root, need,))
gen(i, name_list, lth, img_root, mask_root, generate_img_root, generate_mask_root, need)
# pool.close()
# pool.join()
def main():
# print('processing train')
# generate(train_img_root, train_mask_root, gen_train_img_root, gen_train_mask_root, 20000)
print('processing val')
generate(val_img_root, val_mask_root, gen_val_img_root, gen_val_mask_root, 8000)
if __name__ == '__main__':
main()
import os
from PIL import Image
import random
from tqdm import tqdm
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import multiprocessing as mp
def mkdir(path):
if not os.path.exists(path):
os.makedirs(path)
syn_root = '/home/mly/data/datasets/text_recognition/CDLA/syn'
src_root = '/home/mly/data/datasets/text_recognition/CDLA/gen/'
bg_root = '/home/mly/data/datasets/humanMatting/bg/'
src_train_root = os.path.join(src_root, 'train')
src_val_root = os.path.join(src_root, 'val')
img_train_root = os.path.join(src_train_root, 'img')
mask_train_root = os.path.join(src_train_root, 'mask')
img_val_root = os.path.join(src_val_root, 'img')
mask_val_root = os.path.join(src_val_root, 'mask')
img_train_syn_root = os.path.join(syn_root, 'train', 'img')
mask_train_syn_root = os.path.join(syn_root, 'train', 'mask')
img_val_syn_root = os.path.join(syn_root, 'val', 'img')
mask_val_syn_root = os.path.join(syn_root, 'val', 'mask')
mkdir(img_train_syn_root)
mkdir(mask_train_syn_root)
mkdir(img_val_syn_root)
mkdir(mask_val_syn_root)
def iteration(name, name_list, bg_lth, img_root, mask_root, gen_img_root, gen_mask_root, bg_list):
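    # Worker: composite one document onto a random background (resampling
    # backgrounds <= 200 bytes) and save the image and its mask.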
try:
img = Image.open(os.path.join(img_root, name)).convert('RGB')
mask = Image.open(os.path.join(mask_root, name)).convert('L')
bg_idx = random.randint(0, bg_lth - 1)
while os.path.getsize(bg_list[bg_idx]) <= 200:
bg_idx = random.randint(0, bg_lth - 1)
bg = Image.open(bg_list[bg_idx]).convert('RGB')
bg = bg.resize((2048, 2048))
width = random.randint(512, 2040)
height = random.randint(512, 2040)
img = img.resize((width, height))
mask = mask.resize((width, height))
x = random.randint(0, (2048 - width) - 1)
y = random.randint(0, (2048 - height) - 1)
bg.paste(img, (x, y), mask)
new_mask = Image.new('L', bg.size, 0)
new_mask.paste(mask, (x, y), mask)
    except Exception:
        print(f'error on {name}')
        return
    bg.save(os.path.join(gen_img_root, name))
    new_mask.save(os.path.join(gen_mask_root, name))
def process(img_root, mask_root, gen_img_root, gen_mask_root, bg_list, need):
bg_lth = len(bg_list)
name_list = os.listdir(img_root)
name_list_lth = len(name_list)
pool = mp.Pool(processes=4)
# for name in name_list:
# iteration(name, name_list, bg_lth, img_root, mask_root, gen_img_root, gen_mask_root, bg_list)
for _ in range(need):
name_list_idx = random.randint(0, name_list_lth - 1)
name = name_list[name_list_idx]
pool.apply_async(iteration, (name, name_list, bg_lth, img_root, mask_root, gen_img_root, gen_mask_root, bg_list, ))
pool.close()
pool.join()
def main():
bg_list = [os.path.join(bg_root, bg) for bg in os.listdir(bg_root)]
# print('start train')
# process(img_train_root, mask_train_root, img_train_syn_root, mask_train_syn_root, bg_list, need=30000)
# print('end train')
# print('start val')
process(img_val_root, mask_val_root, img_val_syn_root, mask_val_syn_root, bg_list, need=8000)
print('end val')
if __name__ == '__main__':
main()
import matplotlib.pyplot as plt
import numpy as np
import random
from scipy.spatial import Delaunay  # public API; scipy.spatial.qhull is private and removed in newer SciPy
import math
import cv2
class BasePerturbed(object):
# d = np.abs(sk_normalize(d, norm='l2'))
def get_normalize(self, d):
E = np.mean(d)
std = np.std(d)
d = (d-E)/std
# d = preprocessing.normalize(d, norm='l2')
return d
def get_0_1_d(self, d, new_max=1, new_min=0):
d_min = np.min(d)
d_max = np.max(d)
d = ((d-d_min)/(d_max-d_min))*(new_max-new_min)+new_min
return d
def draw_distance_hotmap(self, distance_vertex_line):
plt.matshow(distance_vertex_line, cmap='autumn')
plt.colorbar()
plt.show()
def get_pixel(self, p, origin_img):
try:
return origin_img[p[0], p[1]]
except:
# print('out !')
return np.array([257, 257, 257])
def nearest_neighbor_interpolation(self, xy, new_origin_img):
# xy = np.around(xy_).astype(np.int)
origin_pixel = self.get_pixel([xy[0], xy[1]], new_origin_img)
if (origin_pixel == 256).all():
return origin_pixel, False
return origin_pixel, True
def bilinear_interpolation(self, xy_, new_origin_img):
xy_int = [int(xy_[0]), int(xy_[1])]
xy_decimal = [round(xy_[0] - xy_int[0], 5), round(xy_[1] - xy_int[1], 5)]
x0_y0 = (1 - xy_decimal[0]) * (1 - xy_decimal[1]) * self.get_pixel([xy_int[0], xy_int[1]], new_origin_img)
x0_y1 = (1 - xy_decimal[0]) * (xy_decimal[1]) * self.get_pixel([xy_int[0], xy_int[1] + 1], new_origin_img)
x1_y0 = (xy_decimal[0]) * (1 - xy_decimal[1]) * self.get_pixel([xy_int[0] + 1, xy_int[1]], new_origin_img)
x1_y1 = (xy_decimal[0]) * (xy_decimal[1]) * self.get_pixel([xy_int[0] + 1, xy_int[1] + 1], new_origin_img)
return x0_y0, x0_y1, x1_y0, x1_y1
def get_coor(self, p, origin_label):
try:
return origin_label[p[0], p[1]]
except:
# print('out !')
return np.array([0, 0])
def bilinear_interpolation_coordinate_v4(self, xy_, new_origin_img):
xy_int = [int(xy_[0]), int(xy_[1])]
xy_decimal = [round(xy_[0] - xy_int[0], 5), round(xy_[1] - xy_int[1], 5)]
x_y_i = 0
x0, x1, x2, x3 = 0, 0, 0, 0
y0, y1, y2, y3 = 0, 0, 0, 0
x0_y0 = self.get_coor(np.array([xy_int[0], xy_int[1]]), new_origin_img)
x0_y1 = self.get_coor(np.array([xy_int[0], xy_int[1]+1]), new_origin_img)
x1_y0 = self.get_coor(np.array([xy_int[0]+1, xy_int[1]]), new_origin_img)
x1_y1 = self.get_coor(np.array([xy_int[0]+1, xy_int[1]+1]), new_origin_img)
if x0_y0[0] != 0:
x0 = (1 - xy_decimal[0])
if x0_y1[0] != 0:
x1 = (1 - xy_decimal[0])
if x1_y0[0] != 0:
x2 = (xy_decimal[0])
if x1_y1[0] != 0:
x3 = (xy_decimal[0])
if x0_y0[1] != 0:
y0 = (1 - xy_decimal[1])
if x0_y1[1] != 0:
y1 = (xy_decimal[1])
if x1_y0[1] != 0:
y2 = (1 - xy_decimal[1])
if x1_y1[1] != 0:
y3 = (xy_decimal[1])
x_ = x0+x1+x2+x3
if x_ == 0:
x = 0
else:
x = x0/x_*x0_y0[0]+x1/x_*x0_y1[0]+x2/x_*x1_y0[0]+x3/x_*x1_y1[0]
y_ = y0+y1+y2+y3
if y_ == 0:
y = 0
else:
y = y0/y_*x0_y0[1]+y1/y_*x0_y1[1]+y2/y_*x1_y0[1]+y3/y_*x1_y1[1]
return np.array([x, y])
def is_perform(self, execution, inexecution):
return random.choices([True, False], weights=[execution, inexecution])[0]
def get_margin_scale(self, min_, max_, clip_add_margin, new_shape):
if clip_add_margin < 0:
# raise Exception('add margin error')
return -1, -1
if min_-clip_add_margin//2 > 0 and max_+clip_add_margin//2 < new_shape:
if clip_add_margin%2 == 0:
clip_subtract_margin, clip_plus_margin = clip_add_margin//2, clip_add_margin//2
else:
clip_subtract_margin, clip_plus_margin = clip_add_margin//2, clip_add_margin//2+1
elif min_-clip_add_margin//2 < 0 and max_+clip_add_margin//2 <= new_shape:
clip_subtract_margin = min_
clip_plus_margin = clip_add_margin-clip_subtract_margin
elif max_+clip_add_margin//2 > new_shape and min_-clip_add_margin//2 >= 0:
clip_plus_margin = new_shape-max_
clip_subtract_margin = clip_add_margin-clip_plus_margin
else:
# raise Exception('add margin error')
return -1, -1
return clip_subtract_margin, clip_plus_margin
# class perturbedCurveImg(object):
# def __init__(self):
def adjust_position(self, x_min, y_min, x_max, y_max):
if (self.new_shape[0] - (x_max - x_min)) % 2 == 0:
f_g_0_0 = (self.new_shape[0] - (x_max - x_min)) // 2
f_g_0_1 = f_g_0_0
else:
f_g_0_0 = (self.new_shape[0] - (x_max - x_min)) // 2
f_g_0_1 = f_g_0_0 + 1
if (self.new_shape[1] - (y_max - y_min)) % 2 == 0:
f_g_1_0 = (self.new_shape[1] - (y_max - y_min)) // 2
f_g_1_1 = f_g_1_0
else:
f_g_1_0 = (self.new_shape[1] - (y_max - y_min)) // 2
f_g_1_1 = f_g_1_0 + 1
# return f_g_0_0, f_g_0_1, f_g_1_0, f_g_1_1
return f_g_0_0, f_g_1_0, self.new_shape[0] - f_g_0_1, self.new_shape[1] - f_g_1_1
def adjust_position_v2(self, x_min, y_min, x_max, y_max, new_shape):
if (new_shape[0] - (x_max - x_min)) % 2 == 0:
f_g_0_0 = (new_shape[0] - (x_max - x_min)) // 2
f_g_0_1 = f_g_0_0
else:
f_g_0_0 = (new_shape[0] - (x_max - x_min)) // 2
f_g_0_1 = f_g_0_0 + 1
if (new_shape[1] - (y_max - y_min)) % 2 == 0:
f_g_1_0 = (new_shape[1] - (y_max - y_min)) // 2
f_g_1_1 = f_g_1_0
else:
f_g_1_0 = (new_shape[1] - (y_max - y_min)) // 2
f_g_1_1 = f_g_1_0 + 1
# return f_g_0_0, f_g_0_1, f_g_1_0, f_g_1_1
return f_g_0_0, f_g_1_0, new_shape[0] - f_g_0_1, new_shape[1] - f_g_1_1
def adjust_border(self, x_min, y_min, x_max, y_max, x_min_new, y_min_new, x_max_new, y_max_new):
if ((x_max - x_min) - (x_max_new - x_min_new)) % 2 == 0:
f_g_0_0 = ((x_max - x_min) - (x_max_new - x_min_new)) // 2
f_g_0_1 = f_g_0_0
else:
f_g_0_0 = ((x_max - x_min) - (x_max_new - x_min_new)) // 2
f_g_0_1 = f_g_0_0 + 1
if ((y_max - y_min) - (y_max_new - y_min_new)) % 2 == 0:
f_g_1_0 = ((y_max - y_min) - (y_max_new - y_min_new)) // 2
f_g_1_1 = f_g_1_0
else:
f_g_1_0 = ((y_max - y_min) - (y_max_new - y_min_new)) // 2
f_g_1_1 = f_g_1_0 + 1
return f_g_0_0, f_g_0_1, f_g_1_0, f_g_1_1
def interp_weights(self, xyz, uvw):
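        # Precompute Delaunay simplex indices and barycentric weights at uvw so
        # that values sampled at xyz can be interpolated repeatedly via interpolate().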
        tri = Delaunay(xyz)
simplex = tri.find_simplex(uvw)
vertices = np.take(tri.simplices, simplex, axis=0)
# pixel_triangle = pixel[tri.simplices]
temp = np.take(tri.transform, simplex, axis=0)
delta = uvw - temp[:, 2]
bary = np.einsum('njk,nk->nj', temp[:, :2, :], delta)
return vertices, np.hstack((bary, 1 - bary.sum(axis=1, keepdims=True)))
def interpolate(self, values, vtx, wts):
return np.einsum('njk,nj->nk', np.take(values, vtx, axis=0), wts)
def pad(self, synthesis_perturbed_img_map, x_min, y_min, x_max, y_max):
synthesis_perturbed_img_map[x_min - 1, y_min:y_max] = synthesis_perturbed_img_map[x_min, y_min:y_max]
synthesis_perturbed_img_map[x_max + 1, y_min:y_max] = synthesis_perturbed_img_map[x_max, y_min:y_max]
        synthesis_perturbed_img_map[x_min:x_max, y_min - 1] = synthesis_perturbed_img_map[x_min:x_max, y_min]
        synthesis_perturbed_img_map[x_min:x_max, y_max + 1] = synthesis_perturbed_img_map[x_min:x_max, y_max]
synthesis_perturbed_img_map[x_min - 1, y_min - 1] = synthesis_perturbed_img_map[x_min, y_min]
synthesis_perturbed_img_map[x_min - 1, y_max + 1] = synthesis_perturbed_img_map[x_min, y_max]
synthesis_perturbed_img_map[x_max + 1, y_min - 1] = synthesis_perturbed_img_map[x_max, y_min]
synthesis_perturbed_img_map[x_max + 1, y_max + 1] = synthesis_perturbed_img_map[x_max, y_max]
return synthesis_perturbed_img_map
def isSavePerturbed(self, synthesis_perturbed_img, new_shape):
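        # 771 == 3 * 257: every border row/column should still hold the 257
        # out-of-range sentinel, otherwise the perturbed page was clipped.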
if np.sum(synthesis_perturbed_img[:, 0]) != 771 * new_shape[0] or np.sum(synthesis_perturbed_img[:, new_shape[1] - 1]) != 771 * new_shape[0] or \
np.sum(synthesis_perturbed_img[0, :]) != 771 * new_shape[1] or np.sum(synthesis_perturbed_img[new_shape[0] - 1, :]) != 771 * new_shape[1]:
# raise Exception('clip error')
return False
else:
return True
def get_angle(self, A, o, B):
v1 = o-A
v2 = o-B
return np.arccos((v1 @ v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)))*180/np.pi
def get_angle_4(self, pts):
a0_ = self.get_angle(pts[2], pts[0], pts[1])
a1_ = self.get_angle(pts[0], pts[1], pts[3])
a2_ = self.get_angle(pts[3], pts[2], pts[0])
a3_ = self.get_angle(pts[1], pts[3], pts[2])
return a0_, a1_, a2_, a3_
def HSV_v1(self, synthesis_perturbed_img_clip_HSV):
synthesis_perturbed_img_clip_HSV = cv2.cvtColor(synthesis_perturbed_img_clip_HSV, cv2.COLOR_RGB2HSV)
img_h = synthesis_perturbed_img_clip_HSV[:, :, 0].copy()
# img_s = synthesis_perturbed_img_clip_HSV[:, :, 1].copy()
img_v = synthesis_perturbed_img_clip_HSV[:, :, 2].copy()
if self.is_perform(0.2, 0.8):
img_h = (img_h + (random.random()-0.5) * 360) % 360 # img_h = np.minimum(np.maximum(img_h+20, 0), 360)
else:
img_h = (img_h + (random.random()-0.5) * 40) % 360
# img_s = np.minimum(np.maximum(img_s-0.2, 0), 1)
img_v = np.minimum(np.maximum(img_v + (random.random()-0.5)*60, 0), 255)
# img_v = cv2.equalizeHist(img_v.astype(np.uint8))
synthesis_perturbed_img_clip_HSV[:, :, 0] = img_h
# synthesis_perturbed_img_clip_HSV[:, :, 1] = img_s
synthesis_perturbed_img_clip_HSV[:, :, 2] = img_v
synthesis_perturbed_img_clip_HSV = cv2.cvtColor(synthesis_perturbed_img_clip_HSV, cv2.COLOR_HSV2RGB)
return synthesis_perturbed_img_clip_HSV
INFO:root:==============EPOCH 1 START================
INFO:root:epoch: 1/20, iteration: 1/62, loss: 1.2849
INFO:root:epoch: 1/20, iteration: 50/62, loss: 0.1668
INFO:root:EPOCH: 1/20, MIOU: 0.7167
INFO:root:VAL MIOU: 0.7908
INFO:root:==============EPOCH 2 START================
INFO:root:epoch: 2/20, iteration: 1/62, loss: 0.2679
INFO:root:epoch: 2/20, iteration: 50/62, loss: 0.1657
INFO:root:EPOCH: 2/20, MIOU: 0.8125
INFO:root:VAL MIOU: 0.7849
INFO:root:==============EPOCH 3 START================
INFO:root:epoch: 3/20, iteration: 1/62, loss: 0.7276
INFO:root:epoch: 3/20, iteration: 50/62, loss: 0.1731
INFO:root:EPOCH: 3/20, MIOU: 0.8486
INFO:root:VAL MIOU: 0.8699
INFO:root:==============EPOCH 4 START================
INFO:root:epoch: 4/20, iteration: 1/62, loss: 0.1249
INFO:root:epoch: 4/20, iteration: 50/62, loss: 0.1250
INFO:root:EPOCH: 4/20, MIOU: 0.8872
INFO:root:VAL MIOU: 0.8806
INFO:root:==============EPOCH 5 START================
INFO:root:epoch: 5/20, iteration: 1/62, loss: 0.1404
INFO:root:epoch: 5/20, iteration: 50/62, loss: 0.0660
INFO:root:EPOCH: 5/20, MIOU: 0.9052
INFO:root:VAL MIOU: 0.9054
INFO:root:==============EPOCH 6 START================
INFO:root:epoch: 6/20, iteration: 1/62, loss: 0.0706
INFO:root:epoch: 6/20, iteration: 50/62, loss: 0.0513
INFO:root:EPOCH: 6/20, MIOU: 0.9215
INFO:root:VAL MIOU: 0.9121
INFO:root:==============EPOCH 7 START================
INFO:root:epoch: 7/20, iteration: 1/62, loss: 0.0613
INFO:root:epoch: 7/20, iteration: 50/62, loss: 0.0654
INFO:root:EPOCH: 7/20, MIOU: 0.9178
INFO:root:VAL MIOU: 0.9116
INFO:root:==============EPOCH 8 START================
INFO:root:epoch: 8/20, iteration: 1/62, loss: 0.1080
INFO:root:epoch: 8/20, iteration: 50/62, loss: 0.0584
INFO:root:EPOCH: 8/20, MIOU: 0.9295
INFO:root:VAL MIOU: 0.9220
INFO:root:==============EPOCH 9 START================
INFO:root:epoch: 9/20, iteration: 1/62, loss: 0.1335
INFO:root:epoch: 9/20, iteration: 50/62, loss: 0.0744
INFO:root:EPOCH: 9/20, MIOU: 0.9265
INFO:root:VAL MIOU: 0.9245
INFO:root:==============EPOCH 10 START================
INFO:root:epoch: 10/20, iteration: 1/62, loss: 0.0387
INFO:root:epoch: 10/20, iteration: 50/62, loss: 0.0886
INFO:root:EPOCH: 10/20, MIOU: 0.9320
INFO:root:VAL MIOU: 0.9291
INFO:root:==============EPOCH 11 START================
INFO:root:epoch: 11/20, iteration: 1/62, loss: 0.0704
INFO:root:epoch: 11/20, iteration: 50/62, loss: 0.0589
INFO:root:EPOCH: 11/20, MIOU: 0.9380
INFO:root:VAL MIOU: 0.9296
INFO:root:==============EPOCH 12 START================
INFO:root:epoch: 12/20, iteration: 1/62, loss: 0.1132
INFO:root:epoch: 12/20, iteration: 50/62, loss: 0.0605
INFO:root:EPOCH: 12/20, MIOU: 0.9407
INFO:root:VAL MIOU: 0.9315
INFO:root:==============EPOCH 13 START================
INFO:root:epoch: 13/20, iteration: 1/62, loss: 0.0531
INFO:root:epoch: 13/20, iteration: 50/62, loss: 0.0633
INFO:root:EPOCH: 13/20, MIOU: 0.9377
INFO:root:VAL MIOU: 0.9275
INFO:root:==============EPOCH 14 START================
INFO:root:epoch: 14/20, iteration: 1/62, loss: 0.1511
INFO:root:epoch: 14/20, iteration: 50/62, loss: 0.1535
INFO:root:EPOCH: 14/20, MIOU: 0.9359
INFO:root:VAL MIOU: 0.9291
INFO:root:==============EPOCH 15 START================
INFO:root:epoch: 15/20, iteration: 1/62, loss: 0.1025
INFO:root:epoch: 15/20, iteration: 50/62, loss: 0.0960
INFO:root:EPOCH: 15/20, MIOU: 0.9389
INFO:root:VAL MIOU: 0.9315
INFO:root:==============EPOCH 16 START================
INFO:root:epoch: 16/20, iteration: 1/62, loss: 0.0281
INFO:root:epoch: 16/20, iteration: 50/62, loss: 0.0842
INFO:root:EPOCH: 16/20, MIOU: 0.9372
INFO:root:VAL MIOU: 0.9330
INFO:root:==============EPOCH 17 START================
INFO:root:epoch: 17/20, iteration: 1/62, loss: 0.0594
INFO:root:epoch: 17/20, iteration: 50/62, loss: 0.0577
INFO:root:EPOCH: 17/20, MIOU: 0.9403
INFO:root:VAL MIOU: 0.9332
INFO:root:==============EPOCH 18 START================
INFO:root:epoch: 18/20, iteration: 1/62, loss: 0.0584
INFO:root:epoch: 18/20, iteration: 50/62, loss: 0.0393
INFO:root:EPOCH: 18/20, MIOU: 0.9422
INFO:root:VAL MIOU: 0.9325
INFO:root:==============EPOCH 19 START================
INFO:root:epoch: 19/20, iteration: 1/62, loss: 0.0585
INFO:root:epoch: 19/20, iteration: 50/62, loss: 0.1805
INFO:root:EPOCH: 19/20, MIOU: 0.9418
INFO:root:VAL MIOU: 0.9342
INFO:root:==============EPOCH 20 START================
INFO:root:epoch: 20/20, iteration: 1/62, loss: 0.0346
INFO:root:epoch: 20/20, iteration: 50/62, loss: 0.0627
INFO:root:EPOCH: 20/20, MIOU: 0.9454
INFO:root:VAL MIOU: 0.9336
from .solver import *
import torch
import torchvision
from torch import nn
import torch.nn.functional as F
class HED_vgg16(nn.Module):
def __init__(self, num_filters=32, pretrained=False, class_number=2):
        # Layer definitions only; no computation happens here.
super().__init__()
encoder = torchvision.models.vgg16(pretrained=pretrained).features
self.pool = nn.MaxPool2d(2, 2)
self.conv1 = encoder[0:4]
self.score1 = nn.Sequential(nn.Conv2d(num_filters * 2, 1, 1, 1), nn.ReLU(inplace=True)) # 256*256
self.conv2 = encoder[5:9]
self.d_conv2 = nn.Sequential(nn.Conv2d(num_filters * 4, 1, 1, 1), nn.ReLU(inplace=True)) # 128*128
self.score2 = nn.UpsamplingBilinear2d(scale_factor=2) # 256*256
self.conv3 = encoder[10:16]
self.d_conv3 = nn.Sequential(nn.Conv2d(num_filters * 8, 1, 1, 1), nn.ReLU(inplace=True)) # 64*64
self.score3 = nn.UpsamplingBilinear2d(scale_factor=4) # 256*256
self.conv4 = encoder[17:23]
self.d_conv4 = nn.Sequential(nn.Conv2d(num_filters * 16, 1, 1, 1), nn.ReLU(inplace=True)) # 32*32
self.score4 = nn.UpsamplingBilinear2d(scale_factor=8) # 256*256
self.conv5 = encoder[24:30]
self.d_conv5 = nn.Sequential(nn.Conv2d(num_filters * 16, 1, 1, 1), nn.ReLU(inplace=True)) # 16*16
self.score5 = nn.UpsamplingBilinear2d(scale_factor=16) # 256*256
self.score = nn.Conv2d(5, class_number, 1, 1) # No relu
def forward(self, x):
        # Forward pass: compute the side outputs at each scale and fuse them.
x = self.conv1(x)
s1 = self.score1(x)
x = self.pool(x)
x = self.conv2(x)
s_x = self.d_conv2(x)
s2 = self.score2(s_x)
x = self.pool(x)
x = self.conv3(x)
s_x = self.d_conv3(x)
s3 = self.score3(s_x)
x = self.pool(x)
        x = self.conv4(x)
s_x = self.d_conv4(x)
s4 = self.score4(s_x)
x = self.pool(x)
x = self.conv5(x)
s_x = self.d_conv5(x)
s5 = self.score5(s_x)
score = self.score(torch.cat((s1, s2, s3, s4, s5), dim=1))
return score
class HED_res34(nn.Module):
def __init__(self, num_filters=32, pretrained=False, class_number=2):
super().__init__()
encoder = torchvision.models.resnet34(pretrained=pretrained)
self.pool = nn.MaxPool2d(3, 2, 1)
# start
self.start = nn.Sequential(encoder.conv1, encoder.bn1, encoder.relu) # 128*128
self.d_convs = nn.Sequential(nn.Conv2d(num_filters * 2, 1, 1, 1), nn.ReLU(inplace=True))
self.scores = nn.UpsamplingBilinear2d(scale_factor=2) # 256*256
self.layer1 = encoder.layer1 # 64*64
self.d_conv1 = nn.Sequential(nn.Conv2d(num_filters * 2, 1, 1, 1), nn.ReLU(inplace=True))
self.score1 = nn.UpsamplingBilinear2d(scale_factor=4) # 256*256
self.layer2 = encoder.layer2 # 32*32
self.d_conv2 = nn.Sequential(nn.Conv2d(num_filters * 4, 1, 1, 1), nn.ReLU(inplace=True))
self.score2 = nn.UpsamplingBilinear2d(scale_factor=8) # 256*256
self.layer3 = encoder.layer3 # 16*16
self.d_conv3 = nn.Sequential(nn.Conv2d(num_filters * 8, 1, 1, 1), nn.ReLU(inplace=True))
self.score3 = nn.UpsamplingBilinear2d(scale_factor=16) # 256*256
self.layer4 = encoder.layer4 # 8*8
self.d_conv4 = nn.Sequential(nn.Conv2d(num_filters * 16, 1, 1, 1), nn.ReLU(inplace=True))
self.score4 = nn.UpsamplingBilinear2d(scale_factor=32) # 256*256
self.score = nn.Conv2d(5, class_number, 1, 1) # No relu loss_func has softmax
def forward(self, x):
x = self.start(x)
s_x = self.d_convs(x)
ss = self.scores(s_x)
x = self.pool(x)
x = self.layer1(x)
s_x = self.d_conv1(x)
s1 = self.score1(s_x)
x = self.layer2(x)
s_x = self.d_conv2(x)
s2 = self.score2(s_x)
x = self.layer3(x)
s_x = self.d_conv3(x)
s3 = self.score3(s_x)
x = self.layer4(x)
s_x = self.d_conv4(x)
s4 = self.score4(s_x)
score = self.score(torch.cat((s1, s2, s3, s4, ss), dim=1))
return score
def cross_entropy_loss_RCF(prediction, labelf, beta=1.1):
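    # Class-balanced BCE from RCF: positive pixels are weighted by the negative
    # fraction and vice versa; pixels labelled 2 are ignored. Assumes `prediction`
    # already holds sigmoid probabilities in [0, 1] (the fused `score` output of
    # the models above is raw logits, so apply torch.sigmoid before calling this).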
label = labelf.long()
mask = labelf.clone()
num_positive = torch.sum(label == 1).float()
num_negative = torch.sum(label == 0).float()
mask[label == 1] = 1.0 * num_negative / (num_positive + num_negative)
mask[label == 0] = beta * num_positive / (num_positive + num_negative)
mask[label == 2] = 0
cost = F.binary_cross_entropy(
prediction, labelf, weight=mask, reduction='sum')
return cost
if __name__ == '__main__':
model = HED_res34()
total = sum([param.nelement() for param in model.parameters()])
print(total / 1e6)
del model
del total
model = HED_vgg16()
total = sum([param.nelement() for param in model.parameters()])
print(total / 1e6)
del model
del total
import torch
import torch.nn as nn
import os
import logging
from model.hed import *
from data.edge_loader import get_loader
from torch.optim import Adam, lr_scheduler
import torch.nn.functional as F
import cv2
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
logging.basicConfig(level=logging.INFO,
filename='/home/lxl/work/ocr/documentSeg/log/logfile/hed_edge_detection.log',
filemode='a')
class Solver:
def __init__(self):
self.train_loader, self.test_loader = get_loader()
self.len_train_loader = len(self.train_loader)
self.len_test_loader = len(self.test_loader)
self.model = HED_res34()
self.model = nn.DataParallel(self.model)
self.model = self.model.cuda()
self.epoch = 10
self.lr = 0.00005
self.weight_decay = 0.00001
self.save_dir = '/home/lxl/work/ocr/documentSeg/log/checkpoint/hed_edge_detection'
if not os.path.exists(self.save_dir):
os.makedirs(self.save_dir)
self.optimizer = Adam(self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay)
def train(self):
lr_decay = lr_scheduler.StepLR(self.optimizer, step_size=1, gamma=0.5)
for t in range(self.epoch):
logging.info("==============EPOCH {} START================".format(t + 1))
self.model.train()
for i, (x, y) in enumerate(self.train_loader):
x = x.cuda()
y = y.cuda()
self.optimizer.zero_grad()
pred = self.model(x)
                loss = cross_entropy_loss_RCF(torch.sigmoid(pred), y.float())  # RCF loss expects probabilities and a float label map
iteration = i + 1
if iteration % 50 == 0 or iteration == 1:
logging.info(
'epoch: {}/{}, iteration: {}/{}, loss: {:.4f}'.format(
t + 1, self.epoch, iteration, self.len_train_loader, loss))
loss.backward()
self.optimizer.step()
lr_decay.step()
            logging.info('EPOCH {}/{} finished'.format(t + 1, self.epoch))
self.model.eval()
torch.save(self.model.module.state_dict(), '%s/ckpt_epoch_%s.pt' % (self.save_dir, str(t + 1)))
self.test()
@torch.no_grad()
def test(self):
self.model.eval()
        for i, (x, y) in enumerate(self.test_loader):
            x = x.cuda()
            y = y.cuda()
            pred = self.model(x)
        # No edge metric is implemented yet; this pass only runs inference.
        logging.info('VAL pass finished (no metric implemented)')
import torch
import torch.nn as nn
from segmentation_models_pytorch.losses import SoftCrossEntropyLoss
import segmentation_models_pytorch as smp
import os
import logging
from data.finetune_loader import *
from torch.optim import Adam, lr_scheduler
import torch.nn.functional as F
import cv2
import imutils
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
logging.basicConfig(level=logging.INFO,
filename='/home/lxl/work/ocr/documentSeg/log/logfile/finetune_random_crop.log',
filemode='a')
def calc_iou(inputs, targets):
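    # Soft IoU on flattened tensors, computed on CPU so the running total does
    # not hold GPU memory.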
inputs = inputs.detach().cpu()
targets = targets.detach().cpu()
inputs = inputs.view(-1)
targets = targets.view(-1)
intersection = (inputs * targets).sum()
total = (inputs + targets).sum()
union = total - intersection
IoU = intersection / union
del inputs, targets
return IoU
class BCEDiceLoss(nn.Module):
def __init__(self, weight=None, size_average=True):
super().__init__()
def forward(self, input, target):
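        # Sum of BCE and (1 - Dice) with +1 smoothing; expects sigmoid probabilities.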
pred = input.view(-1)
truth = target.view(-1)
# BCE loss
bce_loss = nn.BCELoss()(pred, truth).double()
# Dice Loss
dice_coef = (2.0 * (pred * truth).double().sum() + 1) / (
pred.double().sum() + truth.double().sum() + 1
)
return bce_loss + (1 - dice_coef)
class Solver:
def __init__(self):
self.train_loader, self.test_loader = get_loader()
self.len_train_loader = len(self.train_loader)
self.len_test_loader = len(self.test_loader)
self.model = smp.DeepLabV3Plus(
encoder_name='resnet50',
encoder_weights='imagenet',
in_channels=3,
classes=1,
activation='sigmoid'
)
self.save_dir = '/home/lxl/work/ocr/documentSeg/log/checkpoint/finetune_random_crop/'
self.model = nn.DataParallel(self.model)
self.model = self.model.cuda()
self.epoch = 20
self.lr = 0.0008
self.weight_decay = 0.00001
if not os.path.exists(self.save_dir):
os.makedirs(self.save_dir)
self.optimizer = Adam(self.model.parameters(), lr=self.lr, weight_decay=self.weight_decay)
self.criterion = BCEDiceLoss().cuda()
def train(self):
lr_decay = lr_scheduler.StepLR(self.optimizer, step_size=2, gamma=0.5)
for t in range(self.epoch):
logging.info("==============EPOCH {} START================".format(t + 1))
self.model.train()
all_iou = 0.0
for i, (x, y) in enumerate(self.train_loader):
x = x.cuda()
y = y.cuda()
self.optimizer.zero_grad()
pred = self.model(x)
loss = self.criterion(pred, y)
iou = calc_iou(pred, y)
all_iou += iou
iteration = i + 1
if iteration % 50 == 0 or iteration == 1:
logging.info(
'epoch: {}/{}, iteration: {}/{}, loss: {:.4f}'.format(
t + 1, self.epoch, iteration, self.len_train_loader, loss.item()))
loss.backward()
self.optimizer.step()
lr_decay.step()
miou = all_iou / self.len_train_loader
logging.info('EPOCH: {}/{}, MIOU: {:.4f}'.format(t + 1, self.epoch, miou))
self.model.eval()
torch.save(self.model.module.state_dict(), '%s/ckpt_epoch_%s.pt' % (self.save_dir, str(t + 1)))
self.val()
@torch.no_grad()
def val(self):
self.model.eval()
all_iou = 0.0
for i, (x, y) in enumerate(self.test_loader):
x = x.cuda()
y = y.cuda()
pred = self.model(x)
iou = calc_iou(pred, y)
all_iou += iou
miou = all_iou / self.len_test_loader
logging.info('VAL MIOU: {:.4f}'.format(miou))
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import torch
import torch.nn as nn
import albumentations as A
import segmentation_models_pytorch as smp
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
root = '/home/mly/data/datasets/text_recognition/finetune/src/'
img_root = os.path.join(root, 'img')
df = pd.read_csv(os.path.join(root, 'test.csv'))
visual_root = os.path.join(root, 'visual')
os.makedirs(visual_root, exist_ok=True)
def load_model(ckpt_path):
model = smp.DeepLabV3Plus(
encoder_name='resnet50',
encoder_weights='imagenet',
in_channels=3,
classes=1,
activation='sigmoid'
)
model.load_state_dict(torch.load(ckpt_path))
model.eval()
return model
def infer(model, img):
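    # Resize to 512x512, normalize, run the model, binarize at threshold 0.1,
    # and scale the mask back to the original image size.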
h, w = img.shape[:2]
transform = A.Compose([
A.Resize(512, 512),
A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
augmented = transform(image=img)
img = augmented['image']
img = np.transpose(img, (2, 0, 1))
img = torch.from_numpy(img).unsqueeze(0)
img = img.to(torch.float32)
out = model(img)
out = out.squeeze(0).detach().cpu().numpy().transpose((1, 2, 0))
threshold = 0.1
out[out >= threshold] = 1.
out[out < threshold] = 0.
out = np.uint8(out * 255)
out = cv2.resize(out, (w, h))
return out
def main():
model = load_model('./log/checkpoint/finetune_random_crop/ckpt_epoch_20.pt')
name_list = df.path.to_list()
for name in tqdm(name_list):
img = cv2.imread(os.path.join(img_root, name))
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
mask = infer(model, img)
cv2.imwrite(os.path.join(visual_root, name), mask)
if __name__ == '__main__':
main()
from model import *
if __name__ == '__main__':
solver = Solver()
solver.train()
nohup python -u run.py &