Example Usage
Here you can find example code showing how to use the GOOSE dataset in a custom application. Everything can be tested in the linked Jupyter Notebook.
2D Image Segmentation
Workspace Preparation
In this guide, we use SuperGradients [1] to conveniently load and evaluate different models. You can use either conda or venv to create a new virtual environment with the following dependencies:
name: sg_env
channels:
  - pytorch
  - nvidia
dependencies:
  - python==3.9
  - pytorch==1.13.1
  - torchvision==0.14.1
  - pytorch-cuda==11.7
  - pip
  - pip:
      - super_gradients==3.2.0
      - matplotlib
      - pillow
      - numpy
      - torchmetrics==0.8.0
Save the environment specification to a file and create the environment (e.g. using conda env create -f env.yaml). Then activate the environment (e.g. using conda activate sg_env) and you are good to go.
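To quickly verify the environment, you can check that the main packages import and that CUDA is visible, for example:
# Optional environment check
import torch
import super_gradients  # import check only

print('torch', torch.__version__, '| CUDA available:', torch.cuda.is_available())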
PyTorch Dataset
The GOOSE dataset is divided into three splits: train, validation and test. The first step is to read the data from the dataset folder.
In the following example this is achieved in two main steps:
- Parse the images from the root folder into (three) Python data dictionaries with the image paths and related information.
- Create PyTorch Dataset objects that load the images and can be used for training or inference.
Additionally, the dataset provides a mapping CSV file (goose_label_mapping.csv) which contains information about the classes, such as the label id, the class name, and whether the class has instances or not (thing vs. stuff).
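The mapping can be inspected with Python's csv module; since the exact column names come from the CSV header, printing them first is a safe way to see what is available (the path is a placeholder for your local dataset root):
import csv

with open('/path/to/goose/goose_label_mapping.csv', newline='') as f:
    reader = csv.DictReader(f)
    rows = list(reader)

print(reader.fieldnames)  # column names, e.g. label id, class name, ...
print(rows[0])            # first class entry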
Parsing and Reading Data
import csv
import glob
import os
from typing import Dict, List, Optional, Tuple

import tqdm


def __check_labels(img_path: str, lbl_path: str) -> Tuple[bool, Optional[List[str]]]:
    '''
    Check if the pair of image and label files exists. Filter non-existing pairs.
    '''
    name = os.path.basename(img_path)
    name, ext = name.split('.')
    # Remove the last two underscore-separated tokens to get the base name shared with the labels
    name = name.split('_')[:-2]
    name = '_'.join(name)
    names = []
    for l in ['color', 'instanceids', 'labelids']:
        # Check if the corresponding label file exists
        lbl_name = name + '_' + l + '.' + ext
        if not os.path.exists(os.path.join(lbl_path, lbl_name)):
            return False, None
        names.append(lbl_name)
    return True, names
def __goose_datadict_folder(img_path: str, lbl_path: str):
    '''
    Create a data dictionary with image paths
    '''
    subfolders = glob.glob(os.path.join(img_path, '*/'), recursive=False)
    subfolders = [f.split('/')[-2] for f in subfolders]
    valid_imgs = []
    valid_lbls = []
    valid_insta = []
    valid_color = []
    datadict = []
    for s in tqdm.tqdm(subfolders):
        imgs_p = os.path.join(img_path, s)
        lbls_p = os.path.join(lbl_path, s)
        imgs = glob.glob(os.path.join(imgs_p, '*.png'))
        for i in imgs:
            valid, lbl_names = __check_labels(i, lbls_p)
            if not valid:
                continue
            valid_imgs.append(i)
            valid_color.append(os.path.join(lbls_p, lbl_names[0]))
            valid_insta.append(os.path.join(lbls_p, lbl_names[1]))
            valid_lbls.append(os.path.join(lbls_p, lbl_names[2]))
    for i, m, p, c in zip(valid_imgs, valid_lbls, valid_insta, valid_color):
        datadict.append({
            'img_path': i,
            'semantic_path': m,
            'instance_path': p,
            'color_path': c,
        })
    return datadict
def goose_create_dataDict(src_path: str, mapping_csv_name: str = 'goose_label_mapping.csv') -> Tuple[List[Dict], List[Dict], List[Dict], Optional[List[Dict]]]:
    '''
    Parameters:
        src_path         : path to the dataset root
        mapping_csv_name : name of the class-mapping CSV file (None to skip it)
    Returns:
        datadict_test  : dicts with the dataset test images information
        datadict_train : dicts with the dataset train images information
        datadict_val   : dicts with the dataset validation images information
        mapping        : list of dicts with the class mapping (or None)
    '''
    if mapping_csv_name is not None:
        mapping_path = os.path.join(src_path, mapping_csv_name)
        mapping = []
        with open(mapping_path, newline='') as f:
            reader = csv.DictReader(f)
            for r in reader:
                mapping.append(r)
    else:
        mapping = None

    img_path = os.path.join(src_path, 'images')
    lbl_path = os.path.join(src_path, 'labels')

    datadicts = []
    for c in ['test', 'train', 'val']:
        print("### " + c.capitalize() + " Data ###")
        datadicts.append(
            __goose_datadict_folder(
                os.path.join(img_path, c),
                os.path.join(lbl_path, c)
            )
        )
    test, train, val = datadicts
    return test, train, val, mapping
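For example, the dictionaries can be created and a single entry inspected (the path is a placeholder for your local dataset root):
test_dict, train_dict, val_dict, mapping = goose_create_dataDict('/path/to/goose')

print(len(train_dict), 'train |', len(val_dict), 'val |', len(test_dict), 'test')
# Every entry holds the paths of one image and its associated label files
print(train_dict[0]['img_path'])
print(train_dict[0]['semantic_path'])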
Dataset Module
This Dataset class is specific to semantic segmentation and performs a square center crop and resize of the images. It can be used like any other PyTorch Dataset object to train a model.
from typing import Dict, Iterable, List

import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms


class GOOSE_SemanticDataset(Dataset):
    """
    Example PyTorch Dataset module for semantic tasks with GOOSE.
    """
    def __init__(self, dataset_dict: List[Dict], crop: bool = True, resize_size: Iterable[int] = None):
        '''
        Parameters:
            dataset_dict [Iter] : List of dicts with the image information generated by *goose_create_dataDict*
            crop [Bool]         : Whether to make a square crop of the images or not
            resize_size [Iter]  : Target resize size of the images (applied after the crop if crop == True)
        '''
        self.dataset_dict = dataset_dict
        self.transforms = transforms.Compose([
            transforms.ToTensor(),
        ])
        self.resize_size = resize_size
        self.crop = crop

    def preprocess(self, image):
        if image is None:
            return None
        if self.crop:
            # Square crop in the center
            s = min([image.width, image.height])
            image = transforms.CenterCrop((s, s)).forward(image)
        if self.resize_size is not None:
            # Resize to the given size (nearest neighbour keeps label ids intact)
            image = image.resize(self.resize_size, resample=Image.NEAREST)
        return image

    def __getitem__(self, i):
        '''
        Parameters:
            i [int] : Index of the sample to get
        Returns:
            image_tensor [torch.Tensor] : 3 x H x W tensor
            label_tensor [torch.Tensor] : H x W tensor with the semantic map
        '''
        image = Image.open(self.dataset_dict[i]['img_path']).convert('RGB')
        label = Image.open(self.dataset_dict[i]['semantic_path']).convert('L')
        image = self.preprocess(image)
        label = self.preprocess(label)
        image_tensor = self.transforms(image)
        label_tensor = torch.from_numpy(np.array(label)).long()
        return image_tensor, label_tensor

    def __len__(self):
        return len(self.dataset_dict)
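A minimal usage sketch, assuming the train dictionary from the previous step is available: each sample comes back as an image tensor and a semantic map of the requested size.
train_dataset = GOOSE_SemanticDataset(train_dict, crop=True, resize_size=(768, 768))

img, lbl = train_dataset[0]
print(img.shape)          # torch.Size([3, 768, 768])
print(lbl.shape)          # torch.Size([768, 768])
print(torch.unique(lbl))  # class ids present in this sample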
Training
For this example we use SuperGradients to ease the training process, but the workflow would be very similar with any other custom model or framework.
First, the image paths are parsed and the data dictionaries are created with their information. Dataset objects built from them are then passed to DataLoader objects so they can be used with the SuperGradients Trainer.
import os

import torch
from torch.utils.data import DataLoader

import super_gradients as sg
from super_gradients.common.object_names import Models
from super_gradients.training.metrics.segmentation_metrics import IoU
## Load the data
#
PATH = '/path/to/goose'
test_dict, train_dict, val_dict, mapping_dict = goose_create_dataDict(PATH)
train_dataset = GOOSE_SemanticDataset(train_dict, crop=True, resize_size=(768,768))
val_dataset = GOOSE_SemanticDataset(val_dict, crop=True, resize_size=(768,768))
## Set-up for training
#
# Create output directory
EXPERIMENT_NAME = "GOOSE_train"
WS_PATH = os.getcwd()
CHECKPOINT_DIR = os.path.join(WS_PATH, 'output', 'ckpts')
if not os.path.isdir(CHECKPOINT_DIR):
    os.makedirs(CHECKPOINT_DIR)
# Params
BATCH_SIZE = 5
N_EPOCHS = 10
# Dataloaders
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=5, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=5, drop_last=True)
Then the Trainer is configured and the model is loaded. In this case, DDRNet-39 weights pre-trained on the Cityscapes dataset are used as the starting point.
# Trainer Set-up
device = 'cuda' if torch.cuda.is_available() else 'cpu'
sg.setup_device(device=device)
trainer = sg.Trainer(experiment_name=EXPERIMENT_NAME, ckpt_root_dir=CHECKPOINT_DIR)

## Load Model
#
model = sg.training.models.get(model_name=Models.DDRNET_39,
                               num_classes=64,
                               pretrained_weights='cityscapes')
model.eval()
# Set-up Training params
lr_updates = [int(.3 * N_EPOCHS), int(.6 * N_EPOCHS), int(.9 * N_EPOCHS)]
train_params = {
    "max_epochs": N_EPOCHS,
    "lr_mode": "step",
    "lr_updates": lr_updates,
    "lr_decay_factor": 0.1,
    "initial_lr": 0.005,
    "optimizer": 'sgd',
    "loss": 'cross_entropy',
    "average_best_models": False,
    "greater_metric_to_watch_is_better": True,
    "loss_logging_items_names": ["loss"],
    "drop_last": True,
}
train_params["train_metrics_list"] = [IoU(num_classes=64)]
train_params["valid_metrics_list"] = [IoU(num_classes=64)]
train_params["metric_to_watch"] = "IoU"
Lastly, the training is started.
## Train
#
trainer.train(model=model, training_params=train_params, train_loader=train_dataloader, valid_loader=val_dataloader)
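After training, SuperGradients stores checkpoints in the experiment folder under CHECKPOINT_DIR. Assuming the default file naming (the exact layout can differ between SuperGradients versions), the best model can be reloaded roughly like this:
# Assumed default checkpoint location; adjust if your version nests it differently
best_ckpt = os.path.join(CHECKPOINT_DIR, EXPERIMENT_NAME, 'ckpt_best.pth')

model = sg.training.models.get(model_name=Models.DDRNET_39,
                               num_classes=64,
                               checkpoint_path=best_ckpt)
model.eval()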
Test Inference
This example shows how to run inference with a trained checkpoint using PyTorch and visualize the result as a semantic map.
Load Model
from matplotlib import pyplot as plt
import matplotlib
import matplotlib.patches as mpatches


def run_inference(img, model):
    '''
    Run inference on a single image tensor and return the semantic mask.
    '''
    if len(img.shape) != 4:
        # Add the batch dimension expected by the model
        img = torch.unsqueeze(img, 0)
    with torch.no_grad():
        mask = model(img)
    masks = torch.sigmoid(mask).squeeze()
    # Per-pixel argmax over the class dimension
    label = torch.max(masks, 0)[1]
    return label


## Import Model
#
model = sg.training.models.get(model_name=Models.DDRNET_39,
                               num_classes=64,
                               checkpoint_path="path/to/checkpoint.pth")
model.eval()
Run Inference on Images
The following loop iterates through the images of a dataset object and runs inference on them. The input image, the network output and the ground truth are displayed with the corresponding class ids. The test dataset is built first, as shown below.
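Assuming the data dictionaries from above are available, the test dataset is built in the same way as the training and validation datasets:
test_dataset = GOOSE_SemanticDataset(test_dict, crop=True, resize_size=(768, 768))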
## Iterate through images and run inference on them
#
N_SAMPLES = 10

viridis = matplotlib.colormaps['viridis'].resampled(64)

for idx in np.random.randint(0, len(test_dataset), min(N_SAMPLES, len(test_dataset))):
    # Configure plot
    f, axarr = plt.subplots(1, 3)
    f.subplots_adjust(hspace=10.0, right=1.5)
    axarr[0].set_xlabel("RGB")
    axarr[1].set_xlabel("Predicted")
    axarr[2].set_xlabel("Ground Truth")

    # Get images
    img, label = test_dataset[idx]
    mask = run_inference(img, model)
    imgs = [img.permute(1, 2, 0).numpy(), np.asarray(mask), np.asarray(label)]

    for i in range(len(axarr)):
        if i != 0:
            im = axarr[i].imshow(imgs[i], cmap=viridis)
            im.set_clim(0, 63)  # class ids range from 0 to 63
            # Legend with the class ids present in the image
            handles = []
            for c in np.unique(imgs[i]):
                color = viridis(c / 63)
                handles.append(mpatches.Patch(color=color, label=f"{c}"))
            axarr[i].legend(handles=handles, bbox_to_anchor=(1.0, 1.00))
        else:
            im = axarr[i].imshow(imgs[i])

plt.show()
The results should look similar to this:
3D Pointcloud Segmentation
Coming soon.
References
- [1] Aharon et al., "Super-Gradients", https://zenodo.org/record/7789328 (2021)
- [2] "Deep Dual-Resolution Networks for Real-Time and Accurate Semantic Segmentation of Traffic Scenes", IEEE Trans. Intell. Transp. Syst. (2022)