From 4fc55958b1360e893a5a4073c6f4b4589af9b2b6 Mon Sep 17 00:00:00 2001 From: shayaharon Date: Mon, 13 Nov 2023 11:03:18 +0200 Subject: [PATCH] final touches --- notebooks/quickstart_segmentation.ipynb | 1 - .../segmentation_connect_custom_dataset.ipynb | 1941 ++++++++--------- ...nsfer_learning_semantic_segmentation.ipynb | 1 - 3 files changed, 970 insertions(+), 973 deletions(-) diff --git a/notebooks/quickstart_segmentation.ipynb b/notebooks/quickstart_segmentation.ipynb index 63150b51c5..13798e9e3a 100644 --- a/notebooks/quickstart_segmentation.ipynb +++ b/notebooks/quickstart_segmentation.ipynb @@ -145,7 +145,6 @@ ], "source": [ "! pip install -qq super-gradients==3.4.1\n", - "\n", "! pip install -qq prettyformatter\n" ] }, diff --git a/notebooks/segmentation_connect_custom_dataset.ipynb b/notebooks/segmentation_connect_custom_dataset.ipynb index 2e3b465b5b..1e2d55dad4 100644 --- a/notebooks/segmentation_connect_custom_dataset.ipynb +++ b/notebooks/segmentation_connect_custom_dataset.ipynb @@ -1,980 +1,979 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "sh6t_y7KzqBH" - }, - "source": [ - "![SG - Horizontal.png]()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5aISf1B-AGDQ" - }, - "source": [ - "# SuperGradients Semantic Segmentation How to Connect Custom Dataset\n", - "\n", - "In this tutorial we will explore how you can connect your custom Semantic Segmentation dataset to SG.\n", - "\n", - "Since SG trainer is fully compatible with PyTorch data loaders, we will demonstrate how to build one and use it.\n", - "\n", - "The notebook is divided into 5 sections:\n", - "1. Experiment setup\n", - "2. Dataset definition: create a proxy dataset and create a dataloader\n", - "3. Architecture definition: pre-trained PPLiteSeg on Cityscapes \n", - "4. Training setup\n", - "5. 
Training and Evaluation\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-1nPOPmc1lGp" - }, - "source": [ - "#Install SG" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VAssbjJw7Yt1" - }, - "source": [ - "The cell below will install **super_gradients** which will automatically get all its dependencies. Let's import all the installed libraries to make sure they installed succesfully." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "JKce1SM6voVH", - "outputId": "a6397510-a140-443f-f13c-eec1272cc1a8" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.1.0+cu118)\n", - "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (0.16.0+cu118)\n", - "Requirement already satisfied: torchaudio in /usr/local/lib/python3.10/dist-packages (2.1.0+cu118)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.13.1)\n", - "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch) (4.5.0)\n", - "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.12)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.2.1)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.2)\n", - "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2023.6.0)\n", - "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.1.0)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from torchvision) (1.23.5)\n", - "Requirement already satisfied: requests in 
/usr/local/lib/python3.10/dist-packages (from torchvision) (2.31.0)\n", - "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision) (9.4.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.3)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (2.0.7)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (2023.7.22)\n", - "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n", - " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Building wheel for pycocotools (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", - " Building wheel for termcolor (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Building wheel for treelib (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Building wheel for coverage (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", - " Building wheel for xhtml2pdf (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Building wheel for stringcase (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Building wheel for svglib (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", - "lida 0.0.10 requires fastapi, which is not installed.\n", - "lida 0.0.10 requires kaleido, which is not installed.\n", - "lida 0.0.10 requires python-multipart, which is not installed.\n", - "lida 0.0.10 requires uvicorn, which is not installed.\n", - "tensorflow 2.14.0 requires numpy>=1.23.5, but you have numpy 1.23.0 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0m" - ] - } - ], - "source": [ - "! pip install torch torchvision torchaudio\n", - "! pip install -qq super-gradients==3.4.1\n", - "! pip install -qq prettyformatter" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "njthhNJR1pJm" - }, - "source": [ - "# 1. Experiment setup" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YPym4wvpOcOJ" - }, - "source": [ - "We will first initialize our **trainer** which will be in charge of everything, like training, evaluation, saving checkpoints, plotting etc.\n", - "\n", - "The **experiment name** argument is important as every checkpoints, logs and tensorboards to be saved in a directory with the same name. This directory will be created as a sub-directory of **ckpt_root_dir** as follow:\n", - "\n", - "```\n", - "ckpt_root_dir\n", - "|─── experiment_name_1\n", - "│ ckpt_best.pth # Model checkpoint on best epoch\n", - "│ ckpt_latest.pth # Model checkpoint on last epoch\n", - "│ average_model.pth # Model checkpoint averaged over epochs\n", - "│ events.out.tfevents.1659878383... 
# Tensorflow artifacts of a specific run\n", - "│ log_Aug07_11_52_48.txt # Trainer logs of a specific run\n", - "└─── experiment_name_2\n", - " ...\n", - "```\n", - "In this notebook multi-gpu training is set as `OFF`, for Distributed training multi_gpu can be set as\n", - " `MultiGPUMode.DISTRIBUTED_DATA_PARALLEL` or `MultiGPUMode.DATA_PARALLEL`.\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "A2PlnTWpimnH" - }, - "source": [ - "Let's define **ckpt_root_dir** inside the Colab, later we can use it to start TensorBoard and monitor the run." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "_v1N3kXs3wo1" - }, - "outputs": [], - "source": [ - "from super_gradients.training import Trainer, MultiGPUMode\n", - "\n", - "\n", - "CHECKPOINT_DIR = '/home/notebook_ckpts/'\n", - "trainer = Trainer(experiment_name='transfer_learning_semantic_segementation_ppLite', ckpt_root_dir=CHECKPOINT_DIR)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "J9ZaMulSvwhr" - }, - "source": [ - "# 2. 
Dataset definition\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_1TXuJKkKzFJ" - }, - "source": [ - "## 2.A Generate Proxy Dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Y7us7VHRig7M" - }, - "source": [ - "\n", - "A proxy dataset generation is available merely to demonstrate an end-to-end training pipeline in this notebook.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "id": "wbdVYnIyjgv-" - }, - "outputs": [], - "source": [ - "from PIL import Image\n", - "import os\n", - "import numpy as np\n", - "\n", - "\n", - "# creation of proxy dataset to demonstrate usage\n", - "def generate_proxy_dataset(write_path: str, num_samples: int, num_classes: int, img_size: int = 256):\n", - " # Create training files and text\n", - " os.makedirs(os.path.join(write_path, 'images', 'train'), exist_ok=True)\n", - " os.makedirs(os.path.join(write_path, 'images', 'val'), exist_ok=True)\n", - " os.makedirs(os.path.join(write_path, 'labels', 'train'), exist_ok=True)\n", - " os.makedirs(os.path.join(write_path, 'labels', 'val'), exist_ok=True)\n", - "\n", - " train_fp = open(os.path.join(write_path, 'train.txt'), 'w')\n", - " val_fp = open(os.path.join(write_path, 'val.txt'), 'w')\n", - "\n", - " # Create random samples\n", - " for n in range(num_samples):\n", - " img = np.random.rand(img_size, img_size, 3) * 255\n", - " img = Image.fromarray(img.astype('uint8')).convert('RGB')\n", - "\n", - " lbl = np.random.randint(0, num_classes, size=(img_size, img_size))\n", - " lbl = Image.fromarray(lbl.astype('uint8')).convert('L')\n", - "\n", - " im_string = '%000d.jpg' % n\n", - " lbl_string = '%000d.png' % n\n", - "\n", - " img_train_fn = os.path.join(write_path, 'images', 'train', im_string)\n", - " img_val_fn = img_train_fn.replace(\"train\", \"val\")\n", - " img.save(img_train_fn)\n", - " img.save(img_val_fn)\n", - "\n", - " lbl_train_fn = os.path.join(write_path, 'labels', 'train', lbl_string)\n", - " 
lbl_val_fn = lbl_train_fn.replace(\"train\", \"val\")\n", - " lbl.save(lbl_train_fn)\n", - " lbl.save(lbl_val_fn)\n", - "\n", - " train_fp.write(f\"{img_train_fn} {lbl_train_fn}\\n\")\n", - " val_fp.write(f\"{img_val_fn} {lbl_val_fn}\\n\")\n", - "\n", - " train_fp.close()\n", - " val_fp.close()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "DXu4yfuZoiv0", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "dccaf4ba-159f-4a47-d13d-ba4b60eaac80" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Train file `train.txt` content: \n", - "/content/example_data/images/train/0.jpg /content/example_data/labels/train/0.png\n", - "/content/example_data/images/train/1.jpg /content/example_data/labels/train/1.png\n", - "/content/example_data/images/train/2.jpg /content/example_data/labels/train/2.png\n", - "/content/example_data/images/train/3.jpg /content/example_data/labels/train/3.png\n", - "/content/example_data/images/train/4.jpg /content/example_data/labels/train/4.png\n", - "/content/example_data/images/train/5.jpg /content/example_data/labels/train/5.png\n", - "/content/example_data/images/train/6.jpg /content/example_data/labels/train/6.png\n", - "/content/example_data/images/train/7.jpg /content/example_data/labels/train/7.png\n", - "/content/example_data/images/train/8.jpg /content/example_data/labels/train/8.png\n", - "/content/example_data/images/train/9.jpg /content/example_data/labels/train/9.png\n" - ] - } - ], - "source": [ - "num_classes = 10\n", - "generate_proxy_dataset('/content/example_data', num_samples=10, num_classes=num_classes)\n", - "\n", - "print(\"Train file `train.txt` content: \")\n", - "! 
cat /content/example_data/train.txt" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MDksFYrIqClt" - }, - "source": [ - "## 2.B Create Torch Dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "id": "AGziBKSIqaUu" - }, - "outputs": [], - "source": [ - "import torch\n", - "from torch.utils.data import Dataset\n", - "from torchvision import transforms, utils\n", - "\n", - "\n", - "class CustomDataset(Dataset):\n", - " \"\"\"\n", - " A PyTorch Dataset class to be used in a PyTorch DataLoader to create batches.\n", - " \"\"\"\n", - "\n", - " def __init__(self, data_folder, split):\n", - " \"\"\"\n", - " :param data_folder: folder where data files are stored\n", - " :param split: split, one of 'TRAIN' or 'TEST'\n", - " \"\"\"\n", - " self.data_folder = data_folder\n", - " self.split = split.lower()\n", - " assert self.split in {'train', 'val'}\n", - "\n", - " # Read data files\n", - " with open(os.path.join(data_folder, self.split + '.txt'), 'r') as f:\n", - " data_lines = f.readlines()\n", - " self.samples_fn = [line.strip().split(\" \") for line in data_lines]\n", - "\n", - " self.transforms = transforms.Compose([transforms.ToTensor()])\n", - "\n", - " def __getitem__(self, i):\n", - " # Read image and label\n", - " image = Image.open(self.samples_fn[i][0]).convert('RGB')\n", - " label = Image.open(self.samples_fn[i][1])\n", - "\n", - " image_tensor = self.transforms(image)\n", - " label_tensor = torch.from_numpy(np.array(label)).long()\n", - "\n", - " return image_tensor, label_tensor\n", - "\n", - "\n", - " def __len__(self):\n", - " return len(self.samples_fn)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "id": "2B0hlas_1Rh-" - }, - "outputs": [], - "source": [ - "train_dataset = CustomDataset(\"/content/example_data\", split=\"train\")\n", - "val_dataset = CustomDataset(\"/content/example_data\", split=\"val\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": 
"eIG5tsiuor9E" - }, - "source": [ - "Let's have a look at the first sample:" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "id": "ZsHqcq1jpN0F", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "e9c1182a-5359-45b6-c0f3-ad430a4fc67d" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "torch.Size([3, 256, 256]) torch.Size([256, 256])\n", - "tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])\n" - ] - } - ], - "source": [ - "img, lbl = train_dataset[0]\n", - "print(img.shape, lbl.shape)\n", - "print(torch.unique(lbl))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aWfFrYLzo9j8" - }, - "source": [ - "## 2.C Create Torch Dataloader" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "D3ThxDIopDDB" - }, - "source": [] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "id": "XrWjWfjXnw_r" - }, - "outputs": [], - "source": [ - "from torch.utils.data import Dataset, DataLoader\n", - "\n", - "train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=2)\n", - "val_dataloader = DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=2)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vB1sGPO8qwZJ" - }, - "source": [ - "Lets' have a look at the first batch:\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "O-KuZQ3XBduM", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "489fc05e-f972-464d-c150-360e4f2dbd7e" - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "torch.Size([4, 3, 256, 256])" - ] - }, - "metadata": {}, - "execution_count": 10 - } - ], - "source": [ - "next(iter(train_dataloader))[0].shape" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fFfvyMHU32QF" - }, - "source": [ - "\n", - "# 3. 
Architecture definition" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EpqgjQjl4awr" - }, - "source": [ - "SG includes implementations of many different architectures for object detection tasks that can be found [here](https://github.com/Deci-AI/super-gradients#implemented-model-architectures)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GNM64JAa4sbF" - }, - "source": [ - "Create a PPLiteSeg nn.Module, with 1 class segmentation head classifier. For simplicity `use_aux_head` is set as `False`\n", - "and extra Auxiliary heads aren't used for training.\n", - "\n", - "Other segmentation modules can be used for this task such as, DDRNet, STDC and RegSeg.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "id": "YDK4btf04Gbu", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "cc7f1ab1-3a01-49c1-c9a2-6ca434dcc192" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "Downloading: \"https://sghub.deci.ai/models/pp_lite_t_seg75_cityscapes.pth\" to /root/.cache/torch/hub/checkpoints/pp_lite_t_seg75_cityscapes.pth\n", - "100%|██████████| 31.4M/31.4M [00:01<00:00, 32.4MB/s]\n", - "[2023-11-12 14:41:45] INFO - checkpoint_utils.py - Successfully loaded pretrained weights for architecture pp_lite_t_seg75\n" - ] - } - ], - "source": [ - "from super_gradients.training import models\n", - "from super_gradients.common.object_names import Models\n", - "\n", - "model = models.get(model_name=Models.PP_LITE_T_SEG75,\n", - " arch_params={\"use_aux_heads\": False},\n", - " num_classes=num_classes,\n", - " pretrained_weights=\"cityscapes\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "40UcYJ3u5JyF" - }, - "source": [ - "That being said, SG allows you to use one of SG implemented architectures or your custom architecture, as long as it inherits torch.nn.Module." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "LYPVR-XM4GsZ" - }, - "source": [ - "# 4. Training setup\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6K_56lDV8azX" - }, - "source": [ - "\n", - "Here we define the training recipe. The full parameters can be found here [training parameters supported](https://deci-ai.github.io/super-gradients/user_guide.html#training-parameters).\n" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "id": "3eRe0hBz4G1n" - }, - "outputs": [], - "source": [ - "from super_gradients.training.metrics.segmentation_metrics import IoU\n", - "from super_gradients.training.utils.callbacks import BinarySegmentationVisualizationCallback, Phase\n", - "\n", - "\n", - "train_params = {\"max_epochs\": 10,\n", - " \"lr_mode\": \"cosine\",\n", - " \"initial_lr\": 0.005,\n", - " \"optimizer\": \"SGD\",\n", - " \"loss\": \"cross_entropy\",\n", - " \"average_best_models\": False,\n", - " \"metric_to_watch\": \"IoU\",\n", - " \"greater_metric_to_watch_is_better\": True,\n", - " \"train_metrics_list\": [IoU(num_classes=10)],\n", - " \"valid_metrics_list\": [IoU(num_classes=10)],\n", - " \"loss_logging_items_names\": [\"loss\"],\n", - " \"phase_callbacks\": [BinarySegmentationVisualizationCallback(phase=Phase.VALIDATION_BATCH_END,\n", - " freq=1,\n", - " last_img_idx_in_batch=4)],\n", - "\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "D3tVVUhy4OqP" - }, - "source": [ - "# 5. Training and evaluation\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8tKUuxbe9NlQ" - }, - "source": [ - "The logs and the checkpoint for the latest epoch will be kept in your experiment folder.\n", - "\n", - "To start training we'll call train(...) and provide it with the objects we construted above: the model, the training parameters and the data loaders." 
- ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "sh6t_y7KzqBH" + }, + "source": [ + "![SG - Horizontal.png]()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5aISf1B-AGDQ" + }, + "source": [ + "# SuperGradients Semantic Segmentation How to Connect Custom Dataset\n", + "\n", + "In this tutorial we will explore how you can connect your custom Semantic Segmentation dataset to SG.\n", + "\n", + "Since SG trainer is fully compatible with PyTorch data loaders, we will demonstrate how to build one and use it.\n", + "\n", + "The notebook is divided into 5 sections:\n", + "1. Experiment setup\n", + "2. Dataset definition: create a proxy dataset and create a dataloader\n", + "3. Architecture definition: pre-trained PPLiteSeg on Cityscapes \n", + "4. Training setup\n", + "5. Training and Evaluation\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-1nPOPmc1lGp" + }, + "source": [ + "# Install SG" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VAssbjJw7Yt1" + }, + "source": [ + "The cell below will install **super_gradients** which will automatically get all its dependencies. Let's import all the installed libraries to make sure they installed successfully."
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JKce1SM6voVH", + "outputId": "a6397510-a140-443f-f13c-eec1272cc1a8" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.1.0+cu118)\n", + "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (0.16.0+cu118)\n", + "Requirement already satisfied: torchaudio in /usr/local/lib/python3.10/dist-packages (2.1.0+cu118)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.13.1)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch) (4.5.0)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.2.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.2)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2023.6.0)\n", + "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.1.0)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from torchvision) (1.23.5)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from torchvision) (2.31.0)\n", + "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision) (9.4.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.3)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from 
requests->torchvision) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision) (2023.7.22)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n", + " Preparing metadata (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + " Installing build dependencies ... \u001B[?25l\u001B[?25hdone\n", + " Getting requirements to build wheel ... \u001B[?25l\u001B[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001B[?25l\u001B[?25hdone\n", + " Preparing metadata (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + " Preparing metadata (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + " Preparing metadata (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + " Preparing metadata (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + " Preparing metadata (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + " Preparing metadata (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + " Building wheel for pycocotools (pyproject.toml) ... \u001B[?25l\u001B[?25hdone\n", + " Building wheel for termcolor (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + " Building wheel for treelib (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + " Building wheel for coverage (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + " Building wheel for xhtml2pdf (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + " Building wheel for antlr4-python3-runtime (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + " Building wheel for stringcase (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + " Building wheel for svglib (setup.py) ... 
\u001B[?25l\u001B[?25hdone\n", + "\u001B[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "lida 0.0.10 requires fastapi, which is not installed.\n", + "lida 0.0.10 requires kaleido, which is not installed.\n", + "lida 0.0.10 requires python-multipart, which is not installed.\n", + "lida 0.0.10 requires uvicorn, which is not installed.\n", + "tensorflow 2.14.0 requires numpy>=1.23.5, but you have numpy 1.23.0 which is incompatible.\u001B[0m\u001B[31m\n", + "\u001B[0m" + ] + } + ], + "source": [ + "! pip install -qq super-gradients==3.4.1\n", + "! pip install -qq prettyformatter" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "njthhNJR1pJm" + }, + "source": [ + "# 1. Experiment setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YPym4wvpOcOJ" + }, + "source": [ + "We will first initialize our **trainer** which will be in charge of everything, like training, evaluation, saving checkpoints, plotting etc.\n", + "\n", + "The **experiment name** argument is important, as all checkpoints, logs and tensorboards will be saved in a directory with the same name. This directory will be created as a sub-directory of **ckpt_root_dir** as follows:\n", + "\n", + "```\n", + "ckpt_root_dir\n", + "|─── experiment_name_1\n", + "│ ckpt_best.pth # Model checkpoint on best epoch\n", + "│ ckpt_latest.pth # Model checkpoint on last epoch\n", + "│ average_model.pth # Model checkpoint averaged over epochs\n", + "│ events.out.tfevents.1659878383...
# Tensorflow artifacts of a specific run\n", + "│ log_Aug07_11_52_48.txt # Trainer logs of a specific run\n", + "└─── experiment_name_2\n", + " ...\n", + "```\n", + "In this notebook multi-gpu training is set as `OFF`, for Distributed training multi_gpu can be set as\n", + " `MultiGPUMode.DISTRIBUTED_DATA_PARALLEL` or `MultiGPUMode.DATA_PARALLEL`.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A2PlnTWpimnH" + }, + "source": [ + "Let's define **ckpt_root_dir** inside the Colab, later we can use it to start TensorBoard and monitor the run." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "_v1N3kXs3wo1" + }, + "outputs": [], + "source": [ + "from super_gradients.training import Trainer, MultiGPUMode\n", + "\n", + "\n", + "CHECKPOINT_DIR = '/home/notebook_ckpts/'\n", + "trainer = Trainer(experiment_name='transfer_learning_semantic_segementation_ppLite', ckpt_root_dir=CHECKPOINT_DIR)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J9ZaMulSvwhr" + }, + "source": [ + "# 2. 
Dataset definition\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_1TXuJKkKzFJ" + }, + "source": [ + "## 2.A Generate Proxy Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Y7us7VHRig7M" + }, + "source": [ + "\n", + "A proxy dataset generation is available merely to demonstrate an end-to-end training pipeline in this notebook.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "wbdVYnIyjgv-" + }, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import os\n", + "import numpy as np\n", + "\n", + "\n", + "# creation of proxy dataset to demonstrate usage\n", + "def generate_proxy_dataset(write_path: str, num_samples: int, num_classes: int, img_size: int = 256):\n", + " # Create training files and text\n", + " os.makedirs(os.path.join(write_path, 'images', 'train'), exist_ok=True)\n", + " os.makedirs(os.path.join(write_path, 'images', 'val'), exist_ok=True)\n", + " os.makedirs(os.path.join(write_path, 'labels', 'train'), exist_ok=True)\n", + " os.makedirs(os.path.join(write_path, 'labels', 'val'), exist_ok=True)\n", + "\n", + " train_fp = open(os.path.join(write_path, 'train.txt'), 'w')\n", + " val_fp = open(os.path.join(write_path, 'val.txt'), 'w')\n", + "\n", + " # Create random samples\n", + " for n in range(num_samples):\n", + " img = np.random.rand(img_size, img_size, 3) * 255\n", + " img = Image.fromarray(img.astype('uint8')).convert('RGB')\n", + "\n", + " lbl = np.random.randint(0, num_classes, size=(img_size, img_size))\n", + " lbl = Image.fromarray(lbl.astype('uint8')).convert('L')\n", + "\n", + " im_string = '%000d.jpg' % n\n", + " lbl_string = '%000d.png' % n\n", + "\n", + " img_train_fn = os.path.join(write_path, 'images', 'train', im_string)\n", + " img_val_fn = img_train_fn.replace(\"train\", \"val\")\n", + " img.save(img_train_fn)\n", + " img.save(img_val_fn)\n", + "\n", + " lbl_train_fn = os.path.join(write_path, 'labels', 'train', lbl_string)\n", + " 
lbl_val_fn = lbl_train_fn.replace(\"train\", \"val\")\n", + " lbl.save(lbl_train_fn)\n", + " lbl.save(lbl_val_fn)\n", + "\n", + " train_fp.write(f\"{img_train_fn} {lbl_train_fn}\\n\")\n", + " val_fp.write(f\"{img_val_fn} {lbl_val_fn}\\n\")\n", + "\n", + " train_fp.close()\n", + " val_fp.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "DXu4yfuZoiv0", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "dccaf4ba-159f-4a47-d13d-ba4b60eaac80" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Train file `train.txt` content: \n", + "/content/example_data/images/train/0.jpg /content/example_data/labels/train/0.png\n", + "/content/example_data/images/train/1.jpg /content/example_data/labels/train/1.png\n", + "/content/example_data/images/train/2.jpg /content/example_data/labels/train/2.png\n", + "/content/example_data/images/train/3.jpg /content/example_data/labels/train/3.png\n", + "/content/example_data/images/train/4.jpg /content/example_data/labels/train/4.png\n", + "/content/example_data/images/train/5.jpg /content/example_data/labels/train/5.png\n", + "/content/example_data/images/train/6.jpg /content/example_data/labels/train/6.png\n", + "/content/example_data/images/train/7.jpg /content/example_data/labels/train/7.png\n", + "/content/example_data/images/train/8.jpg /content/example_data/labels/train/8.png\n", + "/content/example_data/images/train/9.jpg /content/example_data/labels/train/9.png\n" + ] + } + ], + "source": [ + "num_classes = 10\n", + "generate_proxy_dataset('/content/example_data', num_samples=10, num_classes=num_classes)\n", + "\n", + "print(\"Train file `train.txt` content: \")\n", + "! 
cat /content/example_data/train.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MDksFYrIqClt" + }, + "source": [ + "## 2.B Create Torch Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "AGziBKSIqaUu" + }, + "outputs": [], + "source": [ + "import torch\n", + "from torch.utils.data import Dataset\n", + "from torchvision import transforms, utils\n", + "\n", + "\n", + "class CustomDataset(Dataset):\n", + " \"\"\"\n", + " A PyTorch Dataset class to be used in a PyTorch DataLoader to create batches.\n", + " \"\"\"\n", + "\n", + " def __init__(self, data_folder, split):\n", + " \"\"\"\n", + " :param data_folder: folder where data files are stored\n", + " :param split: split, one of 'TRAIN' or 'TEST'\n", + " \"\"\"\n", + " self.data_folder = data_folder\n", + " self.split = split.lower()\n", + " assert self.split in {'train', 'val'}\n", + "\n", + " # Read data files\n", + " with open(os.path.join(data_folder, self.split + '.txt'), 'r') as f:\n", + " data_lines = f.readlines()\n", + " self.samples_fn = [line.strip().split(\" \") for line in data_lines]\n", + "\n", + " self.transforms = transforms.Compose([transforms.ToTensor()])\n", + "\n", + " def __getitem__(self, i):\n", + " # Read image and label\n", + " image = Image.open(self.samples_fn[i][0]).convert('RGB')\n", + " label = Image.open(self.samples_fn[i][1])\n", + "\n", + " image_tensor = self.transforms(image)\n", + " label_tensor = torch.from_numpy(np.array(label)).long()\n", + "\n", + " return image_tensor, label_tensor\n", + "\n", + "\n", + " def __len__(self):\n", + " return len(self.samples_fn)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "2B0hlas_1Rh-" + }, + "outputs": [], + "source": [ + "train_dataset = CustomDataset(\"/content/example_data\", split=\"train\")\n", + "val_dataset = CustomDataset(\"/content/example_data\", split=\"val\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": 
"eIG5tsiuor9E" + }, + "source": [ + "Let's have a look at the first sample:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "ZsHqcq1jpN0F", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "e9c1182a-5359-45b6-c0f3-ad430a4fc67d" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "torch.Size([3, 256, 256]) torch.Size([256, 256])\n", + "tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])\n" + ] + } + ], + "source": [ + "img, lbl = train_dataset[0]\n", + "print(img.shape, lbl.shape)\n", + "print(torch.unique(lbl))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aWfFrYLzo9j8" + }, + "source": [ + "## 2.C Create Torch Dataloader" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D3ThxDIopDDB" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "XrWjWfjXnw_r" + }, + "outputs": [], + "source": [ + "from torch.utils.data import Dataset, DataLoader\n", + "\n", + "train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=2)\n", + "val_dataloader = DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vB1sGPO8qwZJ" + }, + "source": [ + "Lets' have a look at the first batch:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "O-KuZQ3XBduM", + "colab": { + "base_uri": "https://localhost:8080/" }, + "outputId": "489fc05e-f972-464d-c150-360e4f2dbd7e" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "id": "-Ojnc1bk9L3s", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "b36b8b9b-b554-444e-d440-02bf623c3efa" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "[2023-11-12 14:41:51] WARNING - sg_trainer.py - Train dataset size % batch_size != 0 and drop_last=False, this might result 
in smaller last batch.\n", - "[2023-11-12 14:41:58] INFO - sg_trainer.py - Starting a new run with `run_id=RUN_20231112_144158_892860`\n", - "[2023-11-12 14:41:58] INFO - sg_trainer.py - Checkpoints directory: /home/notebook_ckpts/transfer_learning_semantic_segementation_ppLite/RUN_20231112_144158_892860\n", - "/usr/local/lib/python3.10/dist-packages/super_gradients/common/registry/registry.py:72: DeprecationWarning: Object name `cross_entropy` is now deprecated. Please replace it with `CrossEntropyLoss`.\n", - " warnings.warn(f\"Object name `{name}` is now deprecated. Please replace it with `{deprecated_names[name]}`.\", DeprecationWarning)\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "The console stream is now moved to /home/notebook_ckpts/transfer_learning_semantic_segementation_ppLite/RUN_20231112_144158_892860/console_Nov12_14_41_58.txt\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "[2023-11-12 14:41:59] INFO - sg_trainer_utils.py - TRAINING PARAMETERS:\n", - " - Mode: Single GPU\n", - " - Number of GPUs: 1 (1 available on the machine)\n", - " - Full dataset size: 10 (len(train_set))\n", - " - Batch size per GPU: 4 (batch_size)\n", - " - Batch Accumulate: 1 (batch_accumulate)\n", - " - Total batch size: 4 (num_gpus * batch_size)\n", - " - Effective Batch size: 4 (num_gpus * batch_size * batch_accumulate)\n", - " - Iterations per epoch: 3 (len(train_loader))\n", - " - Gradient updates per epoch: 3 (len(train_loader) / batch_accumulate)\n", - "\n", - "[2023-11-12 14:41:59] INFO - sg_trainer.py - Started training for 10 epochs (0/9)\n", - "\n", - "Train epoch 0: 100%|██████████| 3/3 [00:08<00:00, 2.91s/it, CrossEntropyLoss=3.49, IoU=0.0319, gpu_mem=0.686]\n", - "Validating: 100%|██████████| 3/3 [00:00<00:00, 3.83it/s]\n", - "[2023-11-12 14:42:09] INFO - base_sg_logger.py - Checkpoint saved in 
/home/notebook_ckpts/transfer_learning_semantic_segementation_ppLite/RUN_20231112_144158_892860/ckpt_best.pth\n", - "[2023-11-12 14:42:09] INFO - sg_trainer.py - Best checkpoint overriden: validation IoU: 0.013753225095570087\n", - "Train epoch 1: 0%| | 0/3 [00:00