From c6b75159e906127f1b1f5b789296f1cbf766d23d Mon Sep 17 00:00:00 2001
From: 8ToThePowerOfMol
Date: Sun, 13 Aug 2023 17:23:45 +0200
Subject: [PATCH 1/5] feat: add FastSAM inference

(1) only inference is supported, (2) FastSAM might segment non-building
objects
---
 hot_fair_utilities/inference/predict.py | 58 ++++++++++++++++---------
 hot_fair_utilities/inference/utils.py   | 21 +++++++++
 requirements.txt                        |  3 +-
 3 files changed, 61 insertions(+), 21 deletions(-)

diff --git a/hot_fair_utilities/inference/predict.py b/hot_fair_utilities/inference/predict.py
index c2b4494d..7fdc1344 100644
--- a/hot_fair_utilities/inference/predict.py
+++ b/hot_fair_utilities/inference/predict.py
@@ -6,11 +6,13 @@
 # Third party imports
 import numpy as np
+import torch
 from tensorflow import keras
+from ultralytics import YOLO, FastSAM
 
 from ..georeferencing import georeference
 from ..utils import remove_files
-from .utils import open_images, save_mask
+from .utils import open_images, save_mask, initialize_model
 
 BATCH_SIZE = 8
 IMAGE_SIZE = 256
@@ -43,34 +45,50 @@ def predict(
     """
     start = time.time()
     print(f"Using : {checkpoint_path}")
-    model = keras.models.load_model(checkpoint_path)
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    model = initialize_model(checkpoint_path, device=device)
    print(f"It took {round(time.time()-start)} sec to load model")
     start = time.time()
 
     os.makedirs(prediction_path, exist_ok=True)
     image_paths = glob(f"{input_path}/*.png")
-    for i in range((len(image_paths) + BATCH_SIZE - 1) // BATCH_SIZE):
-        image_batch = image_paths[BATCH_SIZE * i : BATCH_SIZE * (i + 1)]
-        images = open_images(image_batch)
-        images = images.reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 3)
-
-        preds = model.predict(images)
-        preds = np.argmax(preds, axis=-1)
-        preds = np.expand_dims(preds, axis=-1)
-        preds = np.where(
-            preds > confidence, 1, 0
-        )  # Filter out low confidence predictions
-
-        for idx, path in enumerate(image_batch):
-            save_mask(
-                preds[idx],
-                str(f"{prediction_path}/{Path(path).stem}.png"),
-            )
+    if isinstance(model, keras.Model):
+        for i in range((len(image_paths) + BATCH_SIZE - 1) // BATCH_SIZE):
+            image_batch = image_paths[BATCH_SIZE * i : BATCH_SIZE * (i + 1)]
+            images = open_images(image_batch)
+            images = images.reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 3)
+
+            preds = model.predict(images)
+            preds = np.argmax(preds, axis=-1)
+            preds = np.expand_dims(preds, axis=-1)
+            preds = np.where(
+                preds > confidence, 1, 0
+            )  # Filter out low confidence predictions
+
+            for idx, path in enumerate(image_batch):
+                save_mask(
+                    preds[idx],
+                    str(f"{prediction_path}/{Path(path).stem}.png"),
+                )
+    elif isinstance(model, YOLO):
+        raise NotImplementedError
+    elif isinstance(model, FastSAM):
+        results = model(image_paths, stream=True, imgsz=IMAGE_SIZE,
+                        prompts=["building" for _ in range(len(image_paths))])
+        for i, r in enumerate(results):
+            preds = r.masks.data.max(dim=0)[0]
+            preds = torch.where(preds > confidence, torch.tensor(1), torch.tensor(0))
+            preds = preds.detach().cpu().numpy()
+            save_mask(preds, str(f"{prediction_path}/{Path(image_paths[i]).stem}.png"))
+    else:
+        raise RuntimeError("Loaded model is not supported")
+
     print(
         f"It took {round(time.time()-start)} sec to predict with {confidence} Confidence Threshold"
     )
-    keras.backend.clear_session()
+    if isinstance(model, keras.Model):
+        keras.backend.clear_session()
     del model
     start = time.time()

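The FastSAM branch reduces the stack of per-instance masks returned for one image to a single binary building mask before calling save_mask(). For reference, a standalone sketch of that reduction on dummy tensors (not the Ultralytics API):

```python
# Minimal sketch of the mask merge/threshold above, on dummy tensors.
import torch

N, H, W = 3, 256, 256                    # three detected instances on one tile
instance_masks = torch.rand(N, H, W)     # stand-in for r.masks.data

merged = instance_masks.max(dim=0)[0]    # pixel-wise max over instances -> (H, W)
binary = (merged > 0.5).to(torch.uint8)  # same effect as the torch.where(...) in the patch
print(binary.shape, binary.unique())     # torch.Size([256, 256]) tensor([0, 1], ...)
```
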
diff --git a/hot_fair_utilities/inference/utils.py b/hot_fair_utilities/inference/utils.py
index 44109041..6df6c2b0 100644
--- a/hot_fair_utilities/inference/utils.py
+++ b/hot_fair_utilities/inference/utils.py
@@ -1,8 +1,10 @@
 from typing import List
 
 import numpy as np
+import torch
 from PIL import Image
 from tensorflow import keras
+from ultralytics import YOLO, FastSAM
 
 IMAGE_SIZE = 256
 
@@ -25,3 +27,22 @@ def save_mask(mask: np.ndarray, filename: str) -> None:
     reshaped_mask = mask.reshape((IMAGE_SIZE, IMAGE_SIZE)) * 255
     result = Image.fromarray(reshaped_mask.astype(np.uint8))
     result.save(filename)
+
+
+def initialize_model(path, device=None):
+    """Loads either a Keras or a PyTorch model."""
+    if not device:
+        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+    if path.lower() == "yolo":  # Ultralytics pretrained YOLOv8
+        model = YOLO('yolov8n-seg.pt')
+    elif path.lower() == "fastsam":  # Ultralytics pretrained FastSAM
+        model = FastSAM('FastSAM-s.pt')
+    elif path.endswith('.pth') or path.endswith('.pt'):  # PyTorch saved checkpoint
+        model = torch.load(path, map_location=device)
+    elif path.endswith('.pb') or path.endswith('.tf'):  # TensorFlow saved checkpoint
+        model = keras.models.load_model(path)
+    else:
+        raise ValueError("Unsupported model format or path")
+
+    return model
diff --git a/requirements.txt b/requirements.txt
index 0d49a92e..1d33c58c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,4 +15,5 @@ bumpver
 black
 isort
 build
-twine
\ No newline at end of file
+twine
+ultralytics
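Taken together, patch 1 makes predict() dispatch on the checkpoint argument. A usage sketch under the dispatch rules of initialize_model() above; the paths are hypothetical, and passing "fastsam" implies downloading the pretrained FastSAM-s weights:

```python
# Usage sketch for the new dispatch (hypothetical paths).
from hot_fair_utilities import predict

predict(
    checkpoint_path="fastsam",            # or "yolo", "model.pt", "checkpoint.tf"
    input_path="ramp-data/sample/input",  # directory of 256x256 PNG tiles
    prediction_path="ramp-data/sample/prediction",
    confidence=0.5,
)
```
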
From a4b5d22fc1733c0fdfb9315b832aee65f3ec51db Mon Sep 17 00:00:00 2001
From: 8ToThePowerOfMol
Date: Sun, 5 Nov 2023 19:14:24 +0100
Subject: [PATCH 2/5] Introduce evaluation to fAIr

---
 Dockerfile                               |  3 ++
 hot_fair_utilities/__init__.py           |  2 +-
 hot_fair_utilities/inference/__init__.py |  1 +
 hot_fair_utilities/inference/evaluate.py | 55 ++++++++++++++++++++++++
 hot_fair_utilities/inference/predict.py  |  8 ++--
 hot_fair_utilities/inference/utils.py    | 21 ++++-----
 requirements.txt                         |  2 +-
 7 files changed, 74 insertions(+), 18 deletions(-)
 create mode 100644 hot_fair_utilities/inference/evaluate.py

diff --git a/Dockerfile b/Dockerfile
index 841b0dda..4af541c1 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -8,6 +8,9 @@ RUN apt-get install -y libgdal-dev
 ENV CPLUS_INCLUDE_PATH=/usr/include/gdal
 ENV C_INCLUDE_PATH=/usr/include/gdal
 
+# Install ultralytics for YOLO, FastSAM, etc. together with pytorch and other dependencies
+RUN pip install ultralytics==8.0.203
+
 #install numpy before gdal
 RUN pip install numpy==1.23.5
 
diff --git a/hot_fair_utilities/__init__.py b/hot_fair_utilities/__init__.py
index 724b35ff..d326788a 100644
--- a/hot_fair_utilities/__init__.py
+++ b/hot_fair_utilities/__init__.py
@@ -1,5 +1,5 @@
 from .georeferencing import georeference
-from .inference import predict
+from .inference import predict, evaluate
 from .postprocessing import polygonize, vectorize
 from .preprocessing import preprocess
 from .training import train
diff --git a/hot_fair_utilities/inference/__init__.py b/hot_fair_utilities/inference/__init__.py
index f606e56b..ea947c3b 100644
--- a/hot_fair_utilities/inference/__init__.py
+++ b/hot_fair_utilities/inference/__init__.py
@@ -1 +1,2 @@
 from .predict import predict
+from .evaluate import evaluate
diff --git a/hot_fair_utilities/inference/evaluate.py b/hot_fair_utilities/inference/evaluate.py
new file mode 100644
index 00000000..50d0cec6
--- /dev/null
+++ b/hot_fair_utilities/inference/evaluate.py
@@ -0,0 +1,55 @@
+# Patched from ramp-code.scripts.calculate_accuracy.iou created for the ramp project by carolyn.johnston@dev.global
+
+from pathlib import Path
+
+import geopandas as gpd
+from ramp.utils.eval_utils import get_iou_accuracy_metrics
+
+
+def evaluate(test_path, truth_path, filter_area_m2=None, iou_threshold=0.5, verbose=False):
+    """
+    Calculate precision, recall and F1 score based on the intersection-over-union
+    accuracy evaluation protocol defined by RAMP.
+
+    The predicted masks will be georeferenced with EPSG:3857 as CRS.
+
+    Args:
+        test_path: Path of the predicted labels to evaluate.
+        truth_path: Path of the ground-truth labels.
+        filter_area_m2: Minimum area of buildings to analyze in m^2.
+        iou_threshold: (float, 0
 ) -> None:
     """Predict building footprints for aerial images given a model checkpoint.
@@ -34,6 +34,7 @@ def predict(
         input_path: Path of the directory where the images are stored.
         prediction_path: Path of the directory where the predicted images will go.
         confidence: Threshold probability for filtering out low-confidence predictions.
+        remove_images: Whether to delete the prediction images after they have been georeferenced.
 
     Example::
 
@@ -95,5 +96,6 @@
     georeference(prediction_path, prediction_path, is_mask=True)
     print(f"It took {round(time.time()-start)} sec to georeference")
 
-    remove_files(f"{prediction_path}/*.xml")
-    remove_files(f"{prediction_path}/*.png")
+    if remove_images:
+        remove_files(f"{prediction_path}/*.xml")
+        remove_files(f"{prediction_path}/*.png")
diff --git a/hot_fair_utilities/inference/utils.py b/hot_fair_utilities/inference/utils.py
index 6df6c2b0..7b5eb5db 100644
--- a/hot_fair_utilities/inference/utils.py
+++ b/hot_fair_utilities/inference/utils.py
@@ -4,7 +4,7 @@
 import torch
 from PIL import Image
 from tensorflow import keras
-from ultralytics import YOLO, FastSAM
+
 
 IMAGE_SIZE = 256
 
@@ -31,18 +31,13 @@ def save_mask(mask: np.ndarray, filename: str) -> None:
 
 
 def initialize_model(path, device=None):
     """Loads either a Keras or a PyTorch model."""
-    if not device:
-        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
-    if path.lower() == "yolo":  # Ultralytics pretrained YOLOv8
-        model = YOLO('yolov8n-seg.pt')
-    elif path.lower() == "fastsam":  # Ultralytics pretrained FastSAM
-        model = FastSAM('FastSAM-s.pt')
-    elif path.endswith('.pth') or path.endswith('.pt'):  # PyTorch saved checkpoint
+    if not isinstance(path, str):  # probably an already loaded model
+        return path
+
+    if path.endswith('.pth') or path.endswith('.pt'):  # PyTorch saved checkpoint
+        if not device:
+            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         model = torch.load(path, map_location=device)
-    elif path.endswith('.pb') or path.endswith('.tf'):  # TensorFlow saved checkpoint
-        model = keras.models.load_model(path)
     else:
-        raise ValueError("Unsupported model format or path")
-
+        model = keras.models.load_model(path)
     return model
diff --git a/requirements.txt b/requirements.txt
index 13721b49..faa7e418 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -17,4 +17,4 @@ black
 isort
 build
 twine
-ultralytics
+ultralytics==8.0.203
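The body of evaluate() is truncated in this excerpt, so the sketch below follows only the signature and docstring shown above. The paths are hypothetical, and whether the function returns a metrics object or only prints it is not visible in the patch:

```python
# Call sketch for the new evaluation entry point (hypothetical paths).
from hot_fair_utilities import evaluate

evaluate(
    test_path="ramp-data/sample/prediction/labels.geojson",  # predictions to score
    truth_path="ramp-data/sample/truth/labels.geojson",      # ground-truth labels
    filter_area_m2=5,       # ignore buildings smaller than 5 m^2
    iou_threshold=0.5,      # IoU above which a detection counts as a true positive
    verbose=True,
)
```
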
From c45dd7e9a5241f5744d0ab44ffff15cca618b6ad Mon Sep 17 00:00:00 2001
From: Albert Mohwald
Date: Fri, 8 Mar 2024 18:12:45 +0100
Subject: [PATCH 3/5] fix(predict): support both .png and .tif in inference

---
 hot_fair_utilities/inference/predict.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hot_fair_utilities/inference/predict.py b/hot_fair_utilities/inference/predict.py
index 7f85ce33..3b7fed87 100644
--- a/hot_fair_utilities/inference/predict.py
+++ b/hot_fair_utilities/inference/predict.py
@@ -52,7 +52,7 @@ def predict(
     start = time.time()
 
     os.makedirs(prediction_path, exist_ok=True)
-    image_paths = glob(f"{input_path}/*.png")
+    image_paths = glob(f"{input_path}/*.png") + glob(f"{input_path}/*.tif")
 
     if isinstance(model, keras.Model):
         for i in range((len(image_paths) + BATCH_SIZE - 1) // BATCH_SIZE):
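One caveat: glob patterns are case-sensitive on most filesystems, so tiles named *.TIF or *.tiff would still be skipped. A hypothetical, more permissive variant, shown only to illustrate the caveat and not part of the patch:

```python
# Hypothetical, more permissive listing than the two glob() calls above.
from pathlib import Path

def list_tiles(input_path: str) -> list[str]:
    exts = {".png", ".tif", ".tiff"}
    return sorted(str(p) for p in Path(input_path).iterdir() if p.suffix.lower() in exts)
```
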
From 8433a0c2c9afe4234cd93d83a4e74a8d73c9342b Mon Sep 17 00:00:00 2001
From: Albert Mohwald
Date: Fri, 8 Mar 2024 18:21:32 +0100
Subject: [PATCH 4/5] feat: replace FastSAM with YOLO including training

BREAKING CHANGE:
---
 Dockerfile                                    |  10 +-
 hot_fair_utilities/inference/predict.py       |  21 +--
 hot_fair_utilities/inference/utils.py         |   7 +-
 hot_fair_utilities/model/__init__.py          |   0
 hot_fair_utilities/model/yolo.py              |  47 ++++++
 .../preprocessing/multimasks_from_polygons.py | 123 ++++++++++++++++
 .../preprocessing/preprocess.py               |   8 +
 .../preprocessing/yolo_format.py              | 138 ++++++++++++++++++
 requirements.txt                              |   4 +-
 test_yolo.py                                  |  81 ++++++++++
 train_yolo.py                                 |  96 ++++++++++++
 11 files changed, 518 insertions(+), 17 deletions(-)
 create mode 100644 hot_fair_utilities/model/__init__.py
 create mode 100644 hot_fair_utilities/model/yolo.py
 create mode 100644 hot_fair_utilities/preprocessing/multimasks_from_polygons.py
 create mode 100644 hot_fair_utilities/preprocessing/yolo_format.py
 create mode 100644 test_yolo.py
 create mode 100644 train_yolo.py

diff --git a/Dockerfile b/Dockerfile
index 4af541c1..4802feec 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -8,9 +8,6 @@ RUN apt-get install -y libgdal-dev
 ENV CPLUS_INCLUDE_PATH=/usr/include/gdal
 ENV C_INCLUDE_PATH=/usr/include/gdal
 
-# Install ultralytics for YOLO, FastSAM, etc. together with pytorch and other dependencies
-RUN pip install ultralytics==8.0.203
-
 #install numpy before gdal
 RUN pip install numpy==1.23.5
 
@@ -20,6 +17,11 @@ RUN pip install --global-option=build_ext --global-option="-I/usr/include/gdal"
 COPY docker/ramp/docker-requirements.txt docker-requirements.txt
 RUN pip install -r docker-requirements.txt
 
+# Install ultralytics for YOLO, FastSAM, etc. together with pytorch and other dependencies
+# For exact pytorch+cuda versions, see https://pytorch.org/get-started/previous-versions/
+RUN pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113
+RUN pip install ultralytics==8.1.6
+
 # pip install solaris -- try with tmp-free build
 # COPY docker/ramp/solaris /tmp/solaris
 COPY docker/solaris/solaris /tmp/solaris/solaris
@@ -59,3 +61,5 @@ RUN unzip checkpoint.tf.zip -d ramp-code/ramp
 
 # Copy test_app.py
 COPY test_app.py ./test_app.py
+COPY test_yolo.py ./test_yolo.py
+COPY Package_Test.ipynb ./Package_Test.ipynb
diff --git a/hot_fair_utilities/inference/predict.py b/hot_fair_utilities/inference/predict.py
index 3b7fed87..177ce0df 100644
--- a/hot_fair_utilities/inference/predict.py
+++ b/hot_fair_utilities/inference/predict.py
@@ -8,7 +8,7 @@
 import numpy as np
 import torch
 from tensorflow import keras
-from ultralytics import YOLO, FastSAM
+from ultralytics import YOLO
 
 from ..georeferencing import georeference
 from ..utils import remove_files
@@ -73,15 +73,16 @@ def predict(
                     str(f"{prediction_path}/{Path(path).stem}.png"),
                 )
     elif isinstance(model, YOLO):
-        raise NotImplementedError
-    elif isinstance(model, FastSAM):
-        results = model(image_paths, stream=True, imgsz=IMAGE_SIZE,
-                        prompts=["building" for _ in range(len(image_paths))])
-        for i, r in enumerate(results):
-            preds = r.masks.data.max(dim=0)[0]
-            preds = torch.where(preds > confidence, torch.tensor(1), torch.tensor(0))
-            preds = preds.detach().cpu().numpy()
-            save_mask(preds, str(f"{prediction_path}/{Path(image_paths[i]).stem}.png"))
+        for idx in range(0, len(image_paths), BATCH_SIZE):
+            batch = image_paths[idx:idx + BATCH_SIZE]
+            for i, r in enumerate(model(batch, stream=True, conf=confidence, verbose=False)):
+                if r.masks is None:
+                    preds = np.zeros((IMAGE_SIZE, IMAGE_SIZE,), dtype=np.float32)
+                else:
+                    preds = r.masks.data.max(dim=0)[0]  # merge instance masks into one footprint mask
+                    preds = torch.where(preds > confidence, torch.tensor(1), torch.tensor(0))
+                    preds = preds.detach().cpu().numpy()
+                save_mask(preds, str(f"{prediction_path}/{Path(batch[i]).stem}.png"))
     else:
         raise RuntimeError("Loaded model is not supported")
 
diff --git a/hot_fair_utilities/inference/utils.py b/hot_fair_utilities/inference/utils.py
index 7b5eb5db..bea61bdc 100644
--- a/hot_fair_utilities/inference/utils.py
+++ b/hot_fair_utilities/inference/utils.py
@@ -4,6 +4,7 @@
 import torch
 from PIL import Image
 from tensorflow import keras
+from ultralytics import YOLO
 
 IMAGE_SIZE = 256
@@ -30,14 +31,14 @@ def save_mask(mask: np.ndarray, filename: str) -> None:
 
 
 def initialize_model(path, device=None):
-    """Loads either a Keras or a PyTorch model."""
+    """Loads either a Keras or a YOLO model."""
     if not isinstance(path, str):  # probably an already loaded model
         return path
 
-    if path.endswith('.pth') or path.endswith('.pt'):  # PyTorch saved checkpoint
+    if path.endswith('.pt'):  # YOLO checkpoint
         if not device:
             device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-        model = torch.load(path, map_location=device)
+        model = YOLO(path).to(device)
     else:
         model = keras.models.load_model(path)
     return model
diff --git a/hot_fair_utilities/model/__init__.py b/hot_fair_utilities/model/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/hot_fair_utilities/model/yolo.py b/hot_fair_utilities/model/yolo.py
new file mode 100644
index 00000000..e1fcfbb8
--- /dev/null
+++ b/hot_fair_utilities/model/yolo.py
@@ -0,0 +1,47 @@
+import torch
+import torch.nn as nn
+import ultralytics
+
+from ultralytics.utils import RANK
+
+
+#
+# Binary cross entropy with p_c
+#
+
+class YOLOSegWithPosWeight(ultralytics.YOLO):
+
+    def train(self, trainer=None, pc=1.0, **kwargs):
+        # Hide pc inside the pose argument (pose-estimation loss weight, unused for segmentation)
+        return super().train(trainer, **{**kwargs, "pose": pc})
+
+    @property
+    def task_map(self):
+        map = super().task_map
+        map['segment']['model'] = SegmentationModelWithPosWeight
+        map['segment']['trainer'] = SegmentationTrainerWithPosWeight
+        return map
+
+
+class SegmentationTrainerWithPosWeight(ultralytics.models.yolo.segment.train.SegmentationTrainer):
+
+    def get_model(self, cfg=None, weights=None, verbose=True):
+        """Return a YOLO segmentation model."""
+        model = SegmentationModelWithPosWeight(cfg, nc=self.data['nc'], verbose=verbose and RANK == -1)
+        if weights:
+            model.load(weights)
+        return model
+
+
+class SegmentationModelWithPosWeight(ultralytics.models.yolo.segment.train.SegmentationModel):
+
+    def init_criterion(self):
+        return v8SegmentationLossWithPosWeight(model=self)
+
+
+class v8SegmentationLossWithPosWeight(ultralytics.utils.loss.v8SegmentationLoss):
+
+    def __init__(self, model):
+        super().__init__(model)
+        pc = model.args.pose  # hidden in the pose arg (used by a different task)
+        pos_weight = torch.full((model.nc,), pc).to(self.device)
+        self.bce = nn.BCEWithLogitsLoss(reduction="none", pos_weight=pos_weight)
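The point of the pos_weight override is that with pc > 1, positive (building) pixels contribute pc times more to the classification loss, trading precision for recall. A standalone sketch of the effect on dummy logits and targets, not the trainer's actual tensors:

```python
# Standalone sketch of what pos_weight does in BCEWithLogitsLoss.
import torch
import torch.nn as nn

logits = torch.tensor([[2.0], [-1.0]])   # raw class scores
targets = torch.tensor([[1.0], [0.0]])   # 1 = building present

plain = nn.BCEWithLogitsLoss()(logits, targets)
weighted = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([2.0]))(logits, targets)
print(plain.item(), weighted.item())     # weighted > plain: the positive term is doubled
```
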
diff --git a/hot_fair_utilities/preprocessing/multimasks_from_polygons.py b/hot_fair_utilities/preprocessing/multimasks_from_polygons.py
new file mode 100644
index 00000000..d4646c2a
--- /dev/null
+++ b/hot_fair_utilities/preprocessing/multimasks_from_polygons.py
@@ -0,0 +1,123 @@
+# Patched from ramp-code.scripts.multi_masks_from_polygons created for the ramp project by carolyn.johnston@dev.global
+
+from pathlib import Path
+
+import geopandas as gpd
+import rasterio as rio
+from tqdm import tqdm
+
+from ramp.data_mgmt.chip_label_pairs import get_tq_chip_label_pairs, construct_mask_filepath
+from ramp.utils.img_utils import to_channels_first
+from ramp.utils.multimask_utils import df_to_px_mask, multimask_to_sparse_multimask
+from solaris.utils.core import _check_rasterio_im_load
+from solaris.utils.geo import get_crs
+from solaris.vector.mask import crs_is_metric
+
+
+def get_rasterio_shape_and_transform(image_path):
+    # Get the image shape and the affine transform to pass into df_to_px_mask.
+    with rio.open(image_path) as rio_dset:
+        shape = rio_dset.shape
+        transform = rio_dset.transform
+    return shape, transform
+
+
+def multimasks_from_polygons(in_poly_dir, in_chip_dir, out_mask_dir, input_contact_spacing=4, input_boundary_width=2):
+    """
+    Create multichannel building footprint masks from a folder of geojson files.
+    This also requires the path to the matching image chips directory.
+
+    Args:
+        in_poly_dir (str): Path to directory containing geojson files.
+        in_chip_dir (str): Path to directory containing image chip files with names matching geojson files.
+        out_mask_dir (str): Path to directory containing output SDT masks.
+        input_contact_spacing (int, optional): Pixels that are closer to two different polygons than
+            this spacing (in pixel units) will be labeled with the contact class.
+        input_boundary_width (int, optional): Width, in pixel units, of the boundary class drawn
+            around building footprints.
+
+    Example:
+        multimasks_from_polygons(
+            "data/preprocessed/labels",
+            "data/preprocessed/chips",
+            "data/preprocessed/multimasks"
+        )
+    """
+
+    # If the output mask directory doesn't exist, try to create it.
+    Path(out_mask_dir).mkdir(parents=True, exist_ok=True)
+
+    chip_label_pairs = get_tq_chip_label_pairs(in_chip_dir, in_poly_dir)
+    chip_paths, label_paths = list(zip(*chip_label_pairs))
+
+    # Construct the output mask file names from the chip file names.
+    # These will have the same base filenames as the chip files,
+    # with a .mask.tif extension in place of the .tif extension.
+    mask_paths = [construct_mask_filepath(out_mask_dir, chip_path) for chip_path in chip_paths]
+
+    # Zip label, chip and mask paths together for iteration.
+    json_chip_mask_zips = zip(label_paths, chip_paths, mask_paths)
+
+    for json_path, chip_path, mask_path in tqdm(json_chip_mask_zips, desc="Multimasks for input"):
+
+        # We will run this on very large directories, and some label files might fail to process.
+        # We want to be able to resume mask creation from where we left off.
+        if Path(mask_path).is_file():
+            continue
+
+        # Workaround for a bug in solaris
+        mask_shape, mask_transform = get_rasterio_shape_and_transform(chip_path)
+
+        gdf = gpd.read_file(json_path)
+
+        # Remove empty and null geometries
+        gdf = gdf[~gdf["geometry"].isna()]
+        gdf = gdf[~gdf.is_empty]
+
+        reference_im = _check_rasterio_im_load(chip_path)
+
+        if get_crs(gdf) != get_crs(reference_im):
+            # BUGFIX: if the CRSs don't match, reproject the geodataframe
+            gdf = gdf.to_crs(get_crs(reference_im))
+
+        if crs_is_metric(gdf):
+            meters = True
+            # CJ 20220824: convert pixels to meters for the call to df_to_px_mask
+            boundary_width = min(reference_im.res) * input_boundary_width
+            contact_spacing = min(reference_im.res) * input_contact_spacing
+        else:
+            meters = False
+            boundary_width = input_boundary_width
+            contact_spacing = input_contact_spacing
+
+        # NOTE: solaris does not support multipolygon geodataframes,
+        # so we first call explode() to turn multipolygons into polygon dataframes.
+        # ignore_index=True prevents polygons from the same multipolygon from being grouped into a series.
+        gdf_poly = gdf.explode(ignore_index=True)
+
+        # multi_mask is a one-hot, channels-last encoded mask
+        onehot_multi_mask = df_to_px_mask(df=gdf_poly,
+                                          out_file=mask_path,
+                                          shape=mask_shape,
+                                          do_transform=True,
+                                          affine_obj=None,
+                                          channels=['footprint', 'boundary', 'contact'],
+                                          reference_im=reference_im,
+                                          boundary_width=boundary_width,
+                                          contact_spacing=contact_spacing,
+                                          out_type="uint8",
+                                          meters=meters)
+
+        # Convert onehot_multi_mask to a sparse encoded mask
+        # of shape (1, H, W) for compatibility with the rasterio writer
+        sparse_multi_mask = multimask_to_sparse_multimask(onehot_multi_mask)
+        sparse_multi_mask = to_channels_first(sparse_multi_mask)
+
+        # Write out the sparse mask file with rasterio.
+        with rio.open(chip_path, "r") as src:
+            meta = src.meta.copy()
+        meta.update(count=sparse_multi_mask.shape[0])
+        meta.update(dtype='uint8')
+        meta.update(nodata=None)
+        with rio.open(mask_path, 'w', **meta) as dst:
+            dst.write(sparse_multi_mask)
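The sparse encoding stores a single band whose values follow the class order used later by yolo_format: 0 = background, 1 = footprint, 2 = boundary, 3 = contact. A sketch for sanity-checking one generated mask; the path and the printed counts are hypothetical:

```python
# Sanity-check one generated multimask (hypothetical path and counts).
import numpy as np
import rasterio as rio

with rio.open("data/preprocessed/multimasks/OAM-1-2-3.mask.tif") as src:
    band = src.read(1)

values, counts = np.unique(band, return_counts=True)
print(dict(zip(values.tolist(), counts.tolist())))  # e.g. {0: 60210, 1: 4800, 2: 500, 3: 26}
```
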
diff --git a/hot_fair_utilities/preprocessing/preprocess.py b/hot_fair_utilities/preprocessing/preprocess.py
index 4536170f..3e243f43 100644
--- a/hot_fair_utilities/preprocessing/preprocess.py
+++ b/hot_fair_utilities/preprocessing/preprocess.py
@@ -5,6 +5,7 @@
 from .clip_labels import clip_labels
 from .fix_labels import fix_labels
 from .reproject_labels import reproject_labels_to_epsg3857
+from .multimasks_from_polygons import multimasks_from_polygons
 
 
 def preprocess(
@@ -13,6 +14,7 @@ def preprocess(
     rasterize=False,
     rasterize_options=None,
     georeference_images=False,
+    multimasks=False
 ) -> None:
     """Fully preprocess the input data.
 
@@ -29,6 +31,7 @@ def preprocess(
             (if georeference_images=True), and the directories
             "binarymasks" and "grayscale_labels" if the corresponding
             rasterizing options are chosen.
+            "multimasks" - for the multimask labels (if multimasks=True)
         rasterize: Whether to create the raster labels.
         rasterize_options: A list with options how to rasterize the label,
             if rasterize=True. Possible options: "grayscale"
@@ -37,6 +40,7 @@ def preprocess(
             for the ramp model). If rasterize=False,
             rasterize_options will be ignored.
         georeference_images: Whether to georeference the OAM images.
+        multimasks: Whether to additionally output multimask labels.
 
     Example::
 
@@ -82,3 +86,7 @@ def preprocess(
 
     os.remove(f"{output_path}/corrected_labels.geojson")
     os.remove(f"{output_path}/labels_epsg3857.geojson")
+
+    if multimasks:
+        assert os.path.isdir(f"{output_path}/chips"), "Chips do not exist. Set georeference_images=True."
+        multimasks_from_polygons(f"{output_path}/labels", f"{output_path}/chips", f"{output_path}/multimasks")
diff --git a/hot_fair_utilities/preprocessing/yolo_format.py b/hot_fair_utilities/preprocessing/yolo_format.py
new file mode 100644
index 00000000..2ad55c84
--- /dev/null
+++ b/hot_fair_utilities/preprocessing/yolo_format.py
@@ -0,0 +1,138 @@
+import concurrent.futures
+import random
+import traceback
+import warnings
+from pathlib import Path
+
+import cv2
+import numpy as np
+import rasterio
+import yaml
+
+
+# Mask types from https://rampml.global/data-preparation/
+CLASS_NAMES = ["footprint", "boundary", "contact"]
+
+
+def yolo_format(preprocessed_dirs, yolo_dir, val_dirs=None, multimask=False, p_val=None):
+    """
+    Creates an Ultralytics YOLOv5-format dataset from RAMP-preprocessed data.
+    Supports either a single data directory or multiple directories.
+    For multiple directories, the data can be split into train and val sets.
+
+    The dataset can be inspected using fiftyone, see
+    https://docs.voxel51.com/user_guide/dataset_creation/datasets.html#yolov5dataset
+
+    Args:
+        preprocessed_dirs (any): Path or list of paths containing the
+            directories "chips" and "binarymasks" or "multimasks" from the RAMP preprocessing phase.
+        yolo_dir (str): Path where the YOLO data will be stored.
+        val_dirs (list, optional): List of paths from preprocessed_dirs that will be added to the
+            validation set instead of the training set.
+        multimask (bool, optional): If true, multimasks are used instead of binarymasks.
+        p_val (float, optional): Float in [0,1] specifying the probability of an image being added to val.
+            If val_dirs is set, this option has no effect.
+
+    Examples:
+        yolo_format("ramp_sample_1", "yolo")
+        yolo_format(["ramp_sample_1", "ramp_sample_2"], "yolo")
+        yolo_format(["ramp_sample_1"], "yolo", ["ramp_sample_2"])
+    """
+    classes = [1, 2, 3] if multimask else [1]
+    if isinstance(preprocessed_dirs, str):
+        preprocessed_dirs = [preprocessed_dirs]
+    if val_dirs is not None:
+        preprocessed_dirs = list(set(preprocessed_dirs) - set(val_dirs))
+        p_val = None
+    else:
+        val_dirs = []
+    preprocessed_dirs, yolo_dir = [Path(x) for x in preprocessed_dirs], Path(yolo_dir)
+    val_dirs = [Path(x) for x in val_dirs]
+    mask_dirname = Path("multimasks") if multimask else Path("binarymasks")
+
+    preprocessed_dirs_stems = [x.stem for x in preprocessed_dirs]
+    val_dirs_stems = [x.stem for x in val_dirs] if val_dirs is not None else []
+    yolo_dir_suffixes = ["_train", "_val"] if p_val else [""]
+
+    # Save image symlinks and labels
+    for dname, dname_stem in zip(preprocessed_dirs + val_dirs, preprocessed_dirs_stems + val_dirs_stems):
+        img_dir = dname / "chips" if (dname / "chips").is_dir() else dname / "source"
+        mask_dir = dname / mask_dirname
+        yolo_img_dir, yolo_label_dir = yolo_dir / "images" / dname_stem, yolo_dir / "labels" / dname_stem
+
+        for dir in [yolo_img_dir, yolo_label_dir]:
+            for suf in yolo_dir_suffixes:
+                Path(str(dir) + suf).mkdir(parents=True, exist_ok=True)
+
+        files = list(img_dir.iterdir())
+        random.shuffle(files)
+        _image_iteration(files[0], img_dir, mask_dir, yolo_img_dir, yolo_label_dir, classes, 1.0 if p_val else None)
+        with concurrent.futures.ThreadPoolExecutor(max_workers=6) as executor:
+            executor.map(
+                lambda x: __image_iteration_func(x, img_dir, mask_dir, yolo_img_dir, yolo_label_dir, classes, p_val),
+                files[1:]
+            )
+
+    if p_val:
+        val_dirs_stems = [str(p) + "_val" for p in preprocessed_dirs_stems]
+        preprocessed_dirs_stems = [str(p) + "_train" for p in preprocessed_dirs_stems]
+
+    # Save dataset.yaml
+    dataset = {
+        "names": {i-1: name for i, name in zip(classes, CLASS_NAMES[:len(classes)])},
+        "path": str(yolo_dir.absolute()),
+        "train": f"./images/{str(preprocessed_dirs_stems[0])}/" if len(preprocessed_dirs) == 1 else \
+            [f"./images/{str(d)}" for d in preprocessed_dirs_stems],
+    }
+    if len(val_dirs_stems) > 0:
+        dataset["val"] = f"./images/{str(val_dirs_stems[0])}/" if len(val_dirs_stems) == 1 else \
+            [f"./images/{str(d)}" for d in val_dirs_stems]
+    with open(yolo_dir / "dataset.yaml", 'w') as handle:
+        yaml.dump(dataset, handle, default_flow_style=False)
+
+
+def _image_iteration(img, img_dir, mask_dir, yolo_img_dir, yolo_label_dir, classes, p_val):
+    if p_val:
+        if random.uniform(0, 1) > p_val:
+            yolo_img_dir = Path(str(yolo_img_dir) + "_train")
+            yolo_label_dir = Path(str(yolo_label_dir) + "_train")
+        else:
+            yolo_img_dir = Path(str(yolo_img_dir) + "_val")
+            yolo_label_dir = Path(str(yolo_label_dir) + "_val")
+
+    img = img.name
+    mask = Path(str(img)[:-4] + ".mask.tif")
+    assert (mask_dir / mask).exists(), f"{img} does not have its {mask} in {mask_dir}"
+
+    # Image -> symlink
+    if not (yolo_img_dir / img).is_symlink():
+        (yolo_img_dir / img).symlink_to(img_dir / img)
+
+    # Mask -> find contour points, write them to a txt label file
+    with rasterio.open(str(mask_dir / mask)) as handle:
+        data = handle.read()
+    h, w = data.shape[1:]
+    label = str(img)[:-4] + ".txt"
+    with open(yolo_label_dir / label, 'w') as handle:
+        for cls in classes:
+            x = np.where(data == cls, 255, 0).squeeze().astype("uint8")
+            contours, _ = cv2.findContours(x, mode=cv2.RETR_EXTERNAL, method=cv2.CHAIN_APPROX_TC89_KCOS)
+            for contour in contours:  # contour shape: (n, 1, 2)
+                if contour.shape[0] > 2:  # at least a 3-point polygon
+                    contour = contour / [w, h]
+                    line = f"{cls - 1} {' '.join([str(c) for c in contour.flatten().tolist()])}\n"
+                    handle.write(line)
+
+
+def __image_iteration_func(img, img_dir, mask_dir, yolo_img_dir, yolo_label_dir, classes, p_val):
+    try:
+        _image_iteration(img, img_dir, mask_dir, yolo_img_dir, yolo_label_dir, classes, p_val)
+    except Exception as e:
+        full_trace = "\n" + " ".join(traceback.format_exception(e))
+        warnings.warn(f"Image {img.name} caused {full_trace}")
+
+
+if __name__ == "__main__":
+    random.seed(0)
+    # root = "/tf/ramp-data/sample_119"
+    root = "/home/powmol/wip/hotosm/fAIr-utilities/ramp-data/sample_119"
+    yolo_format([root + "/preprocessed"], root + "/yolo", multimask=False, p_val=0.05)
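The label files written by _image_iteration follow the YOLO segmentation format: one line per polygon, "&lt;class&gt; &lt;x1&gt; &lt;y1&gt; &lt;x2&gt; &lt;y2&gt; ..." with coordinates normalized to [0, 1]. A minimal sketch of that conversion on a synthetic 8x8 mask:

```python
# Synthetic example of the mask-to-label-line conversion above.
import cv2
import numpy as np

mask = np.zeros((8, 8), dtype=np.uint8)
mask[2:6, 2:6] = 255                      # a 4x4 "building"

contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_KCOS)
h, w = mask.shape
for contour in contours:
    if contour.shape[0] > 2:              # at least a 3-point polygon
        contour = contour / [w, h]        # normalize pixel coords to [0, 1]
        print("0 " + " ".join(str(c) for c in contour.flatten().tolist()))
```
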
diff --git a/requirements.txt b/requirements.txt
index faa7e418..7a594796 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -17,4 +17,6 @@ black
 isort
 build
 twine
-ultralytics==8.0.203
+torch==1.12.1
+torchvision==0.13.1
+ultralytics==8.1.6
diff --git a/test_yolo.py b/test_yolo.py
new file mode 100644
index 00000000..09bcb689
--- /dev/null
+++ b/test_yolo.py
@@ -0,0 +1,81 @@
+import os
+import time
+import warnings
+
+import tensorflow as tf
+
+from hot_fair_utilities import preprocess, predict, polygonize
+from hot_fair_utilities.preprocessing.yolo_format import yolo_format
+from train_yolo import train as train_yolo
+
+warnings.simplefilter(action='ignore', category=FutureWarning)
+
+
+class print_time:
+    def __init__(self, name):
+        self.name = name
+
+    def __enter__(self):
+        self.start = time.perf_counter()
+        return self
+
+    def __exit__(self, type, value, traceback):
+        print(f"{self.name} took {round(time.perf_counter() - self.start, 2)} seconds")
+
+
+print(
+    f"\nUsing tensorflow version {tf.__version__} with no of gpu : {len(tf.config.experimental.list_physical_devices('GPU'))}\n"
+)
+os.environ.update(os.environ)
+os.environ["RAMP_HOME"] = os.getcwd()
+print(os.environ["RAMP_HOME"])
+
+start_time = time.time()
+base_path = f"{os.getcwd()}/ramp-data/sample_2"
+
+model_input_image_path = f"{base_path}/input"
+preprocess_output = f"{base_path}/preprocessed"
+with print_time("preprocessing"):
+    preprocess(
+        input_path=model_input_image_path,
+        output_path=preprocess_output,
+        rasterize=True,
+        rasterize_options=["binary"],
+        georeference_images=True,
+        multimasks=True  # new arg
+    )
+
+yolo_data_dir = f"{base_path}/yolo"
+with print_time("yolo conversion"):
+    yolo_format(
+        preprocessed_dirs=preprocess_output,
+        yolo_dir=yolo_data_dir,
+        multimask=True,
+        p_val=0.05
+    )
+
+train_yolo(data=f"{base_path}",
+           weights=f"{os.getcwd()}/checkpoints/yolov8n-seg_ramp-training_ep500_bs16_deg30_pc2.0/weights/best.pt",
+           gpu="cpu",
+           epochs=2,
+           batch_size=16,
+           pc=2.0
+           )
+
+prediction_output = f"{base_path}/prediction/output"
+model_path = f"{os.getcwd()}/checkpoints/yolov8n-seg_sample_2_ep2_bs16_pc2.0/weights/best.pt"
+with print_time("inference"):
+    predict(
+        checkpoint_path=model_path,
+        input_path=f"{base_path}/prediction/input",
+        prediction_path=prediction_output,
+    )
+
+geojson_output = f"{prediction_output}/prediction.geojson"
+with print_time("polygonization"):
+    polygonize(
+        input_path=prediction_output,
+        output_path=geojson_output,
+        remove_inputs=False,
+    )
+
+print(f"\n Total Process Completed in : {time.time()-start_time} sec")
diff --git a/train_yolo.py b/train_yolo.py
new file mode 100644
index 00000000..4725b1b0
--- /dev/null
+++ b/train_yolo.py
@@ -0,0 +1,96 @@
+import argparse
+import os
+from pathlib import Path
+
+import torch
+import ultralytics
+
+from hot_fair_utilities.model.yolo import YOLOSegWithPosWeight
+
+
+ROOT = Path(__file__).parent.absolute()
+DATA_ROOT = str(ROOT / "ramp-training")
+LOGS_ROOT = str(ROOT / "checkpoints")
+
+
+#
+# Hyperparameters changed from the defaults of the YOLOv8 release models
+# https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/default.yaml
+#
+
+HYPERPARAM_CHANGES = {
+    "imgsz": 256,
+    "mosaic": 0.0,
+    "overlap_mask": False,
+    "cls": 0.5,
+    "degrees": 30.0,
+    # "optimizer": "SGD",
+    # "weight_decay": 0.001,
+}
+
+
+# torch.set_float32_matmul_precision("high")
+
+
+def parse_opt():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--gpu', type=str, default="0", help='GPU id')
+    parser.add_argument('--data', type=str, default=os.path.join(DATA_ROOT),
+                        help='Directory containing directory \'yolo\' with dataset.yaml.')
+    parser.add_argument('--weights', type=str, default="yolov8n-seg.yaml",
+                        help='See https://docs.ultralytics.com/tasks/detect/#train')
+    parser.add_argument('--epochs', type=int, default=100,
+                        help='Number of training epochs. Default is 100.')
+    parser.add_argument('--batch-size', type=int, default=16,
+                        help='Number of images in a single batch.')
+    parser.add_argument('--pc', type=float, default=1.0,
+                        help='Positive weight in BCE loss. pc > 1 (pc < 1) encourages higher recall (precision)')
+    opt = parser.parse_args()
+    return opt
+
+
+def main():
+    opt = parse_opt()
+    os.environ["CUDA_VISIBLE_DEVICES"] = str(opt.gpu)
+    print(f"GPU available: {torch.cuda.is_available()}, GPU count: {torch.cuda.device_count()}")
+    train(**vars(opt))
+
+
+def train(data, weights, gpu, epochs, batch_size, pc):
+    back = "n" if "yolov8n" in weights else "s" if "yolov8s" in weights else "m" if "yolov8m" in weights else "?"
+    data_scn = str(Path(data) / "yolo" / "dataset.yaml")
+    dataset = data_scn.split("/")[-3]
+    kwargs = HYPERPARAM_CHANGES
+
+    print(f"Backbone: {back}, Dataset: {dataset}, Epochs: {epochs}")
+    name = f"yolov8{back}-seg_{dataset}_ep{epochs}_bs{batch_size}"
+    if float(pc) != 0.0:
+        name += f"_pc{pc}"
+        kwargs = {**kwargs, "pc": pc}
+        yolo = YOLOSegWithPosWeight
+    else:
+        yolo = ultralytics.YOLO
+
+    weights, resume = check4checkpoint(name, weights)
+    model = yolo(weights)
+    model.train(
+        data=data_scn,
+        project=LOGS_ROOT,
+        name=name,
+        epochs=int(epochs),
+        resume=resume,
+        deterministic=False,
+        device=[int(i) for i in gpu.split(",")] if "," in gpu else gpu,
+        **kwargs
+    )
+
+
+def check4checkpoint(name, weights):
+    ckpt = os.path.join(LOGS_ROOT, name, "weights", "last.pt")
+    if os.path.exists(ckpt):
+        print(f"Set weights to {ckpt}")
+        return ckpt, True
+    return weights, False
+
+
+if __name__ == "__main__":
+    main()
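Besides the CLI entry point, train() can be called programmatically, as test_yolo.py does above. A call sketch with hypothetical paths; pc=2.0 doubles the weight of positive pixels in the classification BCE, favoring recall over precision:

```python
# Programmatic usage sketch of train() (hypothetical paths).
from train_yolo import train

train(
    data="ramp-data/sample_2",     # expects ramp-data/sample_2/yolo/dataset.yaml
    weights="yolov8n-seg.yaml",    # train from scratch; or pass a .pt checkpoint
    gpu="cpu",                     # or "0", or "0,1" for multi-GPU
    epochs=2,
    batch_size=16,
    pc=2.0,
)
```
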
From b690ced35645c2e1bcb53a2e00431f966de8da32 Mon Sep 17 00:00:00 2001
From: Albert Mohwald
Date: Fri, 8 Mar 2024 18:24:28 +0100
Subject: [PATCH 5/5] fix(postprocessing/utils): resolve OAM-x-y-z.mask.tif

---
 hot_fair_utilities/postprocessing/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hot_fair_utilities/postprocessing/utils.py b/hot_fair_utilities/postprocessing/utils.py
index 3bc82384..7c3bfa83 100644
--- a/hot_fair_utilities/postprocessing/utils.py
+++ b/hot_fair_utilities/postprocessing/utils.py
@@ -24,6 +24,7 @@ def tiles_from_directory(dir_path):
     """
     for path in glob(f"{dir_path}/*"):
         _, *tile_info = re.split("-", Path(path).stem)
+        tile_info[-1] = tile_info[-1].replace(".mask", "")  # resolve OAM-x-y-z.mask.tif
         x, y, z = map(int, tile_info)
         tile = mercantile.Tile(x=x, y=y, z=z)
         yield tile, path
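The fix is needed because Path.stem strips only the last suffix, so for a mask tile the zoom component keeps a ".mask" tail and int() fails. A standalone sketch of the parsing before and after the added line:

```python
# Why patch 5 is needed: stem of "OAM-123-456-18.mask.tif" keeps ".mask".
import re
from pathlib import Path

stem = Path("OAM-123-456-18.mask.tif").stem     # 'OAM-123-456-18.mask'
_, *tile_info = re.split("-", stem)             # ['123', '456', '18.mask']
tile_info[-1] = tile_info[-1].replace(".mask", "")  # the added line
x, y, z = map(int, tile_info)
print(x, y, z)                                  # 123 456 18
```
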