Skip to content

Commit

Permalink
Fix bug on preprocessing for yolo
Browse files Browse the repository at this point in the history
  • Loading branch information
kshitijrajsharma committed Nov 2, 2024
1 parent 0b51a80 commit a9bf7a6
Show file tree
Hide file tree
Showing 11 changed files with 104 additions and 93 deletions.
6 changes: 3 additions & 3 deletions hot_fair_utilities/georeferencing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from .utils import get_bounding_box


def georeference(input_path: str, output_path: str, is_mask=False) -> None:
def georeference(input_path: str, output_path: str, is_mask=False,epsg=3857) -> None:
"""Perform georeferencing and remove the fourth band from images (if any).
CRS of the georeferenced images will be EPSG:3857 ('WGS 84 / Pseudo-Mercator').
Expand All @@ -38,7 +38,7 @@ def georeference(input_path: str, output_path: str, is_mask=False) -> None:
out_file = f"{output_path}/{filename}.tif"

# Get bounding box in the target CRS (EPSG:3857 unless `epsg` says otherwise)
x_min, y_min, x_max, y_max = get_bounding_box(filename)
x_min, y_min, x_max, y_max = get_bounding_box(filename,epsg=epsg)

# Use one band for masks and the first three bands for images
bands = [1] if is_mask else [1, 2, 3]
Expand All @@ -51,7 +51,7 @@ def georeference(input_path: str, output_path: str, is_mask=False) -> None:
format="GTiff",
bandList=bands,
outputBounds=[x_min, y_max, x_max, y_min],
outputSRS="EPSG:3857",
outputSRS=f"EPSG:{epsg}",
)
# Close dataset
_ = None
11 changes: 7 additions & 4 deletions hot_fair_utilities/preprocessing/clip_labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@


def clip_labels(
input_path: str, output_path: str, rasterize=False, rasterize_options=None
input_path: str, output_path: str, rasterize=False, rasterize_options=None,all_geojson_file=None,epsg=3857
) -> None:
"""Clip and rasterize the GeoJSON labels for each aerial image.
Expand Down Expand Up @@ -71,11 +71,14 @@ def clip_labels(
glob(f"{input_path}/*.png"), desc=f"Clipping labels for {Path(input_path).stem}"
):
filename = Path(path).stem
geojson_file_all_labels = f"{output_path}/labels_epsg3857.geojson"
if all_geojson_file:
geojson_file_all_labels=all_geojson_file
else :
geojson_file_all_labels = f"{output_path}/labels_epsg3857.geojson"
clipped_geojson_file = f"{output_geojson_path}/{filename}.geojson"

# Bounding box as a tuple
x_min, y_min, x_max, y_max = get_bounding_box(filename)
x_min, y_min, x_max, y_max = get_bounding_box(filename,epsg=epsg)
# Bounding box as a polygon
bounding_box_polygon = box(x_min, y_min, x_max, y_max)

Expand All @@ -87,7 +90,7 @@ def clip_labels(
gdf_clipped.to_file(clipped_geojson_file)
else:
schema = {"geometry": "Polygon", "properties": {"id": "int"}}
crs = "EPSG:3857"
crs = f"EPSG:{epsg}"
gdf_clipped.to_file(clipped_geojson_file, schema=schema, crs=crs)

# Rasterizing
Expand Down
19 changes: 11 additions & 8 deletions hot_fair_utilities/preprocessing/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def preprocess(
multimasks=False,
input_contact_spacing=8, # only required if multimasks is set to true
input_boundary_width=3, # only required if multimasks is set to true
epsg=3857,
) -> None:
"""Fully preprocess the input data.
Expand Down Expand Up @@ -63,6 +64,7 @@ def preprocess(
)
"""
# Check if rasterizing options are valid
assert epsg in (4326,3857),"Projection not supported"
if rasterize:
assert (
rasterize_options is not None
Expand All @@ -80,22 +82,23 @@ def preprocess(
os.makedirs(output_path, exist_ok=True)

if georeference_images:
georeference(input_path, f"{output_path}/chips")
georeference(input_path, f"{output_path}/chips",epsg=epsg)

fix_labels(
f"{input_path}/labels.geojson",
f"{output_path}/corrected_labels.geojson",
)
if epsg==3857:
reproject_labels_to_epsg3857(
f"{output_path}/corrected_labels.geojson",
f"{output_path}/labels_epsg3857.geojson",
)

reproject_labels_to_epsg3857(
f"{output_path}/corrected_labels.geojson",
f"{output_path}/labels_epsg3857.geojson",
)

clip_labels(input_path, output_path, rasterize, rasterize_options)
clip_labels(input_path, output_path, rasterize, rasterize_options,all_geojson_file=f"{output_path}/corrected_labels.geojson" if epsg==4326 else f"{output_path}/labels_epsg3857.geojson",epsg=epsg)

os.remove(f"{output_path}/corrected_labels.geojson")
os.remove(f"{output_path}/labels_epsg3857.geojson")
if epsg==3857:
os.remove(f"{output_path}/labels_epsg3857.geojson")

if multimasks:

Expand Down
4 changes: 2 additions & 2 deletions hot_fair_utilities/preprocessing/yolo_v8_v1/yolo_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def yolo_format(
val_dirs_stems = [str(p) + "_val" for p in preprocessed_dirs_stems]
preprocessed_dirs_stems = [str(p) + "_train" for p in preprocessed_dirs_stems]

# Save dataset.yaml
# Save yolo_dataset.yaml
dataset = {
"names": {i - 1: name for i, name in zip(classes, CLASS_NAMES[: len(classes)])},
"path": str(yolo_dir.absolute()),
Expand All @@ -110,7 +110,7 @@ def yolo_format(
if len(val_dirs_stems) == 1
else [f"./images/{str(d)}" for d in val_dirs_stems]
)
with open(yolo_dir / "dataset.yaml", "w") as handle:
with open(yolo_dir / "yolo_dataset.yaml", "w") as handle:
yaml.dump(dataset, handle, default_flow_style=False)


Expand Down
9 changes: 5 additions & 4 deletions hot_fair_utilities/preprocessing/yolo_v8_v2/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,10 @@ def convert_coordinates(coordinates, geo_dict):
return coordinates






def write_yolo_file(iwp, folder, output_path, class_index=0):
"""
Writes YOLO label file based on the given image with path and class index.
Expand All @@ -217,10 +221,7 @@ def write_yolo_file(iwp, folder, output_path, class_index=0):
lwp = iwp.replace(".tif", ".geojson").replace("chips", "labels")

# Create the YOLO label filename with path from the chip filename with path
ywp = os.path.join(output_path, iwp.split("/")[-1].replace(".tif", ".txt")).replace(
"folder", folder
)

ywp = os.path.join(output_path,'labels',folder, os.path.basename(iwp).replace(".tif", ".txt"))
# Create the YOLO label folder if it does not exist
os.makedirs(os.path.dirname(ywp), exist_ok=True)

Expand Down
96 changes: 48 additions & 48 deletions hot_fair_utilities/preprocessing/yolo_v8_v2/yolo_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np
import yaml
from tqdm import tqdm

import shutil
from .utils import convert_tif_to_jpg, write_yolo_file


Expand Down Expand Up @@ -68,53 +68,53 @@ def yolo_format(
print(f"Test array size: {len(test_cwps)}\n")

# Check if the YOLO folder exists, if not create labels, images, and folders
if not os.path.exists(output_path):
os.makedirs(output_path)

# Write the YOLO label files for the training set
print("Generating training labels")
for train_cwp in tqdm(train_cwps):
write_yolo_file(train_cwp, "train", output_path)

# Write the YOLO label files for the validation set
print("Generating validation labels")
for val_cwp in tqdm(val_cwps):
write_yolo_file(val_cwp, "val", output_path)

# Write the YOLO label files for the test set
print("Generating test labels")
for test_cwp in tqdm(test_cwps):
write_yolo_file(test_cwp, "test", output_path)

# Convert the chip files to JPEG format
print("Generating training images")
for train_cwp in tqdm(train_cwps):
convert_tif_to_jpg(train_cwp, "train", output_path)

print("Generating validation images")
for val_cwp in tqdm(val_cwps):
convert_tif_to_jpg(val_cwp, "val", output_path)

print("Generating test images")
for test_cwp in tqdm(test_cwps):
convert_tif_to_jpg(test_cwp, "test", output_path)

attr = {
"path": output_path,
"train": "images/train",
"val": "images/val",
"names": {0: 1},
}
# os.makedirs(os.path.join(output_path, "yolo"), exist_ok=True)

YAML_PATH = os.path.join(output_path, "dataset.yaml")
print(f"Writing the data file with path={YAML_PATH}")
# Write the file
with open(YAML_PATH, "w") as f:
yaml.dump(attr, f)

else:
print("Data already converted")
if os.path.exists(output_path):
shutil.rmtree(output_path)

os.makedirs(output_path)

# Write the YOLO label files for the training set
print("Generating training labels")
for train_cwp in tqdm(train_cwps):
write_yolo_file(train_cwp, "train", output_path)

# Write the YOLO label files for the validation set
print("Generating validation labels")
for val_cwp in tqdm(val_cwps):
write_yolo_file(val_cwp, "val", output_path)

# Write the YOLO label files for the test set
print("Generating test labels")
for test_cwp in tqdm(test_cwps):
write_yolo_file(test_cwp, "test", output_path)

# Convert the chip files to JPEG format
print("Generating training images")
for train_cwp in tqdm(train_cwps):
convert_tif_to_jpg(train_cwp, "train", output_path)

print("Generating validation images")
for val_cwp in tqdm(val_cwps):
convert_tif_to_jpg(val_cwp, "val", output_path)

print("Generating test images")
for test_cwp in tqdm(test_cwps):
convert_tif_to_jpg(test_cwp, "test", output_path)

attr = {
"path": output_path,
"train": "images/train",
"val": "images/val",
"names": {0: 1},
}
# os.makedirs(os.path.join(output_path, "yolo"), exist_ok=True)

YAML_PATH = os.path.join(output_path, "yolo_dataset.yaml")
print(f"Writing the data file with path={YAML_PATH}")
# Write the file
with open(YAML_PATH, "w") as f:
yaml.dump(attr, f)



def find_files(data_folders):
Expand Down
15 changes: 7 additions & 8 deletions hot_fair_utilities/training/yolo_v8_v1/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def parse_opt():
"--data",
type=str,
default=DATA_ROOT, # Using the environment variable with fallback
help="Directory containing directory 'yolo' with dataset.yaml.",
help="Directory containing directory 'yolo' with yolo_dataset.yaml.",
)
parser.add_argument(
"--weights",
Expand Down Expand Up @@ -94,26 +94,25 @@ def train(
if "yolov8n" in weights
else "s" if "yolov8s" in weights else "m" if "yolov8m" in weights else "?"
)
data_scn = str(Path(data) / "yolo" / "dataset.yaml")
data_scn = dataset_yaml_path
dataset = data_scn.split("/")[-3]
kwargs = HYPERPARAM_CHANGES
print(f"Backbone: {back}, Dataset: {dataset}, Epochs: {epochs}")

name = f"yolov8{back}-seg_{dataset}_ep{epochs}_bs{batch_size}"
if output_path:
name = output_path

if float(pc) != 0.0:
name += f"_pc{pc}"
kwargs = {**kwargs, "pc": pc}
yolo = YOLOSegWithPosWeight
else:
yolo = ultralytics.YOLO

weights, resume = check4checkpoint(name, weights)
weights, resume = check4checkpoint(name, weights,output_path)
model = yolo(weights)
model.train(
data=data_scn,
project=LOGS_ROOT, # Using the environment variable with fallback
project=os.path.join(output_path,'checkpoints'),
name=name,
epochs=int(epochs),
resume=resume,
Expand All @@ -124,8 +123,8 @@ def train(
return weights


def check4checkpoint(name, weights):
ckpt = os.path.join(LOGS_ROOT, name, "weights", "last.pt")
def check4checkpoint(name, weights,output_path):
ckpt = os.path.join(os.path.join(output_path,'checkpoints'), name, "weights", "last.pt")
if os.path.exists(ckpt):
print(f"Set weights to {ckpt}")
return ckpt, True
Expand Down
16 changes: 7 additions & 9 deletions hot_fair_utilities/training/yolo_v8_v2/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

ROOT = Path(os.getenv("YOLO_ROOT", Path(__file__).parent.absolute()))
DATA_ROOT = str(Path(os.getenv("YOLO_DATA_ROOT", ROOT / "yolo-training")))
LOGS_ROOT = str(Path(os.getenv("YOLO_LOGS_ROOT", ROOT / "checkpoints")))


HYPERPARAM_CHANGES = {
Expand Down Expand Up @@ -52,32 +51,31 @@
}


def train(data, weights, gpu, epochs, batch_size, pc, output_path=None):
def train(data, weights, gpu, epochs, batch_size, pc, output_path, dataset_yaml_path):
back = (
"n"
if "yolov8n" in weights
else "s" if "yolov8s" in weights else "m" if "yolov8m" in weights else "?"
)
data_scn = str(Path(data) / "yolo" / "dataset.yaml")
data_scn = dataset_yaml_path
dataset = data_scn.split("/")[-3]
kwargs = HYPERPARAM_CHANGES
print(f"Backbone: {back}, Dataset: {dataset}, Epochs: {epochs}")

name = f"yolov8{back}-seg_{dataset}_ep{epochs}_bs{batch_size}"
if output_path:
name = output_path

if float(pc) != 0.0:
name += f"_pc{pc}"
kwargs = {**kwargs, "pc": pc}
yolo = YOLOSegWithPosWeight
else:
yolo = ultralytics.YOLO

weights, resume = check4checkpoint(name, weights)
weights, resume = check4checkpoint(name, weights,output_path)
model = yolo(weights)
model.train(
data=data_scn,
project=LOGS_ROOT, # Using the environment variable with fallback
project=os.path.join(output_path,"checkpoints"), # Checkpoints are written under <output_path>/checkpoints
name=name,
epochs=int(epochs),
resume=resume,
Expand All @@ -88,8 +86,8 @@ def train(data, weights, gpu, epochs, batch_size, pc, output_path=None):
return weights


def check4checkpoint(name, weights):
ckpt = os.path.join(LOGS_ROOT, name, "weights", "last.pt")
def check4checkpoint(name, weights,output_path):
ckpt = os.path.join(os.path.join(output_path,"checkpoints"), name, "weights", "last.pt")
if os.path.exists(ckpt):
print(f"Set weights to {ckpt}")
return ckpt, True
Expand Down
5 changes: 3 additions & 2 deletions hot_fair_utilities/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def get_prefix(path: str) -> str:
return os.path.splitext(filename)[0]


def get_bounding_box(filename: str) -> Tuple[float, float, float, float]:
def get_bounding_box(filename: str,epsg=3857) -> Tuple[float, float, float, float]:
"""Get the EPSG:3857 coordinates of bounding box for the OAM image.
This function gives the coordinates of lower left and upper right
Expand All @@ -49,7 +49,8 @@ def get_bounding_box(filename: str) -> Tuple[float, float, float, float]:
gdf_4326 = geopandas.GeoDataFrame({"geometry": [box_4326]}, crs="EPSG:4326")

# Reproject to the requested CRS (EPSG:3857 by default)
gdf_3857 = gdf_4326.to_crs("EPSG:3857")

gdf_3857 = gdf_4326.to_crs(f"EPSG:{epsg}")

# Bounding box in EPSG:3857 as a tuple (x_min, y_min, x_max, y_max)
box_3857 = gdf_3857.iloc[0, 0].bounds
Expand Down
Loading

0 comments on commit a9bf7a6

Please sign in to comment.