From c7e469ffc6a3cd7525df634c170bd92bcd0eb2d9 Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Tue, 17 Dec 2024 15:13:16 -0800
Subject: [PATCH 01/23] Work in progress to refactor evaluation epoch end with
 attention paid to empty frames

---
 src/deepforest/main.py | 59 ++++++++++++++++++++++++--------------
 tests/test_main.py     | 64 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+), 21 deletions(-)

diff --git a/src/deepforest/main.py b/src/deepforest/main.py
index f2f1695d..6c1a6312 100644
--- a/src/deepforest/main.py
+++ b/src/deepforest/main.py
@@ -665,7 +665,7 @@ def validation_step(self, batch, batch_idx):
             print("Empty batch encountered, skipping")
             return None
 
-        # Get loss from "train" mode, but don't allow optimization
+        # Get loss from "train" mode, but don't allow optimization. Torchvision has a 'train' mode that returns a loss and an 'eval' mode that returns predictions. The names are confusing, but this is the correct way to get the loss.
         self.model.train()
         with torch.no_grad():
             loss_dict = self.model.forward(images, targets)
@@ -674,6 +674,7 @@ def validation_step(self, batch, batch_idx):
         losses = sum([loss for loss in loss_dict.values()])
 
         self.model.eval()
+        # Can we avoid another forward pass here? https://discuss.pytorch.org/t/how-to-get-losses-and-predictions-at-the-same-time/167223
         preds = self.model.forward(images)
 
         # Calculate intersection-over-union
@@ -682,7 +683,10 @@ def validation_step(self, batch, batch_idx):
 
         # Log loss
         for key, value in loss_dict.items():
-            self.log("val_{}".format(key), value, on_epoch=True)
+            try:
+                self.log("val_{}".format(key), value, on_epoch=True)
+            except:
+                pass
 
         for index, result in enumerate(preds):
             # Skip empty predictions
@@ -698,15 +702,24 @@ def on_validation_epoch_start(self):
         self.predictions = []
 
     def on_validation_epoch_end(self):
+        """Compute metrics"""
+
         output = self.iou_metric.compute()
-        self.log_dict(output)
-        self.iou_metric.reset()
+        try:
+            # This is a bug in lightning, it claims this is a warning but it is not. https://github.com/Lightning-AI/pytorch-lightning/pull/9733/files
+            self.log_dict(output)
+        except:
+            pass
 
+        self.iou_metric.reset()
         output = self.mAP_metric.compute()
 
         # Remove classes from output dict
         output = {key: value for key, value in output.items() if not key == "classes"}
-        self.log_dict(output)
+        try:
+            self.log_dict(output)
+        except:
+            pass
         self.mAP_metric.reset()
 
         if len(self.predictions) == 0:
@@ -739,22 +752,26 @@ def on_validation_epoch_end(self):
                     numeric_to_label_dict=self.numeric_to_label_dict)
 
                 # Log each key value pair of the results dict
-                for key, value in results.items():
-                    if key in ["class_recall"]:
-                        for index, row in value.iterrows():
-                            self.log(
-                                "{}_Recall".format(
-                                    self.numeric_to_label_dict[row["label"]]),
-                                row["recall"])
-                            self.log(
-                                "{}_Precision".format(
-                                    self.numeric_to_label_dict[row["label"]]),
-                                row["precision"])
-                    else:
-                        try:
-                            self.log(key, value)
-                        except:
-                            pass
+                if not results["class_recall"] is None:
+                    for key, value in results.items():
+                        if key in ["class_recall"]:
+                            for index, row in value.iterrows():
+                                try:
+                                    self.log(
+                                        "{}_Recall".format(
+                                            self.numeric_to_label_dict[row["label"]]),
+                                        row["recall"])
+                                    self.log(
+                                        "{}_Precision".format(
+                                            self.numeric_to_label_dict[row["label"]]),
+                                        row["precision"])
+                                except:
+                                    pass
+                        else:
+                            try:
+                                self.log(key, value)
+                            except:
+                                pass
 
     def predict_step(self, batch, batch_idx):
         batch_results = self.model(batch)
diff --git a/tests/test_main.py b/tests/test_main.py
index 13d10826..a1a038f9 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -15,6 +15,8 @@
 from albumentations.pytorch import ToTensorV2
 
 from deepforest import main, get_data, dataset, model
+from deepforest.visualize import format_geometry
+
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks import Callback
 from pytorch_lightning.loggers import TensorBoardLogger
@@ -165,6 +167,26 @@ def test_train_empty(m, tmpdir):
 
 
 def test_validation_step(m):
+    val_dataloader = m.val_dataloader()
+    batch = next(iter(val_dataloader))
+    m.predictions = []
+    val_loss = m.validation_step(batch, 0)
+    assert val_loss != 0
+
+def test_validation_step_empty():
+    """If the model returns an empty prediction, the metrics should not fail"""
+    m = main.deepforest()
+    m.config["validation"]["csv_file"] = get_data("example.csv")
+    m.config["validation"]["root_dir"] = os.path.dirname(get_data("example.csv"))
+    m.create_trainer()
+
+    val_dataloader = m.val_dataloader()
+    batch = next(iter(val_dataloader))
+    m.predictions = []
+    val_loss = m.validation_step(batch, 0)
+    assert len(m.predictions) == 0
+
+def test_validate(m):
     m.trainer = None
     # Turn off trainer to test copying on some linux devices.
     before = copy.deepcopy(m)
@@ -677,6 +699,7 @@ def test_predict_tile_with_crop_model_empty():
     # Assert the result
     assert result is None
 
+<<<<<<< HEAD
 
 # @pytest.mark.parametrize("batch_size", [1, 4, 8])
 # def test_batch_prediction(m, batch_size, raster_path):
@@ -779,3 +802,44 @@ def test_predict_tile_with_crop_model_empty():
 #             "xmin", "ymin", "xmax", "ymax", "label", "score", "geometry"
 #         }
 #         assert not batch_pred.empty
+=======
+def test_epoch_evaluation_end(m):
+    preds = [{
+        'boxes': torch.tensor([
+            [690.3572, 902.9113, 781.1031, 996.5151],
+            [998.1990, 655.7919, 172.4619, 321.8518]
+        ]),
+        'scores': torch.tensor([
+            0.6740, 0.6625
+        ]),
+        'labels': torch.tensor([
+            0, 0
+        ])
+    }]
+    targets = preds
+
+    m.iou_metric.update(preds, targets)
+    m.mAP_metric.update(preds, targets)
+
+    boxes = format_geometry(preds[0])
+    boxes["image_path"] = "test"
+    m.predictions = [boxes]
+    m.on_validation_epoch_end()
+
+def test_epoch_evaluation_end_empty(m):
+    """If the model returns an empty prediction, the metrics should not fail"""
+    preds = [{
+        'boxes': torch.zeros((1, 4)),
+        'scores': torch.zeros(1),
+        'labels': torch.zeros(1, dtype=torch.int64)
+    }]
+    targets = preds
+
+    m.iou_metric.update(preds, targets)
+    m.mAP_metric.update(preds, targets)
+
+    boxes = format_geometry(preds[0])
+    boxes["image_path"] = "test"
+    m.predictions = [boxes]
+    m.on_validation_epoch_end()
+>>>>>>> Work in progress to refactor evaluation epoch end with attention paid to empty frames

From 01f5f4f298a4f9eabce5228423e2a1ae3e4833c0 Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Tue, 17 Dec 2024 17:01:51 -0800
Subject: [PATCH 02/23] docs

---
 src/deepforest/main.py | 2 +-
 tests/test_main.py     | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/deepforest/main.py b/src/deepforest/main.py
index 6c1a6312..047d8d93 100644
--- a/src/deepforest/main.py
+++ b/src/deepforest/main.py
@@ -702,7 +702,7 @@ def on_validation_epoch_start(self):
         self.predictions = []
 
     def on_validation_epoch_end(self):
-        """Compute metrics"""
+        """Compute metrics."""
 
         output = self.iou_metric.compute()
         try:
diff --git a/tests/test_main.py b/tests/test_main.py
index a1a038f9..278e6f73 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -173,6 +173,7 @@ def test_validation_step(m):
     val_loss = m.validation_step(batch, 0)
     assert val_loss != 0
 
+"
 def test_validation_step_empty():
     """If the model returns an empty prediction, the metrics should not fail"""
     m = main.deepforest()
@@ -186,6 +187,8 @@ def test_validation_step_empty():
     val_loss = m.validation_step(batch, 0)
     assert len(m.predictions) == 0
 
+    assert m.iou_metric.compute()["iou"] is None
+
 def test_validate(m):
     m.trainer = None
     # Turn off trainer to test copying on some linux devices.

From 87ab1fa0d41d02501db1f803eb12ea54891a5b77 Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Tue, 17 Dec 2024 17:02:25 -0800
Subject: [PATCH 03/23] docs

---
 tests/test_main.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/test_main.py b/tests/test_main.py
index 278e6f73..93a139ce 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -173,7 +173,6 @@ def test_validation_step(m):
     val_loss = m.validation_step(batch, 0)
     assert val_loss != 0
 
-"
 def test_validation_step_empty():
     """If the model returns an empty prediction, the metrics should not fail"""
     m = main.deepforest()
@@ -186,7 +185,6 @@ def test_validation_step_empty():
     m.predictions = []
     val_loss = m.validation_step(batch, 0)
     assert len(m.predictions) == 0
-
     assert m.iou_metric.compute()["iou"] is None
 
 def test_validate(m):

From bf7e98695aac913c706ecca50735ac69f5a01e8d Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Wed, 18 Dec 2024 10:59:08 -0800
Subject: [PATCH 04/23] test wasn't asserted correctly

---
 tests/test_main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_main.py b/tests/test_main.py
index 93a139ce..2cb36913 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -185,7 +185,7 @@ def test_validation_step_empty():
     m.predictions = []
     val_loss = m.validation_step(batch, 0)
     assert len(m.predictions) == 0
-    assert m.iou_metric.compute()["iou"] is None
+    assert torch.isnan(m.iou_metric.compute()["iou"])
 
 def test_validate(m):
     m.trainer = None

From 71a42d7cc24dc4b6609f987aa4bf4d92678d9828 Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Wed, 18 Dec 2024 11:46:23 -0800
Subject: [PATCH 05/23] upgrade torchmetrics

---
 tests/test_main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_main.py b/tests/test_main.py
index 2cb36913..8f19bd82 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -185,7 +185,7 @@ def test_validation_step_empty():
     m.predictions = []
     val_loss = m.validation_step(batch, 0)
     assert len(m.predictions) == 0
-    assert torch.isnan(m.iou_metric.compute()["iou"])
+    assert m.iou_metric.compute()["iou"] == 0
 
 def test_validate(m):
     m.trainer = None

From d3c7e2fa62229be9608f15b288b3ffdad4c09cb4 Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Fri, 20 Dec 2024 19:52:09 -0800
Subject: [PATCH 06/23] add validation tests for empty frame accuracy

---
 src/deepforest/main.py | 68 ++++++++++++++++++++++++++++++++++++++++--
 tests/test_main.py     | 40 ++++++++++++++++++++++---
 2 files changed, 101 insertions(+), 7 deletions(-)

diff --git a/src/deepforest/main.py b/src/deepforest/main.py
index 047d8d93..c05b66e1 100644
--- a/src/deepforest/main.py
+++ b/src/deepforest/main.py
@@ -4,7 +4,6 @@
 import typing
 import warnings
 
-import geopandas as gpd
 import numpy as np
 import pandas as pd
 import pytorch_lightning as pl
@@ -14,6 +13,8 @@
 from pytorch_lightning.callbacks import LearningRateMonitor
 from torch import optim
 from torchmetrics.detection import IntersectionOverUnion, MeanAveragePrecision
+from torchmetrics.classification import BinaryAccuracy
+
 from huggingface_hub import PyTorchModelHubMixin
 from deepforest import dataset, visualize, get_data, utilities, predict
 from deepforest import evaluate as evaluate_iou
@@ -97,6 +98,9 @@ def __init__(self,
             class_metrics=True, iou_threshold=self.config["validation"]["iou_threshold"])
         self.mAP_metric = MeanAveragePrecision()
 
+        # Empty frame accuracy
+        self.empty_frame_accuracy = BinaryAccuracy()
+
         # Create a default trainer.
         self.create_trainer()
 
@@ -642,7 +646,6 @@ def training_step(self, batch, batch_idx):
 
         # allow for empty data if data augmentation is generated
         path, images, targets = batch
-
         loss_dict = self.model.forward(images, targets)
 
         # sum of regression and classification loss
@@ -701,6 +704,52 @@ def validation_step(self, batch, batch_idx):
     def on_validation_epoch_start(self):
         self.predictions = []
 
+    def calculate_empty_frame_accuracy(self, ground_df, predictions_df):
+        """Calculate accuracy for empty frames (frames with no objects).
+
+        Args:
+            ground_df (pd.DataFrame): Ground truth dataframe containing image paths and bounding boxes.
+                Must have columns 'image_path', 'xmin', 'ymin', 'xmax', 'ymax'.
+            predictions_df (pd.DataFrame): Model predictions dataframe containing image paths and predicted boxes.
+                Must have column 'image_path'.
+
+        Returns:
+            float: Accuracy score for empty frame detection. A score of 1.0 means the model correctly
+                identified all empty frames (no false positives), while 0.0 means it predicted objects
+                in all empty frames (all false positives).
+            None: If there are no empty frames, return None
+        """
+        # Find images that are marked as empty in ground truth (all coordinates are 0)
+        empty_images = ground_df.loc[
+            (ground_df.xmin == 0) & 
+            (ground_df.ymin == 0) & 
+            (ground_df.xmax == 0) & 
+            (ground_df.ymax == 0), 
+            "image_path"
+        ].unique()
+
+        if len(empty_images) == 0:
+            return None
+
+        # Get predictions for empty images
+        empty_predictions = predictions_df.loc[predictions_df.image_path.isin(empty_images)]
+
+        # Create prediction tensor - 1 if model predicted objects, 0 if predicted empty
+        predictions = torch.zeros(len(empty_images))
+        for index, image in enumerate(empty_images):
+            if len(empty_predictions.loc[empty_predictions.image_path == image]) > 0:
+                predictions[index] = 1
+
+        # Ground truth tensor - all zeros since these are empty frames
+        gt = torch.zeros(len(empty_images))
+        predictions = torch.tensor(predictions)
+
+        # Calculate accuracy using metric
+        self.empty_frame_accuracy.update(predictions, gt)
+        empty_accuracy = self.empty_frame_accuracy.compute()
+
+        return empty_accuracy
+
     def on_validation_epoch_end(self):
         """Compute metrics."""
 
@@ -733,8 +782,18 @@ def on_validation_epoch_end(self):
             ground_df = utilities.read_file(self.config["validation"]["csv_file"])
             ground_df["label"] = ground_df.label.apply(lambda x: self.label_dict[x])
 
+            # If there are empty frames, evaluate empty frame accuracy separately
+            empty_accuracy = self.calculate_empty_frame_accuracy(ground_df, self.predictions_df)
+            
+            try:
+                self.log("empty_frame_accuracy", empty_accuracy)
+            except:
+                pass
+        else:      
+            empty_accuracy = None
+
             if self.predictions_df.empty:
-                warnings.warn("No predictions made, skipping evaluation")
+                warnings.warn("No predictions made, skipping detection evaluation")
                 geom_type = utilities.determine_geometry_type(ground_df)
                 if geom_type == "box":
                     result = {
@@ -751,6 +810,9 @@ def on_validation_epoch_end(self):
                     savedir=None,
                     numeric_to_label_dict=self.numeric_to_label_dict)
 
+                if empty_accuracy is not None:
+                    results["empty_frame_accuracy"] = empty_accuracy
+
                 # Log each key value pair of the results dict
                 if not results["class_recall"] is None:
                     for key, value in results.items():
diff --git a/tests/test_main.py b/tests/test_main.py
index 8f19bd82..125b9a68 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -700,8 +700,6 @@ def test_predict_tile_with_crop_model_empty():
     # Assert the result
     assert result is None
 
-<<<<<<< HEAD
-
 # @pytest.mark.parametrize("batch_size", [1, 4, 8])
 # def test_batch_prediction(m, batch_size, raster_path):
 #    
@@ -803,7 +801,7 @@ def test_predict_tile_with_crop_model_empty():
 #             "xmin", "ymin", "xmax", "ymax", "label", "score", "geometry"
 #         }
 #         assert not batch_pred.empty
-=======
+
 def test_epoch_evaluation_end(m):
     preds = [{
         'boxes': torch.tensor([
@@ -843,4 +841,38 @@ def test_epoch_evaluation_end_empty(m):
     boxes["image_path"] = "test"
     m.predictions = [boxes]
     m.on_validation_epoch_end()
->>>>>>> Work in progress to refactor evaluation epoch end with attention paid to empty frames
+
+def test_empty_frame_accuracy_with_predictions(m, tmpdir):
+    """Create a ground truth with only empty frames, the accuracy should be 0 when the model makes predictions on them"""
+    # Create ground truth with empty frames
+    ground_df = pd.read_csv(get_data("testfile_deepforest.csv"))
+    # Set all xmin, ymin, xmax, ymax to 0
+    ground_df.loc[:, ["xmin", "ymin", "xmax", "ymax"]] = 0
+    ground_df.drop_duplicates(subset=["image_path"], keep="first", inplace=True)
+    
+    # Save the ground truth to a temporary file
+    ground_df.to_csv(tmpdir.strpath + "/ground_truth.csv", index=False)
+    m.config["validation"]["csv_file"] = tmpdir.strpath + "/ground_truth.csv"
+    m.config["validation"]["root_dir"] = os.path.dirname(get_data("testfile_deepforest.csv"))
+
+    m.create_trainer()
+    results = m.trainer.validate(m)
+    assert results[0]["empty_frame_accuracy"] == 0
+
+def test_empty_frame_accuracy_without_predictions(tmpdir):
+    """Create a ground truth with empty frames, the accuracy should be 1 with a random model"""
+    m = main.deepforest()
+    # Create ground truth with empty frames
+    ground_df = pd.read_csv(get_data("testfile_deepforest.csv"))
+    # Set all xmin, ymin, xmax, ymax to 0
+    ground_df.loc[:, ["xmin", "ymin", "xmax", "ymax"]] = 0
+    ground_df.drop_duplicates(subset=["image_path"], keep="first", inplace=True)
+    
+    # Save the ground truth to a temporary file
+    ground_df.to_csv(tmpdir.strpath + "/ground_truth.csv", index=False)
+    m.config["validation"]["csv_file"] = tmpdir.strpath + "/ground_truth.csv"
+    m.config["validation"]["root_dir"] = os.path.dirname(get_data("testfile_deepforest.csv"))
+
+    m.create_trainer()
+    results = m.trainer.validate(m)
+    assert results[0]["empty_frame_accuracy"] == 1
\ No newline at end of file

From f7754b854bd042b5cf0c4d6d6f8aef27e998248d Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Fri, 20 Dec 2024 20:03:38 -0800
Subject: [PATCH 07/23] local tests pass, but need a test for when there are
 both empty and non-empty mixed

---
 src/deepforest/main.py | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/src/deepforest/main.py b/src/deepforest/main.py
index c05b66e1..a04a9fe2 100644
--- a/src/deepforest/main.py
+++ b/src/deepforest/main.py
@@ -694,10 +694,23 @@ def validation_step(self, batch, batch_idx):
         for index, result in enumerate(preds):
             # Skip empty predictions
             if result["boxes"].shape[0] == 0:
-                continue
-            boxes = visualize.format_geometry(result)
-            boxes["image_path"] = path[index]
-            self.predictions.append(boxes)
+                self.predictions.append(
+                    pd.DataFrame(
+                        {
+                            "image_path": [path[index]],
+                            "xmin": [None],
+                            "ymin": [None],
+                            "xmax": [None],
+                            "ymax": [None],
+                            "label": [None],
+                            "score": [None]
+                        }
+                    )
+                )
+            else:
+                boxes = visualize.format_geometry(result)
+                boxes["image_path"] = path[index]
+                self.predictions.append(boxes)
 
         return losses
 
@@ -731,13 +744,14 @@ def calculate_empty_frame_accuracy(self, ground_df, predictions_df):
         if len(empty_images) == 0:
             return None
 
-        # Get predictions for empty images
-        empty_predictions = predictions_df.loc[predictions_df.image_path.isin(empty_images)]
+        # Get non-empty predictions for empty images
+        non_empty_predictions = predictions_df.loc[predictions_df.xmin.notnull()]
+        predictions_for_empty_images = non_empty_predictions.loc[non_empty_predictions.image_path.isin(empty_images)]
 
         # Create prediction tensor - 1 if model predicted objects, 0 if predicted empty
         predictions = torch.zeros(len(empty_images))
         for index, image in enumerate(empty_images):
-            if len(empty_predictions.loc[empty_predictions.image_path == image]) > 0:
+            if len(predictions_for_empty_images.loc[predictions_for_empty_images.image_path == image]) > 0:
                 predictions[index] = 1
 
         # Ground truth tensor - all zeros since these are empty frames

From 31f1669e25bada2c569d2af4d16a6412e5c75e59 Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Mon, 23 Dec 2024 16:59:48 -0800
Subject: [PATCH 08/23] add multi-class tests

---
 tests/test_main.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/tests/test_main.py b/tests/test_main.py
index 125b9a68..97f28f28 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -875,4 +875,26 @@ def test_empty_frame_accuracy_without_predictions(tmpdir):
 
     m.create_trainer()
     results = m.trainer.validate(m)
-    assert results[0]["empty_frame_accuracy"] == 1
\ No newline at end of file
+    assert results[0]["empty_frame_accuracy"] == 1
+
+def test_mulit_class_with_empty_frame_accuracy_without_predictions(two_class_m, tmpdir):
+    """Create a ground truth with empty frames, the accuracy should be 1 with a random model"""
+    # Create ground truth with empty frames
+    ground_df = pd.read_csv(get_data("testfile_deepforest.csv"))
+    # Set all xmin, ymin, xmax, ymax to 0
+    ground_df.loc[:, ["xmin", "ymin", "xmax", "ymax"]] = 0
+    ground_df.drop_duplicates(subset=["image_path"], keep="first", inplace=True)
+    ground_df.loc[:, "label"] = "Alive"
+
+    # Merge with a multi class ground truth
+    multi_class_df = pd.read_csv(get_data("testfile_multi.csv"))
+    ground_df = pd.concat([ground_df, multi_class_df])
+
+    # Save the ground truth to a temporary file
+    ground_df.to_csv(tmpdir.strpath + "/ground_truth.csv", index=False)
+    two_class_m.config["validation"]["csv_file"] = tmpdir.strpath + "/ground_truth.csv"
+    two_class_m.config["validation"]["root_dir"] = os.path.dirname(get_data("testfile_deepforest.csv"))
+
+    two_class_m.create_trainer()
+    results = two_class_m.trainer.validate(two_class_m)
+    assert results[0]["empty_frame_accuracy"] == 1

From 432b294718a46d9be1a7fa1353ef71d391ca313f Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Mon, 23 Dec 2024 17:03:33 -0800
Subject: [PATCH 09/23] add doc text

---
 docs/user_guide/12_evaluation.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/user_guide/12_evaluation.md b/docs/user_guide/12_evaluation.md
index b060bbee..e24b4e3c 100644
--- a/docs/user_guide/12_evaluation.md
+++ b/docs/user_guide/12_evaluation.md
@@ -28,6 +28,10 @@ There is an additional difference between ecological object detection methods li
 
 DeepForest uses the [hungarian matching algorithm](https://thinkautonomous.medium.com/computer-vision-for-tracking-8220759eee85) to assign predictions to ground truth based on maximum IoU overlap. This is slow compared to the methods above, and so isn't a good choice for running hundreds of times during model training see config["validation"]["val_accuracy_interval"] for setting the frequency of the evaluate callback for this metric.
 
+### Empty Frame Accuracy
+
+DeepForest allows the user to pass empty frames to evaluation by setting xmin, ymin, xmax, ymax to 0. This is useful for evaluating models on data that has empty frames. The empty frame accuracy is the proportion of empty frames that contain no predictions. The 'label' column is ignored for these rows, but it must still be one of the labels in the model for the row to be included in the evaluation.
+
 # Calculating Evaluation Metrics 
 
 ## Torchmetrics and loss scores

From cd3f59536e9eaaf69213080b2efc41dcf89b4810 Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Mon, 23 Dec 2024 20:36:33 -0800
Subject: [PATCH 10/23] realign empty on the end of epoch

---
 src/deepforest/main.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/deepforest/main.py b/src/deepforest/main.py
index a04a9fe2..37b29e96 100644
--- a/src/deepforest/main.py
+++ b/src/deepforest/main.py
@@ -803,9 +803,9 @@ def on_validation_epoch_end(self):
                 self.log("empty_frame_accuracy", empty_accuracy)
             except:
                 pass
-        else:      
-            empty_accuracy = None
-
+            
+            # Remove empty predictions from the rest of the evaluation
+            self.predictions_df = self.predictions_df.loc[self.predictions_df.xmin.notnull()]
             if self.predictions_df.empty:
                 warnings.warn("No predictions made, skipping detection evaluation")
                 geom_type = utilities.determine_geometry_type(ground_df)

From 29b301e04ca560868f7a2852761a64dc8e944f47 Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Tue, 24 Dec 2024 16:13:25 -0800
Subject: [PATCH 11/23] empty predictions have NA as coordinates

---
 src/deepforest/main.py | 7 +++++++
 tests/test_main.py     | 3 ++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/deepforest/main.py b/src/deepforest/main.py
index 37b29e96..492d474c 100644
--- a/src/deepforest/main.py
+++ b/src/deepforest/main.py
@@ -816,6 +816,13 @@ def on_validation_epoch_end(self):
                         "class_recall": pd.DataFrame()
                     }
             else:
+                # Remove empty ground truth
+                ground_df = ground_df.loc[~ground_df.xmin==0]
+                if ground_df.empty:
+                    results = {}
+                    results["empty_frame_accuracy"] = empty_accuracy
+                    return results
+                
                 results = evaluate_iou.__evaluate_wrapper__(
                     predictions=self.predictions_df,
                     ground_df=ground_df,
diff --git a/tests/test_main.py b/tests/test_main.py
index 97f28f28..405ed8d2 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -184,7 +184,8 @@ def test_validation_step_empty():
     batch = next(iter(val_dataloader))
     m.predictions = []
     val_loss = m.validation_step(batch, 0)
-    assert len(m.predictions) == 0
+    assert len(m.predictions) == 1
+    assert m.predictions[0].xmin.isna().all()
     assert m.iou_metric.compute()["iou"] == 0
 
 def test_validate(m):

From 80e9704ed24f21179ee3b964f251be3a9d307a68 Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Sat, 4 Jan 2025 08:14:13 -0800
Subject: [PATCH 12/23] missing parenthesis

---
 src/deepforest/main.py |   9 ++--
 tests/test_main.py     | 111 +++--------------------------------------
 2 files changed, 13 insertions(+), 107 deletions(-)

diff --git a/src/deepforest/main.py b/src/deepforest/main.py
index 492d474c..0fe2ff38 100644
--- a/src/deepforest/main.py
+++ b/src/deepforest/main.py
@@ -799,10 +799,11 @@ def on_validation_epoch_end(self):
             # If there are empty frames, evaluate empty frame accuracy separately
             empty_accuracy = self.calculate_empty_frame_accuracy(ground_df, self.predictions_df)
             
-            try:
-                self.log("empty_frame_accuracy", empty_accuracy)
-            except:
-                pass
+            if empty_accuracy is not None:
+                try:
+                    self.log("empty_frame_accuracy", empty_accuracy)
+                except:
+                        pass
             
             # Remove empty predictions from the rest of the evaluation
             self.predictions_df = self.predictions_df.loc[self.predictions_df.xmin.notnull()]
diff --git a/tests/test_main.py b/tests/test_main.py
index 405ed8d2..c41c76c2 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -701,107 +701,12 @@ def test_predict_tile_with_crop_model_empty():
     # Assert the result
     assert result is None
 
-# @pytest.mark.parametrize("batch_size", [1, 4, 8])
-# def test_batch_prediction(m, batch_size, raster_path):
-#    
-#     # Prepare input data
-#     tile = np.array(Image.open(raster_path))
-#     ds = dataset.TileDataset(tile=tile, patch_overlap=0.1, patch_size=100)
-#     dl = DataLoader(ds, batch_size=batch_size)
-    
-#     # Perform prediction
-#     predictions = []
-#     for batch in dl:
-#         prediction = m.predict_batch(batch)
-#         predictions.append(prediction)
-    
-#     # Check results
-#     assert len(predictions) == len(dl)
-#     for batch_pred in predictions:
-#         assert isinstance(batch_pred, pd.DataFrame)
-#         assert set(batch_pred.columns) == {
-#             "xmin", "ymin", "xmax", "ymax", "label", "score", "geometry"
-#         }
-
-# @pytest.mark.parametrize("batch_size", [1, 4])
-# def test_batch_training(m, batch_size, tmpdir):
-#     
-#     # Generate synthetic training data
-#     csv_file = get_data("example.csv")
-#     root_dir = os.path.dirname(csv_file)
-#     train_ds = m.load_dataset(csv_file, root_dir=root_dir)
-#     train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
-    
-#     # Configure the model and trainer
-#     m.config["batch_size"] = batch_size
-#     m.create_trainer()
-#     trainer = m.trainer
-    
-#     # Train the model
-#     trainer.fit(m, train_dl)
-    
-#     # Assertions
-#     assert trainer.current_epoch == 1
-#     assert trainer.batch_size == batch_size
-
-# @pytest.mark.parametrize("batch_size", [2, 4])
-# def test_batch_data_augmentation(m, batch_size, raster_path):
-#     
-#     tile = np.array(Image.open(raster_path))
-#     ds = dataset.TileDataset(tile=tile, patch_overlap=0.1, patch_size=100, augment=True)
-#     dl = DataLoader(ds, batch_size=batch_size)
-    
-#     predictions = []
-#     for batch in dl:
-#         prediction = m.predict_batch(batch)
-#         predictions.append(prediction)
-    
-#     assert len(predictions) == len(dl)
-#     for batch_pred in predictions:
-#         assert isinstance(batch_pred, pd.DataFrame)
-#         assert set(batch_pred.columns) == {
-#             "xmin", "ymin", "xmax", "ymax", "label", "score", "geometry"
-#         }
-
-# def test_batch_inference_consistency(m, raster_path):
-#     
-#     tile = np.array(Image.open(raster_path))
-#     ds = dataset.TileDataset(tile=tile, patch_overlap=0.1, patch_size=100)
-#     dl = DataLoader(ds, batch_size=4)
-    
-#     batch_predictions = []
-#     for batch in dl:
-#         prediction = m.predict_batch(batch)
-#         batch_predictions.append(prediction)
-    
-#     single_predictions = []
-#     for image in ds:
-#         prediction = m.predict_image(image=image)
-#         single_predictions.append(prediction)
-    
-#     batch_df = pd.concat(batch_predictions, ignore_index=True)
-#     single_df = pd.concat(single_predictions, ignore_index=True)
-    
-#     pd.testing.assert_frame_equal(batch_df, single_df)
-
-# def test_large_batch_handling(m, raster_path):
-#    
-#     tile = np.array(Image.open(raster_path))
-#     ds = dataset.TileDataset(tile=tile, patch_overlap=0.1, patch_size=100)
-#     dl = DataLoader(ds, batch_size=16)
-    
-#     predictions = []
-#     for batch in dl:
-#         prediction = m.predict_batch(batch)
-#         predictions.append(prediction)
-    
-#     assert len(predictions) > 0
-#     for batch_pred in predictions:
-#         assert isinstance(batch_pred, pd.DataFrame)
-#         assert set(batch_pred.columns) == {
-#             "xmin", "ymin", "xmax", "ymax", "label", "score", "geometry"
-#         }
-#         assert not batch_pred.empty
+def test_evaluate_on_epoch_interval(m):
+    m.config["val_accuracy_interval"] = 1
+    m.config["train"]["epochs"] = 1
+    m.trainer.fit(m)
+    assert m.trainer.logged_metrics["box_precision"]
+    assert m.trainer.logged_metrics["box_recall"]
 
 def test_epoch_evaluation_end(m):
     preds = [{
@@ -850,7 +755,7 @@ def test_empty_frame_accuracy_with_predictions(m, tmpdir):
     # Set all xmin, ymin, xmax, ymax to 0
     ground_df.loc[:, ["xmin", "ymin", "xmax", "ymax"]] = 0
     ground_df.drop_duplicates(subset=["image_path"], keep="first", inplace=True)
-    
+
     # Save the ground truth to a temporary file
     ground_df.to_csv(tmpdir.strpath + "/ground_truth.csv", index=False)
     m.config["validation"]["csv_file"] = tmpdir.strpath + "/ground_truth.csv"
@@ -868,7 +773,7 @@ def test_empty_frame_accuracy_without_predictions(tmpdir):
     # Set all xmin, ymin, xmax, ymax to 0
     ground_df.loc[:, ["xmin", "ymin", "xmax", "ymax"]] = 0
     ground_df.drop_duplicates(subset=["image_path"], keep="first", inplace=True)
-    
+
     # Save the ground truth to a temporary file
     ground_df.to_csv(tmpdir.strpath + "/ground_truth.csv", index=False)
     m.config["validation"]["csv_file"] = tmpdir.strpath + "/ground_truth.csv"

From e0c7e75988e1c7e01960ff452659a0cd246d7df9 Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Sat, 4 Jan 2025 13:10:59 -0800
Subject: [PATCH 13/23] making sure it's synced, passes locally

---
 src/deepforest/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/deepforest/main.py b/src/deepforest/main.py
index 0fe2ff38..72d08c55 100644
--- a/src/deepforest/main.py
+++ b/src/deepforest/main.py
@@ -818,7 +818,7 @@ def on_validation_epoch_end(self):
                     }
             else:
                 # Remove empty ground truth
-                ground_df = ground_df.loc[~ground_df.xmin==0]
+                ground_df = ground_df.loc[~(ground_df.xmin==0)]
                 if ground_df.empty:
                     results = {}
                     results["empty_frame_accuracy"] = empty_accuracy

From 5ace3553a19999387acfe2fdf443e0da40585586 Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Sat, 4 Jan 2025 19:22:35 -0800
Subject: [PATCH 14/23] style update

---
 src/deepforest/main.py | 53 ++++++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 28 deletions(-)

diff --git a/src/deepforest/main.py b/src/deepforest/main.py
index 72d08c55..a1b746ed 100644
--- a/src/deepforest/main.py
+++ b/src/deepforest/main.py
@@ -695,18 +695,15 @@ def validation_step(self, batch, batch_idx):
             # Skip empty predictions
             if result["boxes"].shape[0] == 0:
                 self.predictions.append(
-                    pd.DataFrame(
-                        {
-                            "image_path": [path[index]],
-                            "xmin": [None],
-                            "ymin": [None],
-                            "xmax": [None],
-                            "ymax": [None],
-                            "label": [None],
-                            "score": [None]
-                        }
-                    )
-                )
+                    pd.DataFrame({
+                        "image_path": [path[index]],
+                        "xmin": [None],
+                        "ymin": [None],
+                        "xmax": [None],
+                        "ymax": [None],
+                        "label": [None],
+                        "score": [None]
+                    }))
             else:
                 boxes = visualize.format_geometry(result)
                 boxes["image_path"] = path[index]
@@ -733,25 +730,23 @@ def calculate_empty_frame_accuracy(self, ground_df, predictions_df):
             None: If there are no empty frames, return None
         """
         # Find images that are marked as empty in ground truth (all coordinates are 0)
-        empty_images = ground_df.loc[
-            (ground_df.xmin == 0) & 
-            (ground_df.ymin == 0) & 
-            (ground_df.xmax == 0) & 
-            (ground_df.ymax == 0), 
-            "image_path"
-        ].unique()
+        empty_images = ground_df.loc[(ground_df.xmin == 0) & (ground_df.ymin == 0) &
+                                     (ground_df.xmax == 0) & (ground_df.ymax == 0),
+                                     "image_path"].unique()
 
         if len(empty_images) == 0:
             return None
 
         # Get non-empty predictions for empty images
         non_empty_predictions = predictions_df.loc[predictions_df.xmin.notnull()]
-        predictions_for_empty_images = non_empty_predictions.loc[non_empty_predictions.image_path.isin(empty_images)]
+        predictions_for_empty_images = non_empty_predictions.loc[
+            non_empty_predictions.image_path.isin(empty_images)]
 
         # Create prediction tensor - 1 if model predicted objects, 0 if predicted empty
         predictions = torch.zeros(len(empty_images))
         for index, image in enumerate(empty_images):
-            if len(predictions_for_empty_images.loc[predictions_for_empty_images.image_path == image]) > 0:
+            if len(predictions_for_empty_images.loc[
+                    predictions_for_empty_images.image_path == image]) > 0:
                 predictions[index] = 1
 
         # Ground truth tensor - all zeros since these are empty frames
@@ -797,16 +792,18 @@ def on_validation_epoch_end(self):
             ground_df["label"] = ground_df.label.apply(lambda x: self.label_dict[x])
 
             # If there are empty frames, evaluate empty frame accuracy separately
-            empty_accuracy = self.calculate_empty_frame_accuracy(ground_df, self.predictions_df)
-            
+            empty_accuracy = self.calculate_empty_frame_accuracy(
+                ground_df, self.predictions_df)
+
             if empty_accuracy is not None:
                 try:
                     self.log("empty_frame_accuracy", empty_accuracy)
                 except:
-                        pass
-            
+                    pass
+
             # Remove empty predictions from the rest of the evaluation
-            self.predictions_df = self.predictions_df.loc[self.predictions_df.xmin.notnull()]
+            self.predictions_df = self.predictions_df.loc[
+                self.predictions_df.xmin.notnull()]
             if self.predictions_df.empty:
                 warnings.warn("No predictions made, skipping detection evaluation")
                 geom_type = utilities.determine_geometry_type(ground_df)
@@ -818,12 +815,12 @@ def on_validation_epoch_end(self):
                     }
             else:
                 # Remove empty ground truth
-                ground_df = ground_df.loc[~(ground_df.xmin==0)]
+                ground_df = ground_df.loc[~(ground_df.xmin == 0)]
                 if ground_df.empty:
                     results = {}
                     results["empty_frame_accuracy"] = empty_accuracy
                     return results
-                
+
                 results = evaluate_iou.__evaluate_wrapper__(
                     predictions=self.predictions_df,
                     ground_df=ground_df,

From 4e2e3087e436140f375a184dd096ae88f3d5f4ac Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Sun, 5 Jan 2025 07:01:57 -0800
Subject: [PATCH 15/23] docstring got corrupted

---
 src/deepforest/main.py | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/src/deepforest/main.py b/src/deepforest/main.py
index a1b746ed..0ebfd7a8 100644
--- a/src/deepforest/main.py
+++ b/src/deepforest/main.py
@@ -23,7 +23,20 @@
 
 
 class deepforest(pl.LightningModule, PyTorchModelHubMixin):
-    """Class for training and predicting tree crowns in RGB images."""
+    """Class for training and predicting tree crowns in RGB images.
+    
+    Args:
+        num_classes (int): number of classes in the model
+        config_file (str): path to deepforest config file
+        model (model.Model()): a deepforest model object, see model.Model().
+        config_args (dict): a dictionary of key->value to update
+        config file at run time. e.g. {"batch_size":10}
+        This is useful for iterating over arguments during model testing.
+            existing_train_dataloader: a Pytorch dataloader that yields a tuple path, images, targets
+            existing_val_dataloader: a Pytorch dataloader that yields a tuple path, images, targets
+    Returns:
+        self: a deepforest pytorch lightning module
+    """
 
     def __init__(self,
                  num_classes: int = 1,
@@ -34,18 +47,7 @@ def __init__(self,
                  model=None,
                  existing_train_dataloader=None,
                  existing_val_dataloader=None):
-        """Args:
-            num_classes (int): number of classes in the model
-            config_file (str): path to deepforest config file
-            model (model.Model()): a deepforest model object, see model.Model().
-            config_args (dict): a dictionary of key->value to update
-            config file at run time. e.g. {"batch_size":10}
-            This is useful for iterating over arguments during model testing.
-            existing_train_dataloader: a Pytorch dataloader that yields a tuple path, images, targets
-            existing_val_dataloader: a Pytorch dataloader that yields a tuple path, images, targets
-        Returns:
-            self: a deepforest pytorch lightning module
-        """
+
         super().__init__()
 
         # Read config file. Defaults to deepforest_config.yml in working directory.

From 9f070a4c1abed11d3be22bdc924f95b070e84835 Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Sun, 5 Jan 2025 07:56:53 -0800
Subject: [PATCH 16/23] update docs

---
 src/deepforest/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/deepforest/main.py b/src/deepforest/main.py
index 0ebfd7a8..b7cf66e6 100644
--- a/src/deepforest/main.py
+++ b/src/deepforest/main.py
@@ -24,7 +24,7 @@
 
 class deepforest(pl.LightningModule, PyTorchModelHubMixin):
     """Class for training and predicting tree crowns in RGB images.
-    
+
     Args:
         num_classes (int): number of classes in the model
         config_file (str): path to deepforest config file

From 9d6de8ef5a535d1bab27f24d57b3627c5baed46a Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Sun, 5 Jan 2025 08:31:52 -0800
Subject: [PATCH 17/23] update docs

---
 src/deepforest/main.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/deepforest/main.py b/src/deepforest/main.py
index b7cf66e6..a4f00ef1 100644
--- a/src/deepforest/main.py
+++ b/src/deepforest/main.py
@@ -28,12 +28,12 @@ class deepforest(pl.LightningModule, PyTorchModelHubMixin):
     Args:
         num_classes (int): number of classes in the model
         config_file (str): path to deepforest config file
-        model (model.Model()): a deepforest model object, see model.Model().
-        config_args (dict): a dictionary of key->value to update
-        config file at run time. e.g. {"batch_size":10}
-        This is useful for iterating over arguments during model testing.
-            existing_train_dataloader: a Pytorch dataloader that yields a tuple path, images, targets
-            existing_val_dataloader: a Pytorch dataloader that yields a tuple path, images, targets
+        model (model.Model()): a deepforest model object, see model.Model()
+        config_args (dict): a dictionary of key->value to update config file at run time. 
+            e.g. {"batch_size":10}. This is useful for iterating over arguments during model testing.
+        existing_train_dataloader: a Pytorch dataloader that yields a tuple path, images, targets
+        existing_val_dataloader: a Pytorch dataloader that yields a tuple path, images, targets
+
     Returns:
         self: a deepforest pytorch lightning module
     """

From 371af46eeb91ea8a1ae4d4fdcc2984064b5f92d7 Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Sun, 5 Jan 2025 13:42:34 -0800
Subject: [PATCH 18/23] docformatter

---
 src/deepforest/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/deepforest/main.py b/src/deepforest/main.py
index a4f00ef1..5cba0b5a 100644
--- a/src/deepforest/main.py
+++ b/src/deepforest/main.py
@@ -29,7 +29,7 @@ class deepforest(pl.LightningModule, PyTorchModelHubMixin):
         num_classes (int): number of classes in the model
         config_file (str): path to deepforest config file
         model (model.Model()): a deepforest model object, see model.Model()
-        config_args (dict): a dictionary of key->value to update config file at run time. 
+        config_args (dict): a dictionary of key->value to update config file at run time.
             e.g. {"batch_size":10}. This is useful for iterating over arguments during model testing.
         existing_train_dataloader: a Pytorch dataloader that yields a tuple path, images, targets
         existing_val_dataloader: a Pytorch dataloader that yields a tuple path, images, targets

From b3b24993dd0bb4c4a2f0e3615b3af7689c579e97 Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Sun, 5 Jan 2025 15:23:25 -0800
Subject: [PATCH 19/23] builds locally

---
 src/deepforest/main.py | 85 +++++++++++++++++++-----------------------
 1 file changed, 38 insertions(+), 47 deletions(-)

diff --git a/src/deepforest/main.py b/src/deepforest/main.py
index 5cba0b5a..fdd9b3c8 100644
--- a/src/deepforest/main.py
+++ b/src/deepforest/main.py
@@ -36,6 +36,7 @@ class deepforest(pl.LightningModule, PyTorchModelHubMixin):
 
     Returns:
         self: a deepforest pytorch lightning module
+
     """
 
     def __init__(self,
@@ -142,6 +143,7 @@ def load_model(self, model_name="weecology/deepforest-tree", revision='main'):
 
         Returns:
             None
+
         """
         # Load the model using from_pretrained
         self.create_model()
@@ -192,12 +194,9 @@ def create_model(self):
         models/, as is a subclass of model.Model(). The config args in the
         .yaml are specified.
 
-        >>> # retinanet:
-        >>> #   ms_thresh: 0.1
-        >>> #   score_thresh: 0.2
-        >>> # RCNN:
-        >>> #   nms_thresh: 0.1
-        >>> # etc.
+        Returns:
+            None
+
         """
         if self.model is None:
             model_name = importlib.import_module("deepforest.models.{}".format(
@@ -208,7 +207,13 @@ def create_trainer(self, logger=None, callbacks=[], **kwargs):
         """Create a pytorch lightning training by reading config files.
 
         Args:
+            logger: A pytorch lightning logger
             callbacks (list): a list of pytorch-lightning callback classes
+            **kwargs: Additional arguments to pass to the trainer
+
+        Returns:
+            None
+
         """
         # If val data is passed, monitor learning rate and setup classification metrics
         if not self.config["validation"]["csv_file"] is None:
@@ -357,18 +362,20 @@ def predict_image(self,
                       thickness: int = 1):
         """Predict a single image with a deepforest model.
 
-        Deprecation warning: The 'return_plot', and related 'color' and 'thickness' arguments are deprecated and will be removed in 2.0. Use visualize.plot_results on the result instead.
+        Deprecation warning: The 'return_plot', and related 'color' and 'thickness' arguments 
+        are deprecated and will be removed in 2.0. Use visualize.plot_results on the result instead.
 
         Args:
             image: a float32 numpy array of a RGB with channels last format
             path: optional path to read image from disk instead of passing image arg
-            (deprecated) return_plot: return a plot of the image with predictions overlaid
-            (deprecated) color: color of the bounding box as a tuple of BGR color, e.g. orange annotations is (0, 165, 255)
-            (deprectaed) thickness: thickness of the rectangle border line in px
+            return_plot: return a plot of the image with predictions overlaid (deprecated)
+            color: color of the bounding box as a tuple of BGR color (deprecated)
+            thickness: thickness of the rectangle border line in px (deprecated)
 
         Returns:
             result: A pandas dataframe of predictions (Default)
             img: The input with predictions overlaid (Optional)
+
         """
 
         # Ensure we are in eval mode
@@ -490,41 +497,24 @@ def predict_tile(self,
 
         Args:
             raster_path: Path to image on disk
-            image (array): Numpy image array in BGR channel order
-                following openCV convention
-            patch_size: patch size for each window.
-            patch_overlap: patch overlap among windows.
-            iou_threshold: Minimum iou overlap among predictions between
-                windows to be suppressed.
-                Lower values suppress more boxes at edges.
-            in_memory: If true, the entire dataset is loaded into memory, which increases speed. This is useful for small datasets, but not recommended for very large datasets.
+            image (array): Numpy image array in BGR channel order following openCV convention
+            patch_size: patch size for each window
+            patch_overlap: patch overlap among windows
+            iou_threshold: Minimum iou overlap among predictions between windows to be suppressed
+            in_memory: If true, the entire dataset is loaded into memory
             mosaic: Return a single prediction dataframe (True) or a tuple of image crops and predictions (False)
             sigma: variance of Gaussian function used in Gaussian Soft NMS
             thresh: the score thresh used to filter bboxes after soft-nms performed
-            cropModel: a deepforest.model.CropModel object to predict on crops
+            crop_model: a deepforest.model.CropModel object to predict on crops
             crop_transform: a torchvision.transforms object to apply to crops
             crop_augment: a boolean to apply augmentations to crops
-            (deprecated) return_plot: return a plot of the image with predictions overlaid
-            (deprecated) color: color of the bounding box as a tuple of BGR color, e.g. orange annotations is (0, 165, 255)
-            (deprecated) thickness: thickness of the rectangle border line in px
-
-        Deprecated Args:
-            - return_plot: Deprecated in favor of using `visualize.plot_results` for
-              rendering predictions. Will be removed in version 2.0.
-            - color: Deprecated bounding box color for visualizations.
-            - thickness: Deprecated bounding box thickness for visualizations.
-
-        Raises:
-            - ValueError: If `raster_path` is None when `in_memory=False`.
-            - ValueError: If `workers` is greater than 0 when `in_memory=False`. Multiprocessing is not supported when using out-of-memory datasets, rasterio is not threadsafe.
+            return_plot: return a plot of the image with predictions overlaid (deprecated)
+            color: color of the bounding box as a tuple of BGR color (deprecated)
+            thickness: thickness of the rectangle border line in px (deprecated)
 
         Returns:
-            - If `return_plot` is True, returns an image with predictions overlaid (deprecated).
-            - If `mosaic` is True, returns a Pandas DataFrame containing the predicted
-              bounding boxes, scores, and labels.
-            - If `mosaic` is False, returns a list of tuples where each tuple contains
-              a DataFrame of predictions and its corresponding image crop.
-            - Returns None if no predictions are made.
+            pd.DataFrame or tuple: Predictions dataframe or (predictions, crops) tuple
+
         """
         self.model.eval()
         self.model.nms_thresh = self.config["nms_thresh"]
@@ -726,10 +716,10 @@ def calculate_empty_frame_accuracy(self, ground_df, predictions_df):
                 Must have column 'image_path'.
 
         Returns:
-            float: Accuracy score for empty frame detection. A score of 1.0 means the model correctly
+            float or None: Accuracy score for empty frame detection. A score of 1.0 means the model correctly
                 identified all empty frames (no false positives), while 0.0 means it predicted objects
-                in all empty frames (all false positives).
-            None: If there are no empty frames, return None
+                in all empty frames (all false positives). Returns None if there are no empty frames.
+
         """
         # Find images that are marked as empty in ground truth (all coordinates are 0)
         empty_images = ground_df.loc[(ground_df.xmin == 0) & (ground_df.ymin == 0) &
@@ -959,16 +949,17 @@ def configure_optimizers(self):
             return optimizer
 
     def evaluate(self, csv_file, root_dir, iou_threshold=None, savedir=None):
-        """Compute intersection-over-union and precision/recall for a given
-        iou_threshold.
+        """Compute intersection-over-union and precision/recall for a given iou_threshold.
 
         Args:
-            csv_file: location of a csv file with columns "name","xmin","ymin","xmax","ymax","label", each box in a row
-            root_dir: location of files in the dataframe 'name' column.
-            iou_threshold: float [0,1] intersection-over-union union between annotation and prediction to be scored true positive
+            csv_file: location of a csv file with columns "name","xmin","ymin","xmax","ymax","label"
+            root_dir: location of files in the dataframe 'name' column
+            iou_threshold: float [0,1] intersection-over-union threshold for true positive
             savedir: location to save images with bounding boxes
+
         Returns:
-            results: dict of ("results", "precision", "recall") for a given threshold
+            dict: Results dictionary containing precision, recall and other metrics
+
         """
         ground_df = utilities.read_file(csv_file)
         ground_df["label"] = ground_df.label.apply(lambda x: self.label_dict[x])

From 2abcbdf31a2117e212a820ce77f45d6f780bb365 Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Sun, 5 Jan 2025 16:30:47 -0800
Subject: [PATCH 20/23] back to docformatter strings

---
 src/deepforest/main.py | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/src/deepforest/main.py b/src/deepforest/main.py
index fdd9b3c8..6aa99d23 100644
--- a/src/deepforest/main.py
+++ b/src/deepforest/main.py
@@ -36,7 +36,6 @@ class deepforest(pl.LightningModule, PyTorchModelHubMixin):
 
     Returns:
         self: a deepforest pytorch lightning module
-
     """
 
     def __init__(self,
@@ -143,7 +142,6 @@ def load_model(self, model_name="weecology/deepforest-tree", revision='main'):
 
         Returns:
             None
-
         """
         # Load the model using from_pretrained
         self.create_model()
@@ -196,7 +194,6 @@ def create_model(self):
 
         Returns:
             None
-
         """
         if self.model is None:
             model_name = importlib.import_module("deepforest.models.{}".format(
@@ -213,7 +210,6 @@ def create_trainer(self, logger=None, callbacks=[], **kwargs):
 
         Returns:
             None
-
         """
         # If val data is passed, monitor learning rate and setup classification metrics
         if not self.config["validation"]["csv_file"] is None:
@@ -362,7 +358,7 @@ def predict_image(self,
                       thickness: int = 1):
         """Predict a single image with a deepforest model.
 
-        Deprecation warning: The 'return_plot', and related 'color' and 'thickness' arguments 
+        Deprecation warning: The 'return_plot', and related 'color' and 'thickness' arguments
         are deprecated and will be removed in 2.0. Use visualize.plot_results on the result instead.
 
         Args:
@@ -375,7 +371,6 @@ def predict_image(self,
         Returns:
             result: A pandas dataframe of predictions (Default)
             img: The input with predictions overlaid (Optional)
-
         """
 
         # Ensure we are in eval mode
@@ -514,7 +509,6 @@ def predict_tile(self,
 
         Returns:
             pd.DataFrame or tuple: Predictions dataframe or (predictions, crops) tuple
-
         """
         self.model.eval()
         self.model.nms_thresh = self.config["nms_thresh"]
@@ -719,7 +713,6 @@ def calculate_empty_frame_accuracy(self, ground_df, predictions_df):
             float or None: Accuracy score for empty frame detection. A score of 1.0 means the model correctly
                 identified all empty frames (no false positives), while 0.0 means it predicted objects
                 in all empty frames (all false positives). Returns None if there are no empty frames.
-
         """
         # Find images that are marked as empty in ground truth (all coordinates are 0)
         empty_images = ground_df.loc[(ground_df.xmin == 0) & (ground_df.ymin == 0) &
@@ -949,7 +942,8 @@ def configure_optimizers(self):
             return optimizer
 
     def evaluate(self, csv_file, root_dir, iou_threshold=None, savedir=None):
-        """Compute intersection-over-union and precision/recall for a given iou_threshold.
+        """Compute intersection-over-union and precision/recall for a given
+        iou_threshold.
 
         Args:
             csv_file: location of a csv file with columns "name","xmin","ymin","xmax","ymax","label"
@@ -959,7 +953,6 @@ def evaluate(self, csv_file, root_dir, iou_threshold=None, savedir=None):
 
         Returns:
             dict: Results dictionary containing precision, recall and other metrics
-
         """
         ground_df = utilities.read_file(csv_file)
         ground_df["label"] = ground_df.label.apply(lambda x: self.label_dict[x])

From 68a69d78edb1cd2122bb1a75281c8d3abcd249bd Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Tue, 7 Jan 2025 11:43:04 -0800
Subject: [PATCH 21/23] provide specific exception

---
 src/deepforest/main.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/deepforest/main.py b/src/deepforest/main.py
index 6aa99d23..56c41412 100644
--- a/src/deepforest/main.py
+++ b/src/deepforest/main.py
@@ -20,6 +20,7 @@
 from deepforest import evaluate as evaluate_iou
 
 from huggingface_hub import PyTorchModelHubMixin
+from lightning_fabric.utilities.exceptions import MisconfigurationException
 
 
 class deepforest(pl.LightningModule, PyTorchModelHubMixin):
@@ -674,7 +675,7 @@ def validation_step(self, batch, batch_idx):
         for key, value in loss_dict.items():
             try:
                 self.log("val_{}".format(key), value, on_epoch=True)
-            except:
+            except MisconfigurationException:
                 pass
 
         for index, result in enumerate(preds):
@@ -761,7 +762,7 @@ def on_validation_epoch_end(self):
         output = {key: value for key, value in output.items() if not key == "classes"}
         try:
             self.log_dict(output)
-        except:
+        except MisconfigurationException:
             pass
         self.mAP_metric.reset()
 
@@ -831,12 +832,12 @@ def on_validation_epoch_end(self):
                                         "{}_Precision".format(
                                             self.numeric_to_label_dict[row["label"]]),
                                         row["precision"])
-                                except:
+                                except MisconfigurationException:
                                     pass
                         else:
                             try:
                                 self.log(key, value)
-                            except:
+                            except MisconfigurationException:
                                 pass
 
     def predict_step(self, batch, batch_idx):

From 78c97b5ac0446f3a65bd5bcb773237490ab1cb6f Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Tue, 7 Jan 2025 11:52:49 -0800
Subject: [PATCH 22/23] deleted a test by accident during rebase

---
 tests/test_main.py | 115 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 109 insertions(+), 6 deletions(-)

diff --git a/tests/test_main.py b/tests/test_main.py
index c41c76c2..c234aa83 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -701,12 +701,107 @@ def test_predict_tile_with_crop_model_empty():
     # Assert the result
     assert result is None
 
-def test_evaluate_on_epoch_interval(m):
-    m.config["val_accuracy_interval"] = 1
-    m.config["train"]["epochs"] = 1
-    m.trainer.fit(m)
-    assert m.trainer.logged_metrics["box_precision"]
-    assert m.trainer.logged_metrics["box_recall"]
+# @pytest.mark.parametrize("batch_size", [1, 4, 8])
+# def test_batch_prediction(m, batch_size, raster_path):
+#    
+#     # Prepare input data
+#     tile = np.array(Image.open(raster_path))
+#     ds = dataset.TileDataset(tile=tile, patch_overlap=0.1, patch_size=100)
+#     dl = DataLoader(ds, batch_size=batch_size)
+    
+#     # Perform prediction
+#     predictions = []
+#     for batch in dl:
+#         prediction = m.predict_batch(batch)
+#         predictions.append(prediction)
+    
+#     # Check results
+#     assert len(predictions) == len(dl)
+#     for batch_pred in predictions:
+#         assert isinstance(batch_pred, pd.DataFrame)
+#         assert set(batch_pred.columns) == {
+#             "xmin", "ymin", "xmax", "ymax", "label", "score", "geometry"
+#         }
+
+# @pytest.mark.parametrize("batch_size", [1, 4])
+# def test_batch_training(m, batch_size, tmpdir):
+#     
+#     # Generate synthetic training data
+#     csv_file = get_data("example.csv")
+#     root_dir = os.path.dirname(csv_file)
+#     train_ds = m.load_dataset(csv_file, root_dir=root_dir)
+#     train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
+    
+#     # Configure the model and trainer
+#     m.config["batch_size"] = batch_size
+#     m.create_trainer()
+#     trainer = m.trainer
+    
+#     # Train the model
+#     trainer.fit(m, train_dl)
+    
+#     # Assertions
+#     assert trainer.current_epoch == 1
+#     assert trainer.batch_size == batch_size
+
+# @pytest.mark.parametrize("batch_size", [2, 4])
+# def test_batch_data_augmentation(m, batch_size, raster_path):
+#     
+#     tile = np.array(Image.open(raster_path))
+#     ds = dataset.TileDataset(tile=tile, patch_overlap=0.1, patch_size=100, augment=True)
+#     dl = DataLoader(ds, batch_size=batch_size)
+    
+#     predictions = []
+#     for batch in dl:
+#         prediction = m.predict_batch(batch)
+#         predictions.append(prediction)
+    
+#     assert len(predictions) == len(dl)
+#     for batch_pred in predictions:
+#         assert isinstance(batch_pred, pd.DataFrame)
+#         assert set(batch_pred.columns) == {
+#             "xmin", "ymin", "xmax", "ymax", "label", "score", "geometry"
+#         }
+
+# def test_batch_inference_consistency(m, raster_path):
+#     
+#     tile = np.array(Image.open(raster_path))
+#     ds = dataset.TileDataset(tile=tile, patch_overlap=0.1, patch_size=100)
+#     dl = DataLoader(ds, batch_size=4)
+    
+#     batch_predictions = []
+#     for batch in dl:
+#         prediction = m.predict_batch(batch)
+#         batch_predictions.append(prediction)
+    
+#     single_predictions = []
+#     for image in ds:
+#         prediction = m.predict_image(image=image)
+#         single_predictions.append(prediction)
+    
+#     batch_df = pd.concat(batch_predictions, ignore_index=True)
+#     single_df = pd.concat(single_predictions, ignore_index=True)
+    
+#     pd.testing.assert_frame_equal(batch_df, single_df)
+
+# def test_large_batch_handling(m, raster_path):
+#    
+#     tile = np.array(Image.open(raster_path))
+#     ds = dataset.TileDataset(tile=tile, patch_overlap=0.1, patch_size=100)
+#     dl = DataLoader(ds, batch_size=16)
+    
+#     predictions = []
+#     for batch in dl:
+#         prediction = m.predict_batch(batch)
+#         predictions.append(prediction)
+    
+#     assert len(predictions) > 0
+#     for batch_pred in predictions:
+#         assert isinstance(batch_pred, pd.DataFrame)
+#         assert set(batch_pred.columns) == {
+#             "xmin", "ymin", "xmax", "ymax", "label", "score", "geometry"
+#         }
+#         assert not batch_pred.empty
 
 def test_epoch_evaluation_end(m):
     preds = [{
@@ -804,3 +899,11 @@ def test_mulit_class_with_empty_frame_accuracy_without_predictions(two_class_m,
     two_class_m.create_trainer()
     results = two_class_m.trainer.validate(two_class_m)
     assert results[0]["empty_frame_accuracy"] == 1
+
+def test_evaluate_on_epoch_interval(m):  # one-epoch fit should log box precision/recall
+    m.config["validation"]["val_accuracy_interval"] = 1  # nested under "validation"; the removed version set a top-level key
+    m.config["train"]["epochs"] = 1  # a single epoch keeps the test short
+    m.create_trainer()  # not called in the removed version; presumably rebuilds the trainer from the edited config - TODO confirm
+    m.trainer.fit(m)
+    assert m.trainer.logged_metrics["box_precision"]  # truthiness check: a logged value of 0 would also fail
+    assert m.trainer.logged_metrics["box_recall"]  # same truthiness caveat as above
\ No newline at end of file

From 5a8391579efed707b40e0102c7a7e26706d072f9 Mon Sep 17 00:00:00 2001
From: bw4sz <benweinstein2010@gmail.com>
Date: Tue, 7 Jan 2025 13:19:15 -0800
Subject: [PATCH 23/23] don't log dataframes when running end of epoch
 validation

---
 src/deepforest/main.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/deepforest/main.py b/src/deepforest/main.py
index 56c41412..8e55ed92 100644
--- a/src/deepforest/main.py
+++ b/src/deepforest/main.py
@@ -834,6 +834,9 @@ def on_validation_epoch_end(self):
                                         row["precision"])
                                 except MisconfigurationException:
                                     pass
+                        elif key in ["predictions", "results"]:
+                            # Don't log dataframes of predictions or IoU results per epoch
+                            pass
                         else:
                             try:
                                 self.log(key, value)