
Fix BatchSizeFinder leaving model in train state #18826

src/lightning/pytorch/callbacks/batch_size_finder.py (12 changes: 8 additions, 4 deletions)
@@ -168,14 +168,15 @@ def setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage: O
" If this is not the intended behavior, please remove either one."
)

def scale_batch_size(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
def scale_batch_size(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", is_training: bool) -> None:
new_size = _scale_batch_size(
trainer,
self._mode,
self._steps_per_trial,
self._init_val,
self._max_trials,
self._batch_arg_name,
is_training,
)
Review comment (Contributor):
@tanaymeh On second thought, I think we can avoid adding a new argument here.

pl_module is passed to scale_batch_size anyway, so there is no need to read pl_module.training earlier.

Example:

    def scale_batch_size(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
        is_training = pl_module.training
        new_size = _scale_batch_size(
            trainer,
            self._mode,
            self._steps_per_trial,
            self._init_val,
            self._max_trials,
            self._batch_arg_name,
            is_training,
        )
...
    def on_fit_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
        self.scale_batch_size(trainer, pl_module)
...

Doing it that way would keep compatibility for people who might have called scale_batch_size in their own code (e.g. classes that inherit from BatchSizeFinder), as in the sketch below.
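For illustration, a minimal sketch of the kind of downstream code this would keep working. The MyBatchSizeFinder subclass is hypothetical and not part of this PR; it only shows that a two-argument call to scale_batch_size still succeeds if is_training is read from pl_module internally instead of being a new parameter:

    from lightning.pytorch.callbacks import BatchSizeFinder

    # Hypothetical user subclass written against the current two-argument API.
    class MyBatchSizeFinder(BatchSizeFinder):
        def on_fit_start(self, trainer, pl_module):
            # Calls the inherited method without an is_training argument;
            # reading pl_module.training inside scale_batch_size keeps this valid.
            self.scale_batch_size(trainer, pl_module)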


         self.optimal_batch_size = new_size
@@ -189,10 +190,13 @@ def on_validation_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningMod
         if trainer.sanity_checking or trainer.state.fn != "validate":
             return
 
-        self.scale_batch_size(trainer, pl_module)
+        is_training = pl_module.training
+        self.scale_batch_size(trainer, pl_module, is_training)
 
     def on_test_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
-        self.scale_batch_size(trainer, pl_module)
+        is_training = pl_module.training
+        self.scale_batch_size(trainer, pl_module, is_training)
 
     def on_predict_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
-        self.scale_batch_size(trainer, pl_module)
+        is_training = pl_module.training
+        self.scale_batch_size(trainer, pl_module, is_training)
src/lightning/pytorch/tuner/batch_size_scaling.py (7 changes: 7 additions, 0 deletions)
@@ -32,6 +32,7 @@ def _scale_batch_size(
     init_val: int = 2,
     max_trials: int = 25,
     batch_arg_name: str = "batch_size",
+    is_training: bool = True,
 ) -> Optional[int]:
     """Iteratively try to find the largest batch size for a given model that does not give an out of memory (OOM)
     error.
@@ -95,6 +96,12 @@
     trainer._checkpoint_connector.restore(ckpt_path)
     trainer.strategy.remove_checkpoint(ckpt_path)
 
+    # Set the model to training or evaluation mode based on the is_training parameter
+    if is_training:
+        trainer.lightning_module.train()
+    else:
+        trainer.lightning_module.eval()
+
     return new_size
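As a rough usage sketch of the intended behavior (model and dm are hypothetical placeholders, assumed to be a LightningModule / LightningDataModule exposing a batch_size attribute), the train/eval mode the module was in before the search should be restored once _scale_batch_size finishes:

    from lightning.pytorch import Trainer
    from lightning.pytorch.callbacks import BatchSizeFinder

    # `model` and `dm` are placeholders for a LightningModule / LightningDataModule
    # with a tunable `batch_size` attribute.
    trainer = Trainer(callbacks=[BatchSizeFinder(mode="power", steps_per_trial=3)])
    trainer.validate(model, datamodule=dm)

    # With this change the module is put back into the train/eval mode it was in
    # when the batch size search started, instead of being left in train mode.
    print(model.training)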

