Add PredictLoop #5752

Merged: 387 commits merged into master from introduce_predict_loop_1 on Feb 16, 2021
Changes from 250 commits
Commits (387)
3792b72
integrate distrib_type
awaelchli Jan 31, 2021
ef85b81
sync changes
awaelchli Jan 31, 2021
9d9a940
sync
awaelchli Feb 1, 2021
f017a39
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Feb 1, 2021
a190a56
fixes
awaelchli Feb 1, 2021
73bb607
add forgotten generators
awaelchli Feb 1, 2021
c8c74f3
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Feb 1, 2021
ae71997
add missing logic
awaelchli Feb 1, 2021
d89847b
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Feb 1, 2021
0e686c3
update
awaelchli Feb 1, 2021
d6a43ea
import
awaelchli Feb 1, 2021
ceb8f75
missed imports
awaelchli Feb 1, 2021
fbb7c20
import fixes
awaelchli Feb 1, 2021
b610999
isort
awaelchli Feb 1, 2021
9b79924
mv f
awaelchli Feb 1, 2021
9afe54d
changelog
awaelchli Feb 1, 2021
3b63e82
Merge branch 'release/1.2-dev' into ref/update-plugins
awaelchli Feb 1, 2021
ca8cb68
format
awaelchli Feb 1, 2021
0633745
move helper to parallel plugin
awaelchli Feb 1, 2021
a622e0b
d
awaelchli Feb 1, 2021
18c682f
Merge branch 'ref/update-plugins' into accelerator-refactor-sharted-4
awaelchli Feb 1, 2021
f275803
add world size
awaelchli Feb 1, 2021
4ae008b
clean up
awaelchli Feb 1, 2021
3b3918b
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Feb 1, 2021
d4c6308
duplicate
awaelchli Feb 1, 2021
7eef4a0
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Feb 2, 2021
9949164
activate ddp_sharded and tpu
awaelchli Feb 2, 2021
6d47357
set nvidia flags
awaelchli Feb 2, 2021
a6864ec
remove unused colab var
awaelchli Feb 2, 2021
b4b9724
use_tpu <-> on_tpu attrs
awaelchli Feb 2, 2021
81001e3
make some ddp_cpu and clusterplugin tests pass
awaelchli Feb 2, 2021
cea000d
Ref/accelerator connector (#5742)
justusschock Feb 2, 2021
933e2a1
plugins
awaelchli Feb 2, 2021
a97afb4
add predict_loop
tchaton Feb 2, 2021
ad451d8
manual optimization
justusschock Feb 2, 2021
c956c54
clean predictloop
tchaton Feb 2, 2021
a30a3cf
update optimizer routing
justusschock Feb 2, 2021
0ecb3f7
add predict loop on new accelerator
tchaton Feb 2, 2021
bbb8416
resolve a bug
tchaton Feb 2, 2021
a05b291
add rank to torchelastic
justusschock Feb 2, 2021
53efe55
add predict_loop
tchaton Feb 2, 2021
1c9d57e
add predict loop on new accelerator
tchaton Feb 2, 2021
154dae2
resolve a bug
tchaton Feb 2, 2021
4388e73
fix memory mixed precision
awaelchli Feb 2, 2021
872af55
Merge branch 'accelerator-refactor-sharded' into introduce_predict_lo…
tchaton Feb 2, 2021
8df9893
update
tchaton Feb 2, 2021
be9d029
setstate on trainer for pickling in ddp spawn
awaelchli Feb 2, 2021
8369fb2
add predict_loop
tchaton Feb 2, 2021
4c908f7
clean predictloop
tchaton Feb 2, 2021
c5b942f
add predict loop on new accelerator
tchaton Feb 2, 2021
421279d
resolve a bug
tchaton Feb 2, 2021
05e60ed
add predict_loop
tchaton Feb 2, 2021
2226640
add predict loop on new accelerator
tchaton Feb 2, 2021
4a29bb2
resolve a bug
tchaton Feb 2, 2021
8d63beb
add predict_loop
tchaton Feb 2, 2021
2a24e41
add predict loop on new accelerator
tchaton Feb 2, 2021
976bca9
resolve a bug
tchaton Feb 2, 2021
9dc38b3
add predict_loop
tchaton Feb 2, 2021
cf685d9
add predict loop on new accelerator
tchaton Feb 2, 2021
0c438f5
resolve a bug
tchaton Feb 2, 2021
ccf499c
add predict_loop
tchaton Feb 2, 2021
3fb57f3
clean predictloop
tchaton Feb 2, 2021
7e527f4
add predict loop on new accelerator
tchaton Feb 2, 2021
3575436
resolve a bug
tchaton Feb 2, 2021
c03e5bb
add predict_loop
tchaton Feb 2, 2021
a2f9f3c
add predict loop on new accelerator
tchaton Feb 2, 2021
4d24eff
resolve a bug
tchaton Feb 2, 2021
b155aab
resolve tests
tchaton Feb 2, 2021
a90a160
add predict method
awaelchli Feb 2, 2021
767bee0
add back commented accelerator code
awaelchli Feb 2, 2021
f771a7f
adapt test for sync_batch_norm to new plugin
awaelchli Feb 3, 2021
1a3b04e
fix deprecated tests
awaelchli Feb 3, 2021
a1f4938
fix ddp cpu choice when no num_processes are given
awaelchli Feb 3, 2021
38bc8b7
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
awaelchli Feb 3, 2021
ce6b6de
yapf format
awaelchli Feb 3, 2021
3b7c20b
skip a memory test that cannot pass anymore
awaelchli Feb 3, 2021
609c848
remove sanetize
tchaton Feb 3, 2021
9a26178
rename train to run_train
tchaton Feb 3, 2021
f21780a
remove useless hooks
tchaton Feb 3, 2021
52b8db1
add misconfigurationException
tchaton Feb 3, 2021
b00c77c
remove wrong naming
tchaton Feb 3, 2021
864780f
resolve some legacy
tchaton Feb 3, 2021
ff7c50c
Merge branch 'accelerator-refactor-sharded' into introduce_predict_lo…
tchaton Feb 3, 2021
64e61a5
udpate docstring
tchaton Feb 3, 2021
9b8eed0
Merge branch 'introduce_predict_loop_1' of https://github.com/PyTorch…
tchaton Feb 3, 2021
f538c75
fix pickle error in spawn plugin
awaelchli Feb 3, 2021
b44d82e
x
awaelchli Feb 3, 2021
3820e77
avoid
awaelchli Feb 3, 2021
08ae327
x
awaelchli Feb 3, 2021
7d0e094
avoid tons of warnings from importing deprecated modules
awaelchli Feb 3, 2021
1028011
fix cyclic import in docs build
awaelchli Feb 3, 2021
11bd0d6
add support for sharded
justusschock Feb 4, 2021
6bf0b60
update typing
justusschock Feb 4, 2021
f94082b
add sharded and sharded_spawn to distributed types
justusschock Feb 4, 2021
7939b99
make unwrap model default
justusschock Feb 4, 2021
9131ffb
refactor LightningShardedDataParallel similar to LightningDistributed…
justusschock Feb 4, 2021
ed7425c
update sharded spawn to reflect changes
justusschock Feb 4, 2021
209a164
update sharded to reflect changes
justusschock Feb 4, 2021
837a070
Merge 1.1.5 changes
awaelchli Feb 4, 2021
136b321
fix merge
awaelchli Feb 4, 2021
ffcb535
fix merge
awaelchli Feb 4, 2021
1edfa73
yapf isort
awaelchli Feb 4, 2021
a689b81
merge 1.1.6
awaelchli Feb 4, 2021
330b14c
fix merge
awaelchli Feb 4, 2021
ef258d5
yapf isort
awaelchli Feb 4, 2021
c85000d
fix indentation in test
awaelchli Feb 4, 2021
5f3a35e
copy over reinit scheduler implementation from dev1.2
awaelchli Feb 4, 2021
fa1c9b7
fix apex tracking calls with dev_debugger
awaelchli Feb 5, 2021
e330a11
reduce diff to dev1.2, clean up
awaelchli Feb 5, 2021
994ac82
fix trainer config test when gpus>0 and num_processes >0 and ddp_cpu
awaelchli Feb 5, 2021
1a78601
sort plugin tests legacy/new
awaelchli Feb 6, 2021
4b76448
fix error handling for amp on cpu
awaelchli Feb 6, 2021
bfd54ab
Merge branch 'release/1.2-dev' into patch117
awaelchli Feb 6, 2021
0574d22
fix merge
awaelchli Feb 6, 2021
6ef6637
Merge branch 'patch117' into accelerator-refactor-sharded
awaelchli Feb 6, 2021
9feda39
[Feat] Resolve manual_backward (#5837)
tchaton Feb 6, 2021
7bb9d9f
fix tests/accelerator tests on cpu
awaelchli Feb 6, 2021
13ae1ff
[BugFix] Resolve manual optimization (#5852)
tchaton Feb 6, 2021
fc3b4db
Merge formatting changes from 1.2 branch
awaelchli Feb 6, 2021
b437642
Remove copy trainer parameters to happen earlier within the loop and …
SeanNaren Feb 7, 2021
8c6aa83
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
Feb 7, 2021
beb980a
resovle a bug
Feb 7, 2021
7a0fd27
Accelerator refactor sharded rpc (#5854)
justusschock Feb 7, 2021
0d0ced5
resolve bug
Feb 7, 2021
1f3ab76
fix assert in rpc test
awaelchli Feb 7, 2021
f1b1121
resolve a test
Feb 7, 2021
cd31fa1
fix docs compilation
awaelchli Feb 8, 2021
f48793e
accelerator refactor - fix for sharded parity test (#5866)
awaelchli Feb 8, 2021
81ff6ea
Remove DDP2 as this does not apply
Feb 8, 2021
20deb46
Add missing pre optimizer hook to ensure lambda closure is called
Feb 8, 2021
be4d1a2
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
Feb 8, 2021
0ac5fc4
fix apex docstring
awaelchli Feb 8, 2021
07fdd95
[accelerator][BugFix] Resolve some test for 1 gpu (#5863)
tchaton Feb 8, 2021
384b791
yapf isort
awaelchli Feb 8, 2021
b1a84b8
resolve flake8
tchaton Feb 8, 2021
a157a29
fix apex doctests
awaelchli Feb 8, 2021
08cfc65
fix apex doctests 2
awaelchli Feb 8, 2021
7888bfd
resolve docs
tchaton Feb 8, 2021
b5b4243
update drone
tchaton Feb 8, 2021
93ceb4c
Merge branch 'accelerator-refactor-sharded' of https://github.com/PyT…
tchaton Feb 8, 2021
d001bcf
clean env
Feb 8, 2021
ad47f47
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
tchaton Feb 8, 2021
60bfb1a
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
tchaton Feb 8, 2021
0608a41
update
Feb 8, 2021
f0120b5
update
Feb 8, 2021
bf8874e
Merge branch 'accelerator-refactor-sharded' of https://github.com/PyT…
Feb 8, 2021
baf7d7f
update
tchaton Feb 8, 2021
9360aad
update
tchaton Feb 8, 2021
b814cdc
merge
justusschock Feb 9, 2021
0d3ea37
Merge branch 'accelerator-refactor-sharded' of github.com:PytorchLigh…
justusschock Feb 9, 2021
f1f90c2
Fix RPC related tests, clean out old API, update for new accelerator …
SeanNaren Feb 9, 2021
6d05881
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
justusschock Feb 10, 2021
d86fdff
Update test_remove_1-4.py
justusschock Feb 10, 2021
5fbc1cf
Expose properties for tpu cores/gpus/num_gpus
Feb 10, 2021
aa9aea0
Add root GPU property
Feb 10, 2021
c35baf1
Move properties to properties.py
Feb 10, 2021
a9c6e21
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
awaelchli Feb 10, 2021
8f3947b
move tests that were previously in drone
awaelchli Feb 10, 2021
50ecc4a
Fix root GPU property (#5908)
SeanNaren Feb 10, 2021
c7d0075
fix best model path transfer when no checkpoint callback available
awaelchli Feb 10, 2021
3f61d15
Merge remote-tracking branch 'original/accelerator-refactor-sharded' …
awaelchli Feb 10, 2021
061ea46
Fix setup hook order [wip] (#5858)
SeanNaren Feb 10, 2021
1fe1f91
rename ddp sequential -> rpc sequential for special test
awaelchli Feb 10, 2021
3683f5a
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
awaelchli Feb 10, 2021
1f01b81
revert
awaelchli Feb 10, 2021
135c236
fix stupid merge problem
awaelchli Feb 10, 2021
222653d
Use property in connector for sampler (#5913)
SeanNaren Feb 10, 2021
f4311cd
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
awaelchli Feb 11, 2021
b210dee
merge the import conflicts
awaelchli Feb 11, 2021
236009e
fix spawning of processes in slurm
awaelchli Feb 11, 2021
aace276
[wip] Fix some bugs for TPU [skip ci] (#5878)
tchaton Feb 11, 2021
68273f5
resolve some tests
Feb 11, 2021
ca77fa4
update
Feb 11, 2021
c35edfd
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
justusschock Feb 11, 2021
8cacef7
fix imports
justusschock Feb 11, 2021
f7bbe48
update
Feb 11, 2021
30d9800
Merge branch 'accelerator-refactor-sharded' of https://github.com/PyT…
Feb 11, 2021
25f7f13
resolve flake8
tchaton Feb 11, 2021
fa28c41
update azure pipeline
tchaton Feb 11, 2021
51c27e6
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
tchaton Feb 11, 2021
b888d68
skip a sharded test on cpu that requires a gpu
awaelchli Feb 11, 2021
01ca4cd
resolve tpus
Feb 11, 2021
181d143
Merge branch 'master' into accelerator-refactor-sharded
justusschock Feb 11, 2021
946a1e9
resolve bug
Feb 11, 2021
2ad1a6e
Merge branch 'accelerator-refactor-sharded' of https://github.com/PyT…
Feb 11, 2021
6e0aff0
resolve flake8
tchaton Feb 11, 2021
a931791
update
Feb 11, 2021
319d034
Merge branch 'accelerator-refactor-sharded' of https://github.com/PyT…
Feb 11, 2021
4117bec
updat utils
Feb 11, 2021
8d000f7
Merge branch 'master' into accelerator-refactor-sharded
tchaton Feb 11, 2021
0b1ba67
revert permission change on files
awaelchli Feb 11, 2021
cc385b4
suggestions from carlos
awaelchli Feb 11, 2021
e9eb318
remove unrelated formatting changes
awaelchli Feb 11, 2021
7c08400
remove incomplete comment
awaelchli Feb 11, 2021
7c3d184
Update pytorch_lightning/accelerators/__init__.py
awaelchli Feb 11, 2021
503426e
remove unrelated formatting change
awaelchli Feb 11, 2021
c0fbf7a
add types
awaelchli Feb 11, 2021
23a9a10
warn 1.7 ddp manual backward only if ddp kwarg unset
awaelchli Feb 11, 2021
a70ee4a
yapf + isort
awaelchli Feb 11, 2021
b0621c4
pep8 unused imports
awaelchli Feb 11, 2021
18bfe70
Merge branch 'master' into accelerator-refactor-sharded
awaelchli Feb 11, 2021
7b0515d
fix cyclic import in docs
awaelchli Feb 12, 2021
d966057
Apply suggestions from code review
Borda Feb 12, 2021
f636d9d
typer in accelerator.py
Borda Feb 12, 2021
5579ea7
typo
tchaton Feb 12, 2021
dea37de
Merge branch 'accelerator-refactor-sharded' into introduce_predict_lo…
tchaton Feb 12, 2021
57fe3dd
resolve flake8
tchaton Feb 12, 2021
ae0587f
update code
tchaton Feb 12, 2021
4583b5f
update
tchaton Feb 12, 2021
0db6c67
Update pytorch_lightning/trainer/predict_loop.py
tchaton Feb 12, 2021
1264dbd
Update pytorch_lightning/trainer/predict_loop.py
tchaton Feb 12, 2021
0f3c33e
Merge branch 'master' into introduce_predict_loop_1
awaelchli Feb 13, 2021
57eb883
fix merge
awaelchli Feb 13, 2021
990b307
fix merge
awaelchli Feb 13, 2021
24dcbea
reset legacy accelerator
awaelchli Feb 13, 2021
4a7953e
add missing rename dispatch
awaelchli Feb 13, 2021
cc35b09
rename post traning
awaelchli Feb 13, 2021
483b61e
Merge branch 'master' into introduce_predict_loop_1
tchaton Feb 13, 2021
5618318
Merge branch 'master' into introduce_predict_loop_1
tchaton Feb 13, 2021
5b18bfb
Merge branch 'master' of https://github.com/PyTorchLightning/pytorch-…
tchaton Feb 13, 2021
deac085
Merge branch 'predict_loop_2' of https://github.com/PyTorchLightning/…
tchaton Feb 13, 2021
cd9fffc
update code
tchaton Feb 13, 2021
2de8fce
Merge branch 'predict_loop_2' of https://github.com/PyTorchLightning/…
tchaton Feb 13, 2021
6cca568
resolved comments
tchaton Feb 13, 2021
10eda2f
Merge branch 'predict_loop_2' into introduce_predict_loop_1
tchaton Feb 13, 2021
f4977aa
typo
tchaton Feb 13, 2021
52038c1
Merge branch 'master' into introduce_predict_loop_1
tchaton Feb 13, 2021
a77c4e8
typo
tchaton Feb 13, 2021
cdfa212
Merge branch 'introduce_predict_loop_1' of https://github.com/PyTorch…
tchaton Feb 13, 2021
b4a3884
Merge branch 'master' into introduce_predict_loop_1
tchaton Feb 13, 2021
00f9b4e
Merge branch 'master' into introduce_predict_loop_1
tchaton Feb 13, 2021
5c3a87d
add flow description
tchaton Feb 13, 2021
33654be
Merge branch 'introduce_predict_loop_1' of https://github.com/PyTorch…
tchaton Feb 13, 2021
73bfc4c
Merge branch 'master' into introduce_predict_loop_1
tchaton Feb 14, 2021
3ee31e1
resolve comments
tchaton Feb 15, 2021
3984dbc
Merge branch 'master' into introduce_predict_loop_1
tchaton Feb 15, 2021
25a7e6f
update on comments
tchaton Feb 15, 2021
35f90b2
update flow
tchaton Feb 15, 2021
b477665
add backticks
tchaton Feb 15, 2021
8a3533c
Merge branch 'master' into introduce_predict_loop_1
tchaton Feb 15, 2021
9210535
resolve tpu
tchaton Feb 15, 2021
1f9c9e5
Merge branch 'master' into introduce_predict_loop_1
tchaton Feb 15, 2021
e870159
Merge branch 'master' into introduce_predict_loop_1
tchaton Feb 15, 2021
5e06d12
Merge branch 'master' into introduce_predict_loop_1
mergify[bot] Feb 15, 2021
5bcff93
Merge branch 'master' into introduce_predict_loop_1
mergify[bot] Feb 15, 2021
7c331c0
Merge branch 'master' into introduce_predict_loop_1
mergify[bot] Feb 16, 2021
8cc0bf3
Merge branch 'master' into introduce_predict_loop_1
mergify[bot] Feb 16, 2021
4a3e277
Merge branch 'master' into introduce_predict_loop_1
mergify[bot] Feb 16, 2021
0cba060
Merge branch 'master' into introduce_predict_loop_1
mergify[bot] Feb 16, 2021
fd08085
Merge branch 'master' into introduce_predict_loop_1
mergify[bot] Feb 16, 2021
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -78,6 +78,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Added AUC/AUROC class interface ([#5479](https://github.com/PyTorchLightning/pytorch-lightning/pull/5479))


- Added `PredictLoop` object ([#5752](https://github.com/PyTorchLightning/pytorch-lightning/pull/5752))


- Added `QuantizationAwareTraining` callback ([#5706](https://github.com/PyTorchLightning/pytorch-lightning/pull/5706))


33 changes: 24 additions & 9 deletions pytorch_lightning/accelerators/accelerator.py
@@ -140,9 +140,8 @@ def training_step(self, args):

         args[0] = batch

-        with self.precision_plugin.train_step_context():
-            with self.training_type_plugin.train_step_context():
-                return self.training_type_plugin.training_step(*args)
+        with self.precision_plugin.train_step_context(), self.training_type_plugin.train_step_context():
+            return self.training_type_plugin.training_step(*args)

def post_training_step(self):
self.training_type_plugin.post_training_step()
@@ -162,9 +161,8 @@ def validation_step(self, args):

         args[0] = batch

-        with self.precision_plugin.val_step_context():
-            with self.training_type_plugin.val_step_context():
-                return self.training_type_plugin.validation_step(*args)
+        with self.precision_plugin.val_step_context(), self.training_type_plugin.val_step_context():
+            return self.training_type_plugin.validation_step(*args)

def test_step(self, args):
"""The actual test step.
@@ -181,9 +179,26 @@ def test_step(self, args):

         args[0] = batch

-        with self.precision_plugin.test_step_context():
-            with self.training_type_plugin.test_step_context():
-                return self.training_type_plugin.test_step(*args)
+        with self.precision_plugin.test_step_context(), self.training_type_plugin.test_step_context():
+            return self.training_type_plugin.test_step(*args)

def predict(self, args):
"""The actual predict step.

Args:
            args: the arguments for the model's predict step. Can consist of the following:
                batch (:class:`~torch.Tensor` | (:class:`~torch.Tensor`, ...) | [:class:`~torch.Tensor`, ...]):
                    The output of your :class:`~torch.utils.data.DataLoader`. A tensor, tuple or list.
                batch_idx (int): The index of this batch.
                dataloader_idx (int): The index of the dataloader that produced this batch
                    (only if multiple predict dataloaders used).
"""
batch = self.to_device(args[0])

args[0] = batch

with self.precision_plugin.predict_context(), self.training_type_plugin.predict_context():
return self.training_type_plugin.predict(*args)

def training_step_end(self, output):
"""A hook to do something at the end of the training step
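The collapse of the nested with blocks above relies on a standard Python equivalence: a single with statement with comma-separated context managers enters them left to right, exactly like nesting. A minimal, self-contained sketch of that behavior (the context managers here are illustrative stand-ins, not Lightning's real plugin contexts):

import contextlib

@contextlib.contextmanager
def precision_context():
    # Stand-in for precision_plugin.train_step_context()
    print("precision: enter")
    try:
        yield
    finally:
        print("precision: exit")

@contextlib.contextmanager
def plugin_context():
    # Stand-in for training_type_plugin.train_step_context()
    print("plugin: enter")
    try:
        yield
    finally:
        print("plugin: exit")

# Equivalent to nesting: with precision_context(): with plugin_context(): ...
with precision_context(), plugin_context():
    print("running the step")

Both forms print the enter/exit lines in the same order, so the one-line version changes nothing about how the four step methods are wrapped.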
63 changes: 56 additions & 7 deletions pytorch_lightning/callbacks/progress.py
@@ -67,6 +67,7 @@ def __init__(self):
self._train_batch_idx = 0
self._val_batch_idx = 0
self._test_batch_idx = 0
self._predict_batch_idx = 0

@property
def trainer(self):
@@ -96,6 +97,14 @@ def test_batch_idx(self) -> int:
"""
return self._test_batch_idx

@property
def predict_batch_idx(self) -> int:
"""
The current batch index being processed during predicting.
Use this to update your progress bar.
"""
return self._predict_batch_idx

@property
def total_train_batches(self) -> int:
"""
@@ -108,7 +117,7 @@ def total_train_batches(self) -> int:
@property
def total_val_batches(self) -> int:
"""
-        The total number of training batches during validation, which may change from epoch to epoch.
+        The total number of validation batches during validation, which may change from epoch to epoch.
Use this to set the total number of iterations in the progress bar. Can return ``inf`` if the
validation dataloader is of infinite size.
"""
@@ -121,12 +130,21 @@ def total_val_batches(self) -> int:
@property
def total_test_batches(self) -> int:
"""
-        The total number of training batches during testing, which may change from epoch to epoch.
+        The total number of testing batches during testing, which may change from epoch to epoch.
Use this to set the total number of iterations in the progress bar. Can return ``inf`` if the
test dataloader is of infinite size.
"""
return sum(self.trainer.num_test_batches)

@property
def total_predict_batches(self) -> int:
"""
The total number of prediction batches during predicting, which may change from epoch to epoch.
Use this to set the total number of iterations in the progress bar. Can return ``inf`` if the
predict dataloader is of infinite size.
"""
return sum(self.trainer.num_predict_batches)

def disable(self):
"""
You should provide a way to disable the progress bar.
@@ -168,6 +186,12 @@ def on_test_start(self, trainer, pl_module):
def on_test_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx):
self._test_batch_idx += 1

def on_predict_start(self, trainer, pl_module):
self._predict_batch_idx = 0

def on_predict_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx):
self._predict_batch_idx += 1


class ProgressBar(ProgressBarBase):
r"""
@@ -282,6 +306,20 @@ def init_train_tqdm(self) -> tqdm:
)
return bar

def init_predict_tqdm(self) -> tqdm:
""" Override this to customize the tqdm bar for predicting. """
bar = tqdm(
desc='Predicting',
initial=self.train_batch_idx,
position=(2 * self.process_position),
disable=self.is_disabled,
leave=True,
dynamic_ncols=True,
file=sys.stdout,
smoothing=0,
)
return bar

def init_validation_tqdm(self) -> tqdm:
""" Override this to customize the tqdm bar for validation. """
bar = tqdm(
@@ -294,12 +332,10 @@ def init_validation_tqdm(self) -> tqdm:
)
return bar

-    def init_test_tqdm(self, trainer=None) -> tqdm:
+    def init_test_tqdm(self) -> tqdm:
         """ Override this to customize the tqdm bar for testing. """
-        desc = "Testing"
-        desc = "Predicting" if trainer is not None and getattr(trainer, "is_predicting", False) else "Testing"
         bar = tqdm(
-            desc=desc,
+            desc="Testing",
position=(2 * self.process_position),
disable=self.is_disabled,
leave=True,
@@ -365,7 +401,7 @@ def on_train_end(self, trainer, pl_module):

def on_test_start(self, trainer, pl_module):
super().on_test_start(trainer, pl_module)
-        self.test_progress_bar = self.init_test_tqdm(trainer=trainer)
+        self.test_progress_bar = self.init_test_tqdm()
self.test_progress_bar.total = convert_inf(self.total_test_batches)

def on_test_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx):
@@ -377,6 +413,19 @@ def on_test_end(self, trainer, pl_module):
super().on_test_end(trainer, pl_module)
self.test_progress_bar.close()

def on_predict_start(self, trainer, pl_module):
super().on_predict_start(trainer, pl_module)
self.predict_progress_bar = self.init_predict_tqdm()
self.predict_progress_bar.total = convert_inf(self.total_predict_batches)

def on_predict_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx):
super().on_predict_batch_end(trainer, pl_module, outputs, batch, batch_idx, dataloader_idx)
if self._should_update(self.predict_batch_idx, self.total_predict_batches):
self._update_bar(self.predict_progress_bar)

def on_predict_end(self, trainer, pl_module):
self.predict_progress_bar.close()

def _should_update(self, current, total):
return self.is_enabled and (current % self.refresh_rate == 0 or current == total)

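Because prediction now has its own batch counters (predict_batch_idx, total_predict_batches) and its own init_predict_tqdm hook, the prediction bar can be customized exactly like the training and test bars. A hedged sketch of a user-side override (the class name and description string are invented for the example):

from tqdm import tqdm
from pytorch_lightning.callbacks import ProgressBar

class InferenceProgressBar(ProgressBar):
    """Progress bar that relabels the prediction phase."""

    def init_predict_tqdm(self) -> tqdm:
        # Reuse the default prediction bar and only change its description.
        bar = super().init_predict_tqdm()
        bar.set_description("Running inference")
        return bar

Passing an instance via Trainer(callbacks=[InferenceProgressBar()]) replaces the default bar, assuming the callback mechanism of this Lightning version.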
4 changes: 4 additions & 0 deletions pytorch_lightning/core/datamodule.py
@@ -260,6 +260,10 @@ def val_dataloader(self, *args, **kwargs) -> Union[DataLoader, List[DataLoader]]
def test_dataloader(self, *args, **kwargs) -> Union[DataLoader, List[DataLoader]]:
pass

@abstractmethod
def predict_dataloader(self, *args, **kwargs) -> Union[DataLoader, List[DataLoader]]:
pass

@abstractmethod
def transfer_batch_to_device(self, batch: Any, device: torch.device) -> Any:
pass
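With predict_dataloader added alongside the other dataloader hooks, a datamodule can serve data to the new loop. A minimal sketch under the assumption that only the prediction hook is needed (dataset, sizes, and class name are invented; depending on the exact version, the remaining abstract hooks may need stubs as well):

import torch
from torch.utils.data import DataLoader, TensorDataset
from pytorch_lightning import LightningDataModule

class RandomPredictDataModule(LightningDataModule):
    """Toy datamodule that only provides data for prediction."""

    def __init__(self, batch_size: int = 32):
        super().__init__()
        self.batch_size = batch_size

    def predict_dataloader(self) -> DataLoader:
        # 256 feature vectors of dimension 16, batched for inference.
        dataset = TensorDataset(torch.randn(256, 16))
        return DataLoader(dataset, batch_size=self.batch_size)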
37 changes: 34 additions & 3 deletions pytorch_lightning/core/hooks.py
@@ -204,17 +204,23 @@ def on_test_batch_end(self, outputs: Any, batch: Any, batch_idx: int, dataloader
"""
# do something when the batch ends

+    def on_test_model_train(self) -> None:
+        """
+        Sets the model to train during the test loop
+        """
+        self.train()
+
     def on_test_model_eval(self) -> None:
         """
         Sets the model to eval during the test loop
         """
         self.eval()

-    def on_test_model_train(self) -> None:
+    def on_predict_model_eval(self) -> None:
         """
-        Sets the model to train during the test loop
+        Sets the model to eval during the predict loop
         """
-        self.train()
+        self.eval()

def on_epoch_start(self) -> None:
"""
@@ -518,6 +524,31 @@ def val_dataloader(self):
will have an argument ``dataloader_idx`` which matches the order here.
"""

def predict_dataloader(self) -> Union[DataLoader, List[DataLoader]]:
r"""
Implement one or multiple PyTorch DataLoaders for prediction.

It's recommended that all data downloads and preparation happen in :meth:`prepare_data`.

- :meth:`~pytorch_lightning.trainer.Trainer.fit`
- ...
- :meth:`prepare_data`
- :meth:`train_dataloader`
- :meth:`val_dataloader`
- :meth:`test_dataloader`

Note:
Lightning adds the correct sampler for distributed and arbitrary hardware.
There is no need to set it yourself.

Return:
Single or multiple PyTorch DataLoaders.

Note:
In the case where you return multiple prediction dataloaders, the :meth:`predict`
will have an argument ``dataloader_idx`` which matches the order here.
"""

def transfer_batch_to_device(self, batch: Any, device: Optional[torch.device] = None) -> Any:
"""
Override this hook if your :class:`~torch.utils.data.DataLoader` returns tensors
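On the module side, the hook driven by the new loop is called predict at this point in the PR (later releases renamed it predict_step). A hedged sketch of a module wired for it; the model, the softmax post-processing, and the default for dataloader_idx are illustrative assumptions:

import torch
from torch import nn
import pytorch_lightning as pl

class LitClassifier(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(16, 4)

    def forward(self, x):
        return self.layer(x)

    def predict(self, batch, batch_idx, dataloader_idx=None):
        # Mirrors the batch / batch_idx / dataloader_idx signature
        # documented in Accelerator.predict above.
        (x,) = batch  # TensorDataset batches arrive as 1-tuples
        return torch.softmax(self(x), dim=-1)

Combined with a predict_dataloader, trainer.predict(model), the entry point this PR wires up, runs the loop and collects the per-batch outputs.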
10 changes: 9 additions & 1 deletion pytorch_lightning/overrides/base.py
@@ -54,14 +54,22 @@ def forward(self, *inputs, **kwargs):
if not self.module.automatic_optimization:
self.module.trainer.model.require_backward_grad_sync = False
warn_if_output_is_none(output, "training_step")

elif running_stage == RunningStage.TESTING:
output = self.module.test_step(*inputs, **kwargs)
warn_if_output_is_none(output, "test_step")

elif running_stage == RunningStage.EVALUATING:
output = self.module.validation_step(*inputs, **kwargs)
warn_if_output_is_none(output, "validation_step")
-        else:
+
+        elif running_stage == RunningStage.PREDICTING:
+            output = self.module.predict(*inputs, **kwargs)
+            warn_if_output_is_none(output, "predict")
+
+        else:
output = self.module(*inputs, **kwargs)

return output


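The forward above is a plain stage dispatch: one wrapper routes a single call to whichever step method matches the current RunningStage, falling back to the module's own forward. A stripped-down illustration of the pattern (the enum and wrapper are stand-ins, not the Lightning originals):

from enum import Enum

class Stage(Enum):
    TRAINING = "training"
    TESTING = "testing"
    EVALUATING = "evaluating"
    PREDICTING = "predicting"

class StageDispatcher:
    """Simplified stand-in for the LightningModule wrapper above."""

    def __init__(self, module, stage=None):
        self.module = module  # expected to be callable, like an nn.Module
        self.stage = stage

    def forward(self, *args, **kwargs):
        dispatch = {
            Stage.TRAINING: self.module.training_step,
            Stage.TESTING: self.module.test_step,
            Stage.EVALUATING: self.module.validation_step,
            Stage.PREDICTING: self.module.predict,
        }
        # Unknown or unset stage falls back to the plain forward call.
        handler = dispatch.get(self.stage, self.module)
        return handler(*args, **kwargs)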
13 changes: 9 additions & 4 deletions pytorch_lightning/plugins/base_plugin.py
@@ -33,11 +33,11 @@ def connect(
Will be called by the accelerator.
"""

-    def pre_training(self) -> None:
-        """Hook to do something before the training starts."""
+    def pre_dispatch(self) -> None:
+        """Hook to do something before the training/evaluation/prediction starts."""

-    def post_training(self) -> None:
-        """Hook to do something after the training finishes."""
+    def post_dispatch(self) -> None:
+        """Hook to do something after the training/evaluation/prediction finishes."""

@contextlib.contextmanager
def train_step_context(self) -> Generator:
@@ -53,3 +53,8 @@ def val_step_context(self) -> Generator:
def test_step_context(self) -> Generator:
"""A contextmanager for the teststep"""
yield

@contextlib.contextmanager
def predict_context(self) -> Generator:
"""A contextmanager for the predict step"""
yield
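How the renamed hooks and the new context manager compose in a concrete plugin: a hedged sketch of a subclass, where Plugin refers to the base class defined in this file, and wrapping prediction in torch.no_grad() is an example choice, not something this diff prescribes:

import contextlib
from typing import Generator

import torch

class VerbosePlugin(Plugin):  # `Plugin` being the base class above
    def pre_dispatch(self) -> None:
        print("before training/evaluation/prediction starts")

    def post_dispatch(self) -> None:
        print("after training/evaluation/prediction finishes")

    @contextlib.contextmanager
    def predict_context(self) -> Generator:
        # For instance: ensure no gradients are tracked while predicting.
        with torch.no_grad():
            yield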
6 changes: 3 additions & 3 deletions pytorch_lightning/plugins/training_type/ddp.py
@@ -215,7 +215,7 @@ def init_ddp_connection(self, global_rank: int, world_size: int) -> None:
log.info(f"initializing ddp: GLOBAL_RANK: {global_rank}, MEMBER: {global_rank + 1}/{world_size}")
torch_distrib.init_process_group(torch_backend, rank=global_rank, world_size=world_size)

-    def pre_training(self):
+    def pre_dispatch(self):
# TODO: check if needed
seed = os.environ.get("PL_GLOBAL_SEED")
if seed is not None:
@@ -232,7 +232,7 @@ def pre_training(self):
# where to store ip_table
self.init_ddp_connection(self.global_rank, self.world_size)

-        # TODO: we moved it to the trainer.fit after calling pre_training
+        # TODO: we moved it to the trainer.fit after calling pre_dispatch
# ... need to double check that it is the correct place
# self.trainer.call_setup_hook(self.model)

@@ -257,7 +257,7 @@ def pre_training(self):

self.barrier()

-    def post_training(self):
+    def post_dispatch(self):
if "WORLD_SIZE" in os.environ:
del os.environ["WORLD_SIZE"]

12 changes: 6 additions & 6 deletions pytorch_lightning/plugins/training_type/ddp_spawn.py
@@ -110,6 +110,9 @@ def start_training(self, trainer):
def start_testing(self, trainer):
mp.spawn(self.new_process, **self.mp_spawn_kwargs)

def start_predicting(self, trainer):
mp.spawn(self.new_process, **self.mp_spawn_kwargs)

def new_process(self, process_idx, trainer, mp_queue):
self.mp_queue = mp_queue

@@ -128,7 +131,7 @@ def new_process(self, process_idx, trainer, mp_queue):
# where to store ip_table
self.init_ddp_connection(self.global_rank, self.world_size)

-        # TODO: we moved it to the trainer.fit after calling pre_training
+        # TODO: we moved it to the trainer.fit after calling pre_dispatch
# ... need to double check that it is the correct place
# self.trainer.call_setup_hook(self.model)

@@ -153,15 +156,12 @@ def new_process(self, process_idx, trainer, mp_queue):

self.barrier()

-        if trainer.testing:
-            results = trainer.run_test()
-        else:
-            results = trainer.train()
+        results = trainer.train_or_test_or_predict()

# persist info in ddp_spawn
self.transfer_distrib_spawn_state_on_fit_end(results)

-    def post_training(self):
+    def post_dispatch(self):
# restore main state with best weights
best_path = self.mp_queue.get()
last_path = self.mp_queue.get()
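start_predicting reuses the same pattern as start_training and start_testing: torch.multiprocessing.spawn launches one copy of the worker per process and passes the process index as the first argument. A self-contained illustration of that mechanism (the worker body is invented for the example):

import torch.multiprocessing as mp

def new_process(process_idx: int, nprocs: int) -> None:
    # Each spawned worker receives its index first, then the extra args.
    print(f"worker {process_idx + 1}/{nprocs} running")

if __name__ == "__main__":
    # Roughly what mp.spawn(self.new_process, **self.mp_spawn_kwargs) does,
    # with the plugin's kwargs supplying nprocs and the trainer/queue args.
    mp.spawn(new_process, args=(2,), nprocs=2)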