Gracefully recover from VRAM out of memory errors (next branch version) #5794

Merged: 4 commits, Feb 26, 2024

Changes from 2 commits
@@ -245,7 +245,13 @@ def offload_unlocked_models(self, size_required: int) -> None:
mps.empty_cache()

def move_model_to_device(self, cache_entry: CacheRecord[AnyModel], target_device: torch.device) -> None:
"""Move model into the indicated device."""
"""Move model into the indicated device.

:param cache_entry: The CacheRecord for the model
:param target_device: The torch.device to move the model into

May raise a torch.cuda.OutOfMemoryError
"""
# These attributes are not in the base ModelMixin class but in various derived classes.
# Some models don't have these attributes, in which case they run in RAM/CPU.
self.logger.debug(f"Called to move {cache_entry.key} to {target_device}")
@@ -259,6 +265,9 @@ def move_model_to_device(self, cache_entry: CacheRecord[AnyModel], target_device: torch.device) -> None:
if torch.device(source_device).type == torch.device(target_device).type:
return

# may raise an exception here if insufficient GPU VRAM
self._check_free_vram(target_device, cache_entry.size)

start_model_to_time = time.time()
snapshot_before = self._capture_memory_snapshot()
cache_entry.model.to(target_device)
@@ -405,3 +414,13 @@ def make_room(self, model_size: int) -> None:
mps.empty_cache()

self.logger.debug(f"After making room: cached_models={len(self._cached_models)}")

def _check_free_vram(self, target_device: torch.device, needed_size: int) -> None:
if target_device.type != "cuda":
return
vram_device = ( # mem_get_info() needs an indexed device
target_device if target_device.index is not None else torch.device(str(target_device), index=0)
)
free_mem, _ = torch.cuda.mem_get_info(torch.device(vram_device))
if needed_size > free_mem:
raise torch.cuda.OutOfMemoryError
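
The new check relies on `torch.cuda.mem_get_info()`, which returns (free, total) bytes for an indexed CUDA device; any non-CUDA target is skipped and left to run in RAM/CPU. A minimal standalone sketch of the same pre-flight idea, assuming a hypothetical helper name `has_free_vram` and a `needed_bytes` argument that are not part of this PR:

```python
import torch


def has_free_vram(target_device: torch.device, needed_bytes: int) -> bool:
    """Return True if `target_device` reports at least `needed_bytes` of free memory.

    Non-CUDA devices (cpu, mps) always pass, mirroring the early return in
    _check_free_vram above.
    """
    if target_device.type != "cuda":
        return True
    # mem_get_info() needs an indexed device; fall back to index 0 when none is given.
    indexed = target_device if target_device.index is not None else torch.device("cuda", 0)
    free_bytes, _total_bytes = torch.cuda.mem_get_info(indexed)
    return free_bytes >= needed_bytes
```

A caller can raise `torch.cuda.OutOfMemoryError` when this returns False, which is what `move_model_to_device()` now does before attempting `cache_entry.model.to(target_device)`.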
invokeai/backend/model_manager/load/model_cache/model_locker.py
@@ -2,12 +2,12 @@
Base class and implementation of a class that moves models in and out of VRAM.
"""

import torch
from invokeai.backend.model_manager import AnyModel

from .model_cache_base import CacheRecord, ModelCacheBase, ModelLockerBase


class ModelLocker(ModelLockerBase):

Check failure on line 10 in invokeai/backend/model_manager/load/model_cache/model_locker.py (GitHub Actions / ruff):
invokeai/backend/model_manager/load/model_cache/model_locker.py:5:1: I001 Import block is un-sorted or un-formatted
"""Internal class that mediates movement in and out of GPU."""

def __init__(self, cache: ModelCacheBase[AnyModel], cache_entry: CacheRecord[AnyModel]):
Expand Down Expand Up @@ -42,7 +42,10 @@

self._cache.logger.debug(f"Locking {self._cache_entry.key} in {self._cache.execution_device}")
self._cache.print_cuda_stats()

except torch.cuda.OutOfMemoryError:
self._cache.logger.warning("Insufficient GPU memory to load model. Aborting")
self._cache_entry.unlock()
raise
except Exception:
self._cache_entry.unlock()
raise
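
On the locking side, the pattern is release-then-reraise: when moving the model raises `torch.cuda.OutOfMemoryError`, the cache entry is unlocked so it can be evicted, and the exception propagates for the caller to report. A minimal sketch of that pattern in isolation; the helper name `load_or_abort` and the plain `logging` call are illustrative assumptions, not code from this PR:

```python
import logging
from typing import Callable

import torch

logger = logging.getLogger(__name__)


def load_or_abort(model: torch.nn.Module, device: torch.device, unlock: Callable[[], None]) -> None:
    """Move `model` onto `device`; on failure, run the `unlock` cleanup and re-raise.

    Mirrors the try/except added in ModelLocker.lock(): an out-of-memory error is
    logged as a warning before cleanup, other exceptions are cleaned up and re-raised as-is.
    """
    try:
        model.to(device)
    except torch.cuda.OutOfMemoryError:  # available in torch >= 1.13
        logger.warning("Insufficient GPU memory to load model. Aborting")
        unlock()
        raise
    except Exception:
        unlock()
        raise
```

Usage would look like `load_or_abort(cache_entry.model, torch.device("cuda:0"), cache_entry.unlock)`, leaving the decision of how to report the failure to whoever called `lock()`.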