Gracefully recover from VRAM out of memory errors (next branch version) #5794

Merged: 4 commits, Feb 26, 2024

Changes from 2 commits
@@ -245,7 +245,13 @@ def offload_unlocked_models(self, size_required: int) -> None:
mps.empty_cache()

def move_model_to_device(self, cache_entry: CacheRecord[AnyModel], target_device: torch.device) -> None:
"""Move model into the indicated device."""
"""Move model into the indicated device.

:param cache_entry: The CacheRecord for the model
:param target_device: The torch.device to move the model into

May raise a torch.cuda.OutOfMemoryError
"""
# These attributes are not in the base ModelMixin class but in various derived classes.
# Some models don't have these attributes, in which case they run in RAM/CPU.
self.logger.debug(f"Called to move {cache_entry.key} to {target_device}")
@@ -259,6 +265,9 @@ def move_model_to_device(self, cache_entry: CacheRecord[AnyModel], target_device: torch.device) -> None:
if torch.device(source_device).type == torch.device(target_device).type:
return

# may raise an exception here if insufficient GPU VRAM
self._check_free_vram(target_device, cache_entry.size)

start_model_to_time = time.time()
snapshot_before = self._capture_memory_snapshot()
cache_entry.model.to(target_device)
@@ -405,3 +414,13 @@ def make_room(self, model_size: int) -> None:
mps.empty_cache()

self.logger.debug(f"After making room: cached_models={len(self._cached_models)}")

def _check_free_vram(self, target_device: torch.device, needed_size: int) -> None:
if target_device.type != "cuda":
return
vram_device = ( # mem_get_info() needs an indexed device
target_device if target_device.index is not None else torch.device(str(target_device), index=0)
)
free_mem, _ = torch.cuda.mem_get_info(torch.device(vram_device))
if needed_size > free_mem:
raise torch.cuda.OutOfMemoryError
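
The new check relies on `torch.cuda.mem_get_info()`, which returns (free, total) bytes for an indexed CUDA device; any non-CUDA target is skipped and left to run in RAM/CPU. A minimal standalone sketch of the same pre-flight idea, assuming a hypothetical helper name `has_free_vram` and a `needed_bytes` argument that are not part of this PR:

```python
import torch


def has_free_vram(target_device: torch.device, needed_bytes: int) -> bool:
    """Return True if `target_device` reports at least `needed_bytes` of free memory.

    Non-CUDA devices (cpu, mps) always pass, mirroring the early return in
    _check_free_vram above.
    """
    if target_device.type != "cuda":
        return True
    # mem_get_info() needs an indexed device; fall back to index 0 when none is given.
    indexed = target_device if target_device.index is not None else torch.device("cuda", 0)
    free_bytes, _total_bytes = torch.cuda.mem_get_info(indexed)
    return free_bytes >= needed_bytes
```

A caller can raise `torch.cuda.OutOfMemoryError` when this returns False, which is what `move_model_to_device()` now does before attempting `cache_entry.model.to(target_device)`.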
invokeai/backend/model_manager/load/model_cache/model_locker.py
@@ -2,12 +2,12 @@
Base class and implementation of a class that moves models in and out of VRAM.
"""

import torch
from invokeai.backend.model_manager import AnyModel

from .model_cache_base import CacheRecord, ModelCacheBase, ModelLockerBase


class ModelLocker(ModelLockerBase):

Check failure on line 10 in invokeai/backend/model_manager/load/model_cache/model_locker.py (GitHub Actions / ruff):
invokeai/backend/model_manager/load/model_cache/model_locker.py:5:1: I001 Import block is un-sorted or un-formatted
"""Internal class that mediates movement in and out of GPU."""

def __init__(self, cache: ModelCacheBase[AnyModel], cache_entry: CacheRecord[AnyModel]):
Expand Down Expand Up @@ -42,7 +42,10 @@

self._cache.logger.debug(f"Locking {self._cache_entry.key} in {self._cache.execution_device}")
self._cache.print_cuda_stats()

except torch.cuda.OutOfMemoryError:
self._cache.logger.warning("Insufficient GPU memory to load model. Aborting")
self._cache_entry.unlock()
raise
except Exception:
self._cache_entry.unlock()
raise
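
On the locking side, the pattern is release-then-reraise: when moving the model raises `torch.cuda.OutOfMemoryError`, the cache entry is unlocked so it can be evicted, and the exception propagates for the caller to report. A minimal sketch of that pattern in isolation; the helper name `load_or_abort` and the plain `logging` call are illustrative assumptions, not code from this PR:

```python
import logging
from typing import Callable

import torch

logger = logging.getLogger(__name__)


def load_or_abort(model: torch.nn.Module, device: torch.device, unlock: Callable[[], None]) -> None:
    """Move `model` onto `device`; on failure, run the `unlock` cleanup and re-raise.

    Mirrors the try/except added in ModelLocker.lock(): an out-of-memory error is
    logged as a warning before cleanup, other exceptions are cleaned up and re-raised as-is.
    """
    try:
        model.to(device)
    except torch.cuda.OutOfMemoryError:  # available in torch >= 1.13
        logger.warning("Insufficient GPU memory to load model. Aborting")
        unlock()
        raise
    except Exception:
        unlock()
        raise
```

Usage would look like `load_or_abort(cache_entry.model, torch.device("cuda:0"), cache_entry.unlock)`, leaving the decision of how to report the failure to whoever called `lock()`.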