
Commit d1da699

Use torch.cuda.memory_allocated() rather than torch.cuda.memory_reserved() to be more conservative in setting dynamic VRAM cache limits.
RyanJDick committed Jan 3, 2025
1 parent 568e708 commit d1da699
Showing 1 changed file with 16 additions and 4 deletions.
20 changes: 16 additions & 4 deletions invokeai/backend/model_manager/load/model_cache/model_cache.py
@@ -324,9 +324,13 @@ def _get_vram_available(self, working_mem_bytes: Optional[int] = None) -> int:
         working_mem_bytes = max(working_mem_bytes or working_mem_bytes_default, working_mem_bytes_default)
 
         if self._execution_device.type == "cuda":
-            vram_reserved = torch.cuda.memory_reserved(self._execution_device)
+            # TODO(ryand): It is debatable whether we should use memory_reserved() or memory_allocated() here.
+            # memory_reserved() includes memory reserved by the torch CUDA memory allocator that may or may not be
+            # re-used for future allocations. For now, we use memory_allocated() to be conservative.
+            # vram_reserved = torch.cuda.memory_reserved(self._execution_device)
+            vram_allocated = torch.cuda.memory_allocated(self._execution_device)
             vram_free, _vram_total = torch.cuda.mem_get_info(self._execution_device)
-            vram_available_to_process = vram_free + vram_reserved
+            vram_available_to_process = vram_free + vram_allocated
         elif self._execution_device.type == "mps":
             vram_reserved = torch.mps.driver_allocated_memory()
             # TODO(ryand): Is it accurate that MPS shares memory with the CPU?
@@ -345,7 +349,6 @@ def _get_vram_in_use(self) -> int:
 
     def _get_ram_available(self) -> int:
         """Get the amount of RAM available for the cache to use, while keeping memory pressure under control."""
-
         # If self._max_ram_cache_size_gb is set, then it overrides the default logic.
         if self._max_ram_cache_size_gb is not None:
             ram_total_available_to_cache = int(self._max_ram_cache_size_gb * GB)
@@ -364,7 +367,16 @@ def _get_ram_available(self) -> int:
         ram_used = max(cache_ram_used, ram_used)
 
         # Aim to keep 10% of RAM free.
-        return int(ram_total * 0.9) - ram_used
+        ram_available_based_on_memory_usage = int(ram_total * 0.9) - ram_used
+
+        # If we are running out of RAM, then there's an increased likelihood that we will run into this issue:
+        # https://github.com/invoke-ai/InvokeAI/issues/7513
+        # To keep things running smoothly, there's a minimum RAM cache size that we always allow (even if this means
+        # using swap).
+        min_ram_cache_size_bytes = 4 * GB
+        ram_available_based_on_min_cache_size = min_ram_cache_size_bytes - cache_ram_used
+
+        return max(ram_available_based_on_memory_usage, ram_available_based_on_min_cache_size)
 
     def _get_ram_in_use(self) -> int:
         """Get the amount of RAM currently in use."""
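For readers comparing the two counters: torch.cuda.memory_reserved() reports all memory held by PyTorch's caching allocator, including cached blocks that may or may not be reused, while torch.cuda.memory_allocated() reports only the memory backing live tensors. Summing free memory with the allocated figure therefore gives the smaller, more conservative estimate of what the process can still use. Below is a minimal standalone sketch of the new CUDA branch; the function name and device handling are illustrative, but the three torch calls are the ones used in the diff.

import torch


def estimate_vram_available(device: torch.device) -> int:
    # Bytes currently backing live tensors on this device.
    vram_allocated = torch.cuda.memory_allocated(device)
    # Free and total device memory as reported by the driver.
    vram_free, _vram_total = torch.cuda.mem_get_info(device)
    # Old estimate: vram_free + torch.cuda.memory_reserved(device), which also counts
    # cached-but-unused allocator blocks. New, more conservative estimate:
    return vram_free + vram_allocated


if torch.cuda.is_available():
    print(f"{estimate_vram_available(torch.device('cuda:0')) / 2**30:.2f} GiB available to the process")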
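The _get_ram_available() change can be restated as a small worked example. The helper and the sample numbers below are illustrative only; GB is assumed to be a 2**30-byte constant, matching the name used in the diff.

GB = 2**30  # assumption: the module's GB constant is one binary gigabyte


def ram_available_for_cache(ram_total: int, ram_used: int, cache_ram_used: int) -> int:
    # Budget that keeps roughly 10% of system RAM free.
    ram_available_based_on_memory_usage = int(ram_total * 0.9) - ram_used
    # Budget that always allows the cache at least 4 GB, even if that means using swap.
    min_ram_cache_size_bytes = 4 * GB
    ram_available_based_on_min_cache_size = min_ram_cache_size_bytes - cache_ram_used
    # The cache may grow by whichever budget is larger.
    return max(ram_available_based_on_memory_usage, ram_available_based_on_min_cache_size)


# Example: a 16 GB machine with 14 GB in use overall, 2 GB of which is the model cache.
# Usage-based budget: int(16 GB * 0.9) - 14 GB ~= 0.4 GB; min-cache budget: 4 GB - 2 GB = 2 GB.
# The new code therefore allows ~2 GB of growth instead of capping the cache at ~0.4 GB.
print(ram_available_for_cache(16 * GB, 14 * GB, 2 * GB) / GB)  # -> 2.0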
