From cda2d233a23d8f5af2d71525c7542079eeec456d Mon Sep 17 00:00:00 2001 From: Huazhong Ji Date: Mon, 23 Sep 2024 01:44:09 +0800 Subject: [PATCH] [MISC] rename CudaMemoryProfiler to DeviceMemoryProfiler (#8703) --- vllm/utils.py | 2 +- vllm/worker/model_runner.py | 4 ++-- vllm/worker/xpu_model_runner.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/vllm/utils.py b/vllm/utils.py index 43b64263d645a..b1513b91a06c6 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -757,7 +757,7 @@ def is_pin_memory_available() -> bool: return True -class CudaMemoryProfiler: +class DeviceMemoryProfiler: def __init__(self, device: Optional[torch.types.Device] = None): self.device = device diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index e8c472df8b5fc..0a90f767567d6 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -45,7 +45,7 @@ LRUCacheWorkerPromptAdapterManager) from vllm.sampling_params import SamplingParams from vllm.sequence import IntermediateTensors, SequenceGroupMetadata -from vllm.utils import (CudaMemoryProfiler, PyObjectCache, async_tensor_h2d, +from vllm.utils import (DeviceMemoryProfiler, PyObjectCache, async_tensor_h2d, flatten_2d_lists, is_hip, is_pin_memory_available, supports_dynamo) from vllm.worker.model_runner_base import ( @@ -1012,7 +1012,7 @@ def __init__( def load_model(self) -> None: logger.info("Starting to load model %s...", self.model_config.model) - with CudaMemoryProfiler() as m: + with DeviceMemoryProfiler() as m: self.model = get_model(model_config=self.model_config, device_config=self.device_config, load_config=self.load_config, diff --git a/vllm/worker/xpu_model_runner.py b/vllm/worker/xpu_model_runner.py index f9037625d4af9..d3c763c995b34 100644 --- a/vllm/worker/xpu_model_runner.py +++ b/vllm/worker/xpu_model_runner.py @@ -21,7 +21,7 @@ MultiModalInputs, MultiModalRegistry) from vllm.sampling_params import SamplingParams from vllm.sequence import IntermediateTensors, SequenceGroupMetadata -from vllm.utils import CudaMemoryProfiler, make_tensor_with_pad +from vllm.utils import DeviceMemoryProfiler, make_tensor_with_pad from vllm.worker.model_runner import AttentionMetadata, SamplingMetadata from vllm.worker.model_runner_base import ( ModelRunnerBase, ModelRunnerInputBase, ModelRunnerInputBuilderBase, @@ -391,7 +391,7 @@ def __init__( self.model: nn.Module # Set after init_Model def load_model(self) -> None: - with CudaMemoryProfiler() as m: + with DeviceMemoryProfiler() as m: self.model = get_model( model_config=self.model_config, device_config=self.device_config,