diff --git a/vllm/platforms/__init__.py b/vllm/platforms/__init__.py
index d34b660df6a78..9c98942b55699 100644
--- a/vllm/platforms/__init__.py
+++ b/vllm/platforms/__init__.py
@@ -33,7 +33,8 @@ def cuda_platform_plugin() -> Optional[str]:
     is_cuda = False
 
     try:
-        import pynvml
+        from vllm.utils import import_pynvml
+        pynvml = import_pynvml()
         pynvml.nvmlInit()
         try:
             if pynvml.nvmlDeviceGetCount() > 0:
diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index 44d2506f0e204..b49852a727fa4 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -8,7 +8,6 @@
 from typing import (TYPE_CHECKING, Callable, List, Optional, Tuple, TypeVar,
                     Union)
 
-import pynvml
 import torch
 from typing_extensions import ParamSpec
 
@@ -16,6 +15,7 @@
 import vllm._C  # noqa
 import vllm.envs as envs
 from vllm.logger import init_logger
+from vllm.utils import import_pynvml
 
 from .interface import DeviceCapability, Platform, PlatformEnum, _Backend
 
@@ -29,13 +29,7 @@
 _P = ParamSpec("_P")
 _R = TypeVar("_R")
 
-if pynvml.__file__.endswith("__init__.py"):
-    logger.warning(
-        "You are using a deprecated `pynvml` package. Please install"
-        " `nvidia-ml-py` instead, and make sure to uninstall `pynvml`."
-        " When both of them are installed, `pynvml` will take precedence"
-        " and cause errors. See https://pypi.org/project/pynvml "
-        "for more information.")
+pynvml = import_pynvml()
 
 # pytorch 2.5 uses cudnn sdpa by default, which will cause crash on some models
 # see https://github.com/huggingface/diffusers/issues/9704 for details
diff --git a/vllm/utils.py b/vllm/utils.py
index 3089f0951d1e2..a2b53fcf252d1 100644
--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -2208,3 +2208,61 @@ def run_method(obj: Any, method: Union[str, bytes, Callable], args: Tuple[Any],
     else:
         func = partial(method, obj)  # type: ignore
     return func(*args, **kwargs)
+
+
+def import_pynvml():
+    """
+    Historical comments:
+
+    libnvml.so is the library behind nvidia-smi, and
+    pynvml is a Python wrapper around it. We use it to get GPU
+    status without initializing CUDA context in the current process.
+    Historically, there are two packages that provide pynvml:
+    - `nvidia-ml-py` (https://pypi.org/project/nvidia-ml-py/): The official
+        wrapper. It is a dependency of vLLM, and is installed when users
+        install vLLM. It provides a Python module named `pynvml`.
+    - `pynvml` (https://pypi.org/project/pynvml/): An unofficial wrapper.
+        Prior to version 12.0, it also provides a Python module `pynvml`,
+        and therefore conflicts with the official one. What's worse,
+        the module is a Python package, and has higher priority than
+        the official one which is a standalone Python file.
+        This causes errors when both of them are installed.
+        Starting from version 12.0, it migrates to a new module
+        named `pynvml_utils` to avoid the conflict.
+
+    TL;DR: if users have pynvml<12.0 installed, it will cause problems.
+    Otherwise, `import pynvml` will import the correct module.
+    We take the safest approach here, to manually import the correct
+    `pynvml.py` module from the `nvidia-ml-py` package.
+    """
+    if TYPE_CHECKING:
+        import pynvml
+        return pynvml
+    if "pynvml" in sys.modules:
+        import pynvml
+        if pynvml.__file__.endswith("__init__.py"):
+            # this is pynvml < 12.0
+            raise RuntimeError(
+                "You are using a deprecated `pynvml` package. "
+                "Please uninstall `pynvml` or upgrade to at least"
+                " version 12.0. See https://pypi.org/project/pynvml "
+                "for more information.")
+        return sys.modules["pynvml"]
+    import importlib.util
+    import os
+    import site
+    for site_dir in site.getsitepackages():
+        pynvml_path = os.path.join(site_dir, "pynvml.py")
+        if os.path.exists(pynvml_path):
+            spec = importlib.util.spec_from_file_location(
+                "pynvml", pynvml_path)
+            pynvml = importlib.util.module_from_spec(spec)
+            sys.modules["pynvml"] = pynvml
+            spec.loader.exec_module(pynvml)
+            return pynvml
+    # Fallback: no standalone `pynvml.py` was found under site-packages
+    # (e.g. user-site, conda, or editable installs where
+    # site.getsitepackages() does not cover the install location). A plain
+    # import is still correct here because `pynvml` is not in sys.modules
+    # yet; previously this path silently returned None, which made callers
+    # crash later at pynvml.nvmlInit() with a confusing AttributeError.
+    import pynvml
+    return pynvml