From 71a9e6b7c46a34493b7cf051ac2b2bb7bfe938e3 Mon Sep 17 00:00:00 2001
From: Mengqing Cao
Date: Fri, 8 Nov 2024 13:08:51 +0800
Subject: [PATCH] [Misc] Fix ImportError caused by triton (#9493)

Signed-off-by: Sumit Dubey
---
 vllm/executor/multiproc_gpu_executor.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/vllm/executor/multiproc_gpu_executor.py b/vllm/executor/multiproc_gpu_executor.py
index 2dbde778e49b1..3eb14fb931925 100644
--- a/vllm/executor/multiproc_gpu_executor.py
+++ b/vllm/executor/multiproc_gpu_executor.py
@@ -13,12 +13,15 @@
 from vllm.logger import init_logger
 from vllm.model_executor.layers.sampler import SamplerOutput
 from vllm.sequence import ExecuteModelRequest
-from vllm.triton_utils import maybe_set_triton_cache_manager
+from vllm.triton_utils.importing import HAS_TRITON
 from vllm.utils import (_run_task_with_lock, cuda_device_count_stateless,
                         cuda_is_initialized, get_distributed_init_method,
                         get_open_port, get_vllm_instance_id, make_async,
                         update_environment_variables)
 
+if HAS_TRITON:
+    from vllm.triton_utils import maybe_set_triton_cache_manager
+
 logger = init_logger(__name__)
 
 
@@ -59,7 +62,7 @@ def _init_executor(self) -> None:
             torch.set_num_threads(default_omp_num_threads)
 
         # workaround for https://github.com/vllm-project/vllm/issues/6103
-        if world_size > 1:
+        if HAS_TRITON and world_size > 1:
             maybe_set_triton_cache_manager()
 
         # Multiprocessing-based executor does not support multi-node setting.
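
Note for reviewers: the fix above is an instance of the standard optional-dependency
guard: probe for the library once at import time, record the result in a
module-level flag, and gate every use of the dependent helper behind that flag.
A minimal standalone sketch of the pattern follows. The find_spec probe and the
init_executor wrapper are illustrative assumptions; HAS_TRITON and
maybe_set_triton_cache_manager are the names actually used in the patch.

from importlib.util import find_spec

# Assumed probe: one plausible way a HAS_TRITON flag could be computed.
# In vLLM the real flag lives in vllm.triton_utils.importing.
HAS_TRITON = find_spec("triton") is not None

if HAS_TRITON:
    # Imported only when triton is installed, so environments without
    # triton never raise ImportError at module import time.
    from vllm.triton_utils import maybe_set_triton_cache_manager


def init_executor(world_size: int) -> None:
    # Hypothetical wrapper standing in for _init_executor: the triton
    # cache-manager workaround only matters when triton is present and
    # more than one worker process would share a cache directory.
    if HAS_TRITON and world_size > 1:
        maybe_set_triton_cache_manager()

The key design point is that the guard appears twice, once around the import and
once around the call site, so the module both imports cleanly and runs correctly
when triton is absent.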