[Misc] Fix ImportError caused by triton (vllm-project#9493)
Signed-off-by: Sumit Dubey <[email protected]>
MengqingCao authored and sumitd2 committed Nov 14, 2024
1 parent 30ad107 commit 71a9e6b
Showing 1 changed file with 5 additions and 2 deletions.

vllm/executor/multiproc_gpu_executor.py
@@ -13,12 +13,15 @@
 from vllm.logger import init_logger
 from vllm.model_executor.layers.sampler import SamplerOutput
 from vllm.sequence import ExecuteModelRequest
-from vllm.triton_utils import maybe_set_triton_cache_manager
+from vllm.triton_utils.importing import HAS_TRITON
 from vllm.utils import (_run_task_with_lock, cuda_device_count_stateless,
                         cuda_is_initialized, get_distributed_init_method,
                         get_open_port, get_vllm_instance_id, make_async,
                         update_environment_variables)

+if HAS_TRITON:
+    from vllm.triton_utils import maybe_set_triton_cache_manager
+
 logger = init_logger(__name__)
@@ -59,7 +62,7 @@ def _init_executor(self) -> None:
             torch.set_num_threads(default_omp_num_threads)

         # workaround for https://github.com/vllm-project/vllm/issues/6103
-        if world_size > 1:
+        if HAS_TRITON and world_size > 1:
             maybe_set_triton_cache_manager()

         # Multiprocessing-based executor does not support multi-node setting.
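For context, the HAS_TRITON flag imported above typically comes from a small probe module that checks whether triton is installed before any triton-dependent helper is imported. The sketch below is only an assumption about how such a guard can be written (the real vllm/triton_utils/importing.py may differ); it illustrates the optional-dependency pattern this fix relies on, not vLLM's exact code.

    # Hypothetical sketch of an optional-dependency guard (assumption, not vLLM's code).
    from importlib.util import find_spec
    import warnings

    # True only when the 'triton' package is importable in this environment.
    HAS_TRITON = find_spec("triton") is not None

    if not HAS_TRITON:
        # Warn rather than raise, so environments without triton keep working.
        warnings.warn(
            "triton is not installed; triton-specific optimizations are disabled.")

Call sites then wrap both the import and the use of triton-only helpers in an "if HAS_TRITON:" check, as the diff does with maybe_set_triton_cache_manager, so that machines without triton no longer hit an ImportError at module import time.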
