vllm-project · jeejeelee · Jan 16, 2025 · youkaichao · Jan 17, 2025 · jeejeelee
diff --git a/tests/lora/test_long_context.py b/tests/lora/test_long_context.py
@@ -1,4 +1,5 @@
 import ast
+import os
 from typing import List, Optional, Tuple
 
 import numpy as np
@@ -113,7 +114,10 @@ def lora_llm(long_context_infos):
         context_len_to_scaling_factor[info["context_length"]]
         for info in long_context_infos.values()
     ]
-
+    # dist_init sets CUDA_VISIBLE_DEVICES, which affects LLM initialization,
+    # so remove this environment variable if it is set.
+    if "CUDA_VISIBLE_DEVICES" in os.environ:
+        del os.environ["CUDA_VISIBLE_DEVICES"]
     llm = vllm.LLM(
         "meta-llama/Llama-2-13b-chat-hf",
         enable_lora=True,

diff --git a/vllm/model_executor/layers/activation.py b/vllm/model_executor/layers/activation.py
@@ -30,7 +30,7 @@ class FatreluAndMul(CustomOp):
     def __init__(self, threshold: float = 0.):
         super().__init__()
         self.threshold = threshold
-        if current_platform.is_cuda_alike() or current_platform.is_cpu():
+        if current_platform.is_cuda_alike():
             self.op = torch.ops._C.fatrelu_and_mul
 
     def forward_native(self, x: torch.Tensor) -> torch.Tensor:
@@ -100,7 +100,7 @@ class MulAndSilu(CustomOp):
 
     def __init__(self):
         super().__init__()
-        if current_platform.is_cuda_alike() or current_platform.is_cpu():
+        if current_platform.is_cuda_alike():
             self.op = torch.ops._C.mul_and_silu
         elif current_platform.is_xpu():
             from vllm._ipex_ops import ipex_ops