[CI/Build][CPU][Bugfix] Fix CPU CI #12150

Merged: 1 commit, Jan 17, 2025
4 changes: 2 additions & 2 deletions .buildkite/run-cpu-test.sh
@@ -83,6 +83,6 @@ function cpu_tests() {
   tests/lora/test_qwen2vl.py"
 }
 
-# All of CPU tests are expected to be finished less than 25 mins.
+# All of CPU tests are expected to be finished less than 40 mins.
 export -f cpu_tests
-timeout 30m bash -c "cpu_tests $CORE_RANGE $NUMA_NODE"
+timeout 40m bash -c "cpu_tests $CORE_RANGE $NUMA_NODE"
8 changes: 6 additions & 2 deletions vllm/model_executor/layers/activation.py
@@ -30,8 +30,10 @@ class FatreluAndMul(CustomOp):
     def __init__(self, threshold: float = 0.):
         super().__init__()
         self.threshold = threshold
-        if current_platform.is_cuda_alike() or current_platform.is_cpu():
Review comment (Member) on this line, suggested:

    if current_platform.is_cpu():
        self._forward_method = self.forward_native

Reply (Contributor, PR author): Updated.
+        if current_platform.is_cuda_alike():
             self.op = torch.ops._C.fatrelu_and_mul
+        elif current_platform.is_cpu():
+            self._forward_method = self.forward_native

     def forward_native(self, x: torch.Tensor) -> torch.Tensor:
         d = x.shape[-1] // 2
@@ -100,11 +102,13 @@ class MulAndSilu(CustomOp):

     def __init__(self):
         super().__init__()
-        if current_platform.is_cuda_alike() or current_platform.is_cpu():
+        if current_platform.is_cuda_alike():
             self.op = torch.ops._C.mul_and_silu
         elif current_platform.is_xpu():
             from vllm._ipex_ops import ipex_ops
             self.op = ipex_ops.silu_and_mul
+        elif current_platform.is_cpu():
+            self._forward_method = self.forward_native

     def forward_native(self, x: torch.Tensor) -> torch.Tensor:
         """PyTorch-native implementation equivalent to forward()."""