From ce6ee93c23c628270a815a7e80247678eadcde45 Mon Sep 17 00:00:00 2001 From: Brittany <24945384+bvrockwell@users.noreply.github.com> Date: Thu, 26 Sep 2024 14:47:18 -0700 Subject: [PATCH] Update pallas.py to support trillium Description: These changes are needed to support the new TPU chip, Trillium. Why it's needed: Trillium does not have "lite" in its accelerator_type, but it also does not use megacore, so the existing "lite" check alone would wrongly enable a megacore mode for it; Trillium is therefore excluded explicitly by also checking for "v6" in the TPU type. --- vllm/attention/backends/pallas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/attention/backends/pallas.py b/vllm/attention/backends/pallas.py index 83fdef16ef5cb..a8a78d41c666c 100644 --- a/vllm/attention/backends/pallas.py +++ b/vllm/attention/backends/pallas.py @@ -130,7 +130,7 @@ def __init__( assert tpu_type is not None tpu_type = tpu_type.lower() - if "lite" not in tpu_type: + if (("lite" not in tpu_type) and ("v6" not in tpu_type)): if self.num_kv_heads % 2 == 0: self.megacore_mode = "kv_head" else: