From ce6ee93c23c628270a815a7e80247678eadcde45 Mon Sep 17 00:00:00 2001
From: Brittany <24945384+bvrockwell@users.noreply.github.com>
Date: Thu, 26 Sep 2024 14:47:18 -0700
Subject: [PATCH] Update pallas.py to support trillium

Description: These changes are needed to support the new TPU chip (Trillium, whose accelerator type contains "v6").

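Why it's needed: Trillium does not have "lite" in its accelerator_type, but it also does not use megacore, so the existing "lite" check alone would wrongly enable megacore mode for it. The check now also excludes accelerator types containing "v6".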
---
 vllm/attention/backends/pallas.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/attention/backends/pallas.py b/vllm/attention/backends/pallas.py
index 83fdef16ef5cb..a8a78d41c666c 100644
--- a/vllm/attention/backends/pallas.py
+++ b/vllm/attention/backends/pallas.py
@@ -130,7 +130,7 @@ def __init__(
         assert tpu_type is not None
         tpu_type = tpu_type.lower()
 
-        if "lite" not in tpu_type:
+        if (("lite" not in tpu_type) and ("v6" not in tpu_type)):
             if self.num_kv_heads % 2 == 0:
                 self.megacore_mode = "kv_head"
             else: