Commit

[Bugfix][Kernel] Give unique name to BlockSparseFlashAttention (vllm-project#12040)

Signed-off-by: Chen Zhang <[email protected]>
Signed-off-by: hzh <[email protected]>
heheda12345 authored and HwwwwwwwH committed Jan 22, 2025
1 parent 3a05c49 commit 87a687b
Showing 2 changed files with 2 additions and 2 deletions.
vllm/attention/backends/blocksparse_attn.py (1 addition, 2 deletions)
@@ -89,8 +89,7 @@ class BlocksparseFlashAttentionBackend(AttentionBackend):
 
     @staticmethod
     def get_name() -> str:
-        # For attention layer compatibility
-        return "FLASH_ATTN"
+        return "BLOCK_SPARSE_FLASH_ATTN"
 
     @staticmethod
     def get_impl_cls() -> Type["BlocksparseFlashAttentionImpl"]:
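
Why the rename matters: BlocksparseFlashAttentionBackend previously reported the same name as the regular flash-attention backend, so any lookup keyed on get_name() could not tell the two apart. Below is a minimal, self-contained sketch of that failure mode; the registry and the class stubs are illustrative assumptions, not vLLM's actual code.

# Hypothetical name-keyed backend registry (illustration only, not vLLM code).
_REGISTRY: dict[str, type] = {}

def register_backend(name: str, backend_cls: type) -> None:
    # Two backends claiming the same name would collide here.
    if name in _REGISTRY:
        raise ValueError(f"backend name already taken: {name}")
    _REGISTRY[name] = backend_cls

class FlashAttentionBackend: ...
class BlocksparseFlashAttentionBackend: ...

register_backend("FLASH_ATTN", FlashAttentionBackend)
# Before this commit the block-sparse backend also returned "FLASH_ATTN",
# so a call like this would fail; with the unique name, both coexist.
register_backend("BLOCK_SPARSE_FLASH_ATTN", BlocksparseFlashAttentionBackend)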
vllm/platforms/interface.py (1 addition, 0 deletions)
@@ -33,6 +33,7 @@ class _Backend(enum.Enum):
     HPU_ATTN = enum.auto()
     PALLAS = enum.auto()
     IPEX = enum.auto()
+    BLOCK_SPARSE_FLASH_ATTN = enum.auto()
     NO_ATTENTION = enum.auto()
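
The new _Backend member is spelled exactly like the string that get_name() now returns, so a standard name-based enum lookup resolves it. A short sketch of that pattern, using only the stdlib enum module (the cut-down _Backend below is not the full vLLM enum):

import enum

class _Backend(enum.Enum):
    FLASH_ATTN = enum.auto()
    BLOCK_SPARSE_FLASH_ATTN = enum.auto()
    NO_ATTENTION = enum.auto()

# Enum lookup by member name: this succeeds only because the member exists;
# without the added member, the same lookup would raise KeyError.
selected = _Backend["BLOCK_SPARSE_FLASH_ATTN"]
assert selected is _Backend.BLOCK_SPARSE_FLASH_ATTN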
