flashinfer-ai · yzh119 · Mar 16, 2024 · Mar 16, 2024
diff --git a/python/csrc/batch_prefill.cu b/python/csrc/batch_prefill.cu
@@ -105,8 +105,8 @@ std::vector<torch::Tensor> BatchPrefillWithPagedKVCachePyTorchWrapper::Forward(
           static_cast<int32_t*>(paged_kv_indices.data_ptr()),
           static_cast<int32_t*>(paged_kv_indptr.data_ptr()),
           static_cast<int32_t*>(paged_kv_last_page_len.data_ptr()));
-      return DISPATCH_group_size(num_qo_heads / num_kv_heads, [&] {
-        return DISPATCH_head_dim(head_dim, [&] {
+      bool success = DISPATCH_group_size(num_qo_heads / num_kv_heads, [&] {
+        bool success = DISPATCH_head_dim(head_dim, [&] {
           DISPATCH_CAUSAL(causal, CAUSAL, {
             DISPATCH_ALLOW_FP16_QK_REDUCTION(allow_fp16_qk_reduction, ALLOW_FP16_QK_REDUCTION, {
               DISPATCH_POS_ENCODING_MODE(PosEncodingMode(pos_encoding_mode), POS_ENCODING_MODE, {
@@ -127,7 +127,11 @@ std::vector<torch::Tensor> BatchPrefillWithPagedKVCachePyTorchWrapper::Forward(
           });
           return true;
         });
+        TORCH_CHECK(success, "BatchPrefillWithPagedKVCache failed to dispatch head_dim ", head_dim);
+        return success;
       });
+      TORCH_CHECK(success, "BatchPrefillWithPagedKVCache failed to dispatch group_size ",
+                  num_qo_heads / num_kv_heads);
     });
     return true;
   });

diff --git a/python/csrc/single_prefill.cu b/python/csrc/single_prefill.cu
@@ -55,8 +55,8 @@ std::vector<torch::Tensor> single_prefill_with_kv_cache(
   }
 
   bool success = DISPATCH_PYTORCH_DTYPE_TO_CTYPE(q.scalar_type(), c_type, [&] {
-    return DISPATCH_group_size(num_qo_heads / num_kv_heads, [&] {
-      return DISPATCH_head_dim(head_dim, [&] {
+    bool success = DISPATCH_group_size(num_qo_heads / num_kv_heads, [&] {
+      bool success = DISPATCH_head_dim(head_dim, [&] {
         DISPATCH_CAUSAL(causal, CAUSAL, {
           DISPATCH_LAYOUT(kv_layout, KV_LAYOUT, {
             DISPATCH_ALLOW_FP16_QK_REDUCTION(allow_fp16_qk_reduction, ALLOW_FP16_QK_REDUCTION, {
@@ -80,7 +80,15 @@ std::vector<torch::Tensor> single_prefill_with_kv_cache(
         });
         return true;
       });
+      TORCH_CHECK(success,
+                  "SinglePrefillWithKVCache kernel launch failed, error: unknown head_dim ",
+                  head_dim);
+      return success;
     });
+    TORCH_CHECK(success,
+                "SinglePrefillWithKVCache kernel launch failed, error: unknown group_size ",
+                num_qo_heads / num_kv_heads);
+    return success;
   });
 
   TORCH_CHECK(success, "SinglePrefillWithKVCache kernel launch failed, error: unknown dtype");