diff --git a/vllm/attention/backends/flashinfer.py b/vllm/attention/backends/flashinfer.py index dc106332d003b..658805d35be0a 100644 --- a/vllm/attention/backends/flashinfer.py +++ b/vllm/attention/backends/flashinfer.py @@ -871,7 +871,7 @@ def unified_flash_infer( else: assert prefill_meta is not None assert prefill_meta.prefill_wrapper is not None - output = prefill_meta.prefill_wrapper.forward( + prefill_output = prefill_meta.prefill_wrapper.forward( query, kv_cache, logits_soft_cap=logits_soft_cap,