diff --git a/vllm/worker/hpu_model_runner.py b/vllm/worker/hpu_model_runner.py index f67d604234fbe..22aa5bf5a7fc9 100755 --- a/vllm/worker/hpu_model_runner.py +++ b/vllm/worker/hpu_model_runner.py @@ -1121,8 +1121,9 @@ def _prepare_decode( padding_fn = None if self.use_contiguous_pa: + block_bucket_size = max(max(block_list) + 1, len(block_list)) block_bucket_size = find_bucket( - max(block_list) + 1, + block_bucket_size, self.bucketing_global_state.decode_block_bucket_cfg) indices: List[Any] indices = [None] * block_bucket_size