From 8500d8dc86a0addf5e2c8c99422acd659f6fc7cc Mon Sep 17 00:00:00 2001
From: Lukasz Durejko
Date: Tue, 5 Nov 2024 09:55:07 +0200
Subject: [PATCH 1/2] Fix SchedulerConfig params

---
 vllm/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/config.py b/vllm/config.py
index 67a4ec0761cc3..cbde20c8faf25 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -973,7 +973,6 @@ class SchedulerConfig:
     def __init__(self,
                  max_num_batched_tokens: Optional[int],
                  max_num_seqs: int,
-                 max_num_prefill_seqs: Optional[int],
                  max_model_len: int,
                  use_v2_block_manager: bool = True,
                  num_lookahead_slots: int = 0,
@@ -986,6 +985,7 @@ def __init__(self,
                  multi_step_stream_outputs: bool = False,
                  send_delta_data: bool = False,
                  policy: str = "fcfs",
+                 max_num_prefill_seqs: Optional[int] = None,
                  use_padding_aware_scheduling=False) -> None:
         if max_num_batched_tokens is None:
             if enable_chunked_prefill:

From 6ac0442df4e11945d20950b62fbe9af0f3cbf3fa Mon Sep 17 00:00:00 2001
From: Lukasz Durejko
Date: Tue, 5 Nov 2024 09:58:04 +0200
Subject: [PATCH 2/2] adapt SchedulerConfig docstring

---
 vllm/config.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index cbde20c8faf25..68957771fa7e4 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -940,9 +940,6 @@ class SchedulerConfig:
             a single iteration.
         max_num_seqs: Maximum number of sequences to be processed in a single
             iteration.
-        max_num_prefill_seqs: Maximum number of prefill sequences to be
-            processed in a single iteration. Used only with padding-aware
-            scheduling.
         max_model_len: Maximum length of a sequence (including prompt
            and generated text).
         use_v2_block_manager: Whether to use the BlockSpaceManagerV2 or not.
@@ -966,6 +963,9 @@ class SchedulerConfig:
            when SPMD worker architecture is enabled. I.e.,
            VLLM_USE_RAY_SPMD_WORKER=1
         policy: The scheduling policy to use. "fcfs" (default) or "priority".
+        max_num_prefill_seqs: Maximum number of prefill sequences to be
+            processed in a single iteration. Used only with padding-aware
+            scheduling.
         use_padding_aware_scheduling: If True, scheduler will consider padded
             tokens in prefill.
     """
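
Note (not part of the patch): moving max_num_prefill_seqs behind the existing keyword arguments and giving it a None default means callers that never passed it, or that passed max_model_len right after max_num_seqs positionally, keep working unchanged; only padding-aware callers need to supply it, by keyword. A minimal sketch of the resulting call pattern, assuming no extra validation rejects these values; the numbers 256, 4096 and 16 are illustrative, not taken from the patch:

    from vllm.config import SchedulerConfig

    # Existing callers that do not use padding-aware scheduling keep working:
    cfg = SchedulerConfig(
        max_num_batched_tokens=None,  # resolved inside __init__, as in the diff
        max_num_seqs=256,
        max_model_len=4096,
    )

    # Padding-aware callers opt in via the trailing keyword arguments:
    cfg_padded = SchedulerConfig(
        max_num_batched_tokens=None,
        max_num_seqs=256,
        max_model_len=4096,
        max_num_prefill_seqs=16,
        use_padding_aware_scheduling=True,
    )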