From 0620c3188b8d965b94be044448e88eca92bccc52 Mon Sep 17 00:00:00 2001
From: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com>
Date: Sat, 31 Aug 2024 15:44:03 -0400
Subject: [PATCH] [BugFix][Core] Multistep Fix Crash on Request Cancellation
 (#8059)

---
 vllm/engine/output_processor/multi_step.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/vllm/engine/output_processor/multi_step.py b/vllm/engine/output_processor/multi_step.py
index 0209b0adc9831..e182cee8ba18e 100644
--- a/vllm/engine/output_processor/multi_step.py
+++ b/vllm/engine/output_processor/multi_step.py
@@ -88,9 +88,15 @@ def process_outputs(self,
         # TODO: Add support for async if necessary
         assert not is_async
 
+        # Sequences can be in RUNNING or FINISHED_ABORTED state
+        # once scheduled, as a sequence is moved to FINISHED_ABORTED
+        # if a client disconnects from the API server.
         seqs = sequence_group.get_seqs(status=SequenceStatus.RUNNING)
+        if not seqs:
+            seqs = sequence_group.get_seqs(
+                status=SequenceStatus.FINISHED_ABORTED)
 
-        assert seqs, "expected running sequences"
+        assert seqs, "Expected RUNNING or FINISHED_ABORTED sequences"
         assert len(seqs) == 1, (
             "Beam search not supported in multi-step decoding.")
         seq = seqs[0]
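
Below is a minimal, standalone sketch of the fallback this patch introduces: a
scheduled sequence is normally RUNNING, but a client disconnect from the API
server can move it to FINISHED_ABORTED before its outputs are processed, so the
output processor must accept either state. The SequenceStatus, Sequence, and
SequenceGroup classes here are hypothetical stand-ins, not the actual vLLM
types of the same names.

    from dataclasses import dataclass, field
    from enum import Enum, auto
    from typing import List


    class SequenceStatus(Enum):
        RUNNING = auto()
        FINISHED_ABORTED = auto()


    @dataclass
    class Sequence:
        seq_id: int
        status: SequenceStatus


    @dataclass
    class SequenceGroup:
        seqs: List[Sequence] = field(default_factory=list)

        def get_seqs(self, status: SequenceStatus) -> List[Sequence]:
            # Returns an empty list (never None) when nothing matches,
            # which is why the caller tests truthiness rather than None.
            return [seq for seq in self.seqs if seq.status == status]


    def pick_seq(group: SequenceGroup) -> Sequence:
        # Prefer RUNNING sequences; fall back to FINISHED_ABORTED ones so a
        # disconnect between scheduling and output processing does not trip
        # the assertion (the crash this patch fixes).
        seqs = group.get_seqs(status=SequenceStatus.RUNNING)
        if not seqs:
            seqs = group.get_seqs(status=SequenceStatus.FINISHED_ABORTED)
        assert seqs, "Expected RUNNING or FINISHED_ABORTED sequences"
        assert len(seqs) == 1, (
            "Beam search not supported in multi-step decoding.")
        return seqs[0]


    if __name__ == "__main__":
        aborted = SequenceGroup([Sequence(0, SequenceStatus.FINISHED_ABORTED)])
        print(pick_seq(aborted).status)  # SequenceStatus.FINISHED_ABORTED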