From ed920135c8490440453a64e197fce5e1e6459225 Mon Sep 17 00:00:00 2001
From: Reza Salehi
Date: Tue, 15 Oct 2024 21:56:09 -0700
Subject: [PATCH] [Bugfix] Molmo text-only input bug fix (#9397)

Co-authored-by: sanghol
Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com>
Co-authored-by: Roger Wang
---
 vllm/model_executor/models/molmo.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/vllm/model_executor/models/molmo.py b/vllm/model_executor/models/molmo.py
index ccfee165368e7..b04916f17088c 100644
--- a/vllm/model_executor/models/molmo.py
+++ b/vllm/model_executor/models/molmo.py
@@ -946,9 +946,12 @@ def pad_images(
 
 
 def input_processor_for_molmo(ctx: InputContext, llm_inputs: LLMInputs):
-    prompt = llm_inputs["prompt"]
-    multi_modal_data = llm_inputs.get("multi_modal_data")
-    image = multi_modal_data.get("image")
+    prompt = llm_inputs.get("prompt", None)
+    multi_modal_data = llm_inputs.get("multi_modal_data", None)
+    if multi_modal_data is not None:
+        image = multi_modal_data.get("image", None)
+    else:
+        image = None
     processor = cached_get_processor(ctx.model_config.model,
                                      trust_remote_code=True,
                                      revision=ctx.model_config.code_revision)