Skip to content

Commit

Permalink
Refactor image parsing in Dockerfile.meipian and chat_utils.py
Browse files Browse the repository at this point in the history
  • Loading branch information
whyiug committed Oct 15, 2024
1 parent 34b0469 commit 3495e80
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 11 deletions.
9 changes: 1 addition & 8 deletions Dockerfile.meipian
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,9 @@ WORKDIR /workspace

RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
pip install redis && \
pip install flash-attn --no-build-isolation && \
pip install https://vllm-wheels.s3.us-west-2.amazonaws.com/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl


# Attention: the base image, the vllm wheel, and the git branch must all be the same version
# sync main
# git clone https://github.com/whyiug/vllm
# cd vllm
# git checkout feature_redis_image_embeds
# git merge origin main

COPY . /workspace/vllm

WORKDIR /workspace/vllm
Expand Down
14 changes: 11 additions & 3 deletions vllm/entrypoints/chat_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ def load_chat_template(
# TODO: Let user specify how to insert multimodal tokens into prompt
# (similar to chat template)
def _get_full_multimodal_text_prompt(placeholder_counts: Dict[str, int],
text_prompt: str) -> str:
text_prompt: str, model_type: str) -> str:
"""Combine multimodal prompts for a multimodal language model."""

# Look through the text prompt to check for missing placeholders
Expand All @@ -378,7 +378,13 @@ def _get_full_multimodal_text_prompt(placeholder_counts: Dict[str, int],

# NOTE: For now we always add missing placeholders at the front of
# the prompt. This may change to be customizable in the future.
return "\n".join(missing_placeholders + [text_prompt])
if model_type == "qwen2_vl":
# TODO: multi images not very well supported
multimodal_prompt = "".join(missing_placeholders + [text_prompt])
else:
multimodal_prompt = "\n".join(missing_placeholders + [text_prompt])

return multimodal_prompt


# No need to validate using Pydantic again
Expand Down Expand Up @@ -442,7 +448,9 @@ def _parse_chat_message_content_parts(
mm_placeholder_counts = mm_parser.mm_placeholder_counts()
if mm_placeholder_counts:
text_prompt = _get_full_multimodal_text_prompt(
mm_placeholder_counts, text_prompt)
mm_placeholder_counts,
text_prompt,
mm_tracker._model_config.hf_config.model_type)
return [ConversationMessage(role=role, content=text_prompt)]


Expand Down

0 comments on commit 3495e80

Please sign in to comment.