Skip to content

Commit

Permalink
[Misc] Add placeholder module (vllm-project#11501)
Browse files: browse the repository at this point in the history
Signed-off-by: DarkLight1337 <[email protected]>
  • Loading branch information
DarkLight1337 authored Dec 26, 2024
1 parent f57ee56 commit eec906d
Show file tree
Hide file tree
Showing 14 changed files with 143 additions and 100 deletions.
9 changes: 7 additions & 2 deletions tests/tensorizer_loader/test_tensorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import pytest
import torch
from huggingface_hub import snapshot_download
from tensorizer import EncryptionParams

from vllm import SamplingParams
from vllm.engine.arg_utils import EngineArgs
Expand All @@ -23,12 +22,18 @@
serialize_vllm_model,
tensorize_vllm_model)
# yapf: enable
from vllm.utils import import_from_path
from vllm.utils import PlaceholderModule, import_from_path

from ..conftest import VllmRunner
from ..utils import VLLM_PATH, RemoteOpenAIServer
from .conftest import retry_until_skip

try:
from tensorizer import EncryptionParams
except ImportError:
tensorizer = PlaceholderModule("tensorizer") # type: ignore[assignment]
EncryptionParams = tensorizer.placeholder_attr("EncryptionParams")

EXAMPLES_PATH = VLLM_PATH / "examples"

prompts = [
Expand Down
19 changes: 12 additions & 7 deletions vllm/assets/audio.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
from dataclasses import dataclass
from typing import Literal, Tuple
from typing import Literal
from urllib.parse import urljoin

import librosa
import numpy as np
import numpy.typing as npt

from vllm.assets.base import get_vllm_public_assets, vLLM_S3_BUCKET_URL
from vllm.utils import PlaceholderModule

from .base import VLLM_S3_BUCKET_URL, get_vllm_public_assets

try:
import librosa
except ImportError:
librosa = PlaceholderModule("librosa") # type: ignore[assignment]

ASSET_DIR = "multimodal_asset"

Expand All @@ -15,8 +21,7 @@ class AudioAsset:
name: Literal["winning_call", "mary_had_lamb"]

@property
def audio_and_sample_rate(self) -> Tuple[np.ndarray, int]:

def audio_and_sample_rate(self) -> tuple[npt.NDArray, int]:
audio_path = get_vllm_public_assets(filename=f"{self.name}.ogg",
s3_prefix=ASSET_DIR)
y, sr = librosa.load(audio_path, sr=None)
Expand All @@ -25,4 +30,4 @@ def audio_and_sample_rate(self) -> Tuple[np.ndarray, int]:

@property
def url(self) -> str:
return urljoin(vLLM_S3_BUCKET_URL, f"{ASSET_DIR}/{self.name}.ogg")
return urljoin(VLLM_S3_BUCKET_URL, f"{ASSET_DIR}/{self.name}.ogg")
7 changes: 3 additions & 4 deletions vllm/assets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@

import vllm.envs as envs
from vllm.connections import global_http_connection
from vllm.envs import VLLM_IMAGE_FETCH_TIMEOUT

vLLM_S3_BUCKET_URL = "https://vllm-public-assets.s3.us-west-2.amazonaws.com"
VLLM_S3_BUCKET_URL = "https://vllm-public-assets.s3.us-west-2.amazonaws.com"


def get_cache_dir() -> Path:
Expand All @@ -32,8 +31,8 @@ def get_vllm_public_assets(filename: str,
if s3_prefix is not None:
filename = s3_prefix + "/" + filename
global_http_connection.download_file(
f"{vLLM_S3_BUCKET_URL}/{filename}",
f"{VLLM_S3_BUCKET_URL}/{filename}",
asset_path,
timeout=VLLM_IMAGE_FETCH_TIMEOUT)
timeout=envs.VLLM_IMAGE_FETCH_TIMEOUT)

return asset_path
3 changes: 1 addition & 2 deletions vllm/assets/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import torch
from PIL import Image

from vllm.assets.base import get_vllm_public_assets
from .base import get_vllm_public_assets

VLM_IMAGES_DIR = "vision_model_images"

Expand All @@ -15,7 +15,6 @@ class ImageAsset:

@property
def pil_image(self) -> Image.Image:

image_path = get_vllm_public_assets(filename=f"{self.name}.jpg",
s3_prefix=VLM_IMAGES_DIR)
return Image.open(image_path)
Expand Down
9 changes: 3 additions & 6 deletions vllm/assets/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
from functools import lru_cache
from typing import List, Literal

import cv2
import numpy as np
import numpy.typing as npt
from huggingface_hub import hf_hub_download
from PIL import Image

from vllm.multimodal.video import (sample_frames_from_video,
try_import_video_packages)
from vllm.multimodal.video import sample_frames_from_video

from .base import get_cache_dir

Expand All @@ -19,7 +19,7 @@ def download_video_asset(filename: str) -> str:
Download and open an image from huggingface
repo: raushan-testing-hf/videos-test
"""
video_directory = get_cache_dir() / "video-eample-data"
video_directory = get_cache_dir() / "video-example-data"
video_directory.mkdir(parents=True, exist_ok=True)

video_path = video_directory / filename
Expand All @@ -35,8 +35,6 @@ def download_video_asset(filename: str) -> str:


def video_to_ndarrays(path: str, num_frames: int = -1) -> npt.NDArray:
cv2, _ = try_import_video_packages()

cap = cv2.VideoCapture(path)
if not cap.isOpened():
raise ValueError(f"Could not open video file {path}")
Expand All @@ -59,7 +57,6 @@ def video_to_ndarrays(path: str, num_frames: int = -1) -> npt.NDArray:

def video_to_pil_images_list(path: str,
num_frames: int = -1) -> List[Image.Image]:
cv2, _ = try_import_video_packages()
frames = video_to_ndarrays(path, num_frames)
return [
Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
Expand Down
10 changes: 1 addition & 9 deletions vllm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
get_hf_text_config, get_pooling_config,
get_sentence_transformer_tokenizer_config, is_encoder_decoder,
try_get_generation_config, uses_mrope)
from vllm.transformers_utils.s3_utils import S3Model
from vllm.transformers_utils.utils import is_s3
from vllm.utils import (GiB_bytes, LayerBlockType, cuda_device_count_stateless,
get_cpu_memory, print_warning_once, random_uuid,
Expand Down Expand Up @@ -372,15 +373,6 @@ def maybe_pull_model_tokenizer_for_s3(self, model: str,
"""
if is_s3(model) or is_s3(tokenizer):
try:
from vllm.transformers_utils.s3_utils import S3Model
except ImportError as err:
raise ImportError(
"Please install Run:ai optional dependency "
"to use the S3 capabilities. "
"You can install it with: pip install vllm[runai]"
) from err

if is_s3(model):
self.s3_model = S3Model()
self.s3_model.pull_files(model, allow_pattern=["*config.json"])
Expand Down
11 changes: 1 addition & 10 deletions vllm/model_executor/model_loader/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
runai_safetensors_weights_iterator, safetensors_weights_iterator)
from vllm.model_executor.utils import set_weight_attrs
from vllm.platforms import current_platform
from vllm.transformers_utils.s3_utils import glob as s3_glob
from vllm.transformers_utils.utils import is_s3
from vllm.utils import is_pin_memory_available

Expand Down Expand Up @@ -1269,16 +1270,6 @@ def _prepare_weights(self, model_name_or_path: str,
If the model is not local, it will be downloaded."""
is_s3_path = is_s3(model_name_or_path)
if is_s3_path:
try:
from vllm.transformers_utils.s3_utils import glob as s3_glob
except ImportError as err:
raise ImportError(
"Please install Run:ai optional dependency "
"to use the S3 capabilities. "
"You can install it with: pip install vllm[runai]"
) from err

is_local = os.path.isdir(model_name_or_path)
safetensors_pattern = "*.safetensors"
index_file = SAFE_WEIGHTS_INDEX_NAME
Expand Down
25 changes: 14 additions & 11 deletions vllm/model_executor/model_loader/tensorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@
from vllm.logger import init_logger
from vllm.model_executor.layers.vocab_parallel_embedding import (
VocabParallelEmbedding)
from vllm.utils import FlexibleArgumentParser

tensorizer_error_msg = None
from vllm.utils import FlexibleArgumentParser, PlaceholderModule

try:
from tensorizer import (DecryptionParams, EncryptionParams,
Expand All @@ -34,8 +32,19 @@
open_stream,
mode=mode,
) for mode in ("rb", "wb+"))
except ImportError as e:
tensorizer_error_msg = str(e)
except ImportError:
tensorizer = PlaceholderModule("tensorizer")
DecryptionParams = tensorizer.placeholder_attr("DecryptionParams")
EncryptionParams = tensorizer.placeholder_attr("EncryptionParams")
TensorDeserializer = tensorizer.placeholder_attr("TensorDeserializer")
TensorSerializer = tensorizer.placeholder_attr("TensorSerializer")
open_stream = tensorizer.placeholder_attr("stream_io.open_stream")
convert_bytes = tensorizer.placeholder_attr("utils.convert_bytes")
get_mem_usage = tensorizer.placeholder_attr("utils.get_mem_usage")
no_init_or_tensor = tensorizer.placeholder_attr("utils.no_init_or_tensor")

_read_stream = tensorizer.placeholder_attr("_read_stream")
_write_stream = tensorizer.placeholder_attr("_write_stream")

__all__ = [
'EncryptionParams', 'DecryptionParams', 'TensorDeserializer',
Expand Down Expand Up @@ -267,12 +276,6 @@ class TensorizerAgent:
"""

def __init__(self, tensorizer_config: TensorizerConfig, vllm_config):
if tensorizer_error_msg is not None:
raise ImportError(
"Tensorizer is not installed. Please install tensorizer "
"to use this feature with `pip install vllm[tensorizer]`. "
"Error message: {}".format(tensorizer_error_msg))

self.tensorizer_config = tensorizer_config
self.tensorizer_args = (
self.tensorizer_config._construct_tensorizer_args())
Expand Down
17 changes: 9 additions & 8 deletions vllm/model_executor/model_loader/weight_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,15 @@
get_quantization_config)
from vllm.model_executor.layers.quantization.schema import QuantParamSchema
from vllm.platforms import current_platform
from vllm.utils import print_warning_once
from vllm.utils import PlaceholderModule, print_warning_once

try:
from runai_model_streamer import SafetensorsStreamer
except ImportError:
runai_model_streamer = PlaceholderModule(
"runai_model_streamer") # type: ignore[assignment]
SafetensorsStreamer = runai_model_streamer.placeholder_attr(
"SafetensorsStreamer")

logger = init_logger(__name__)

Expand Down Expand Up @@ -414,13 +422,6 @@ def runai_safetensors_weights_iterator(
hf_weights_files: List[str]
) -> Generator[Tuple[str, torch.Tensor], None, None]:
"""Iterate over the weights in the model safetensor files."""
try:
from runai_model_streamer import SafetensorsStreamer
except ImportError as err:
raise ImportError(
"Please install Run:ai optional dependency."
"You can install it with: pip install vllm[runai]") from err

enable_tqdm = not torch.distributed.is_initialized(
) or torch.distributed.get_rank() == 0
with SafetensorsStreamer() as streamer:
Expand Down
24 changes: 6 additions & 18 deletions vllm/multimodal/audio.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
from typing import Any

import numpy as np
import numpy.typing as npt

from vllm.inputs.registry import InputContext
from vllm.utils import PlaceholderModule

from .base import MultiModalPlugin
from .inputs import AudioItem, MultiModalData, MultiModalKwargs

try:
import librosa
except ImportError:
librosa = PlaceholderModule("librosa") # type: ignore[assignment]


class AudioPlugin(MultiModalPlugin):
"""Plugin for audio data."""
Expand All @@ -28,26 +32,10 @@ def _default_max_multimodal_tokens(self, ctx: InputContext) -> int:
"There is no default maximum multimodal tokens")


def try_import_audio_packages() -> tuple[Any, Any]:
try:
import librosa
import soundfile
except ImportError as exc:
raise ImportError(
"Please install vllm[audio] for audio support.") from exc
return librosa, soundfile


def resample_audio(
audio: npt.NDArray[np.floating],
*,
orig_sr: float,
target_sr: float,
) -> npt.NDArray[np.floating]:
try:
import librosa
except ImportError as exc:
msg = "Please install vllm[audio] for audio support."
raise ImportError(msg) from exc

return librosa.resample(audio, orig_sr=orig_sr, target_sr=target_sr)
26 changes: 16 additions & 10 deletions vllm/multimodal/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,24 @@
from vllm.connections import global_http_connection
from vllm.logger import init_logger
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
from vllm.utils import PlaceholderModule

from .audio import try_import_audio_packages
from .inputs import MultiModalDataDict, PlaceholderRange
from .video import try_import_video_packages

try:
import decord
except ImportError:
decord = PlaceholderModule("decord") # type: ignore[assignment]

try:
import librosa
except ImportError:
librosa = PlaceholderModule("librosa") # type: ignore[assignment]

try:
import soundfile
except ImportError:
soundfile = PlaceholderModule("soundfile") # type: ignore[assignment]

logger = init_logger(__name__)

Expand Down Expand Up @@ -128,8 +142,6 @@ async def async_fetch_image(image_url: str,


def _load_video_from_bytes(b: bytes, num_frames: int = 32) -> npt.NDArray:
_, decord = try_import_video_packages()

video_path = BytesIO(b)
vr = decord.VideoReader(video_path, num_threads=1)
total_frame_num = len(vr)
Expand Down Expand Up @@ -204,8 +216,6 @@ def fetch_audio(audio_url: str) -> Tuple[np.ndarray, Union[int, float]]:
"""
Load audio from a URL.
"""
librosa, _ = try_import_audio_packages()

if audio_url.startswith("http"):
audio_bytes = global_http_connection.get_bytes(
audio_url,
Expand All @@ -226,8 +236,6 @@ async def async_fetch_audio(
"""
Asynchronously fetch audio from a URL.
"""
librosa, _ = try_import_audio_packages()

if audio_url.startswith("http"):
audio_bytes = await global_http_connection.async_get_bytes(
audio_url,
Expand Down Expand Up @@ -286,8 +294,6 @@ def encode_audio_base64(
sampling_rate: int,
) -> str:
"""Encode audio as base64."""
_, soundfile = try_import_audio_packages()

buffered = BytesIO()
soundfile.write(buffered, audio, sampling_rate, format="WAV")

Expand Down
Loading

0 comments on commit eec906d

Please sign in to comment.