[V1][Metrics] Add initial Prometheus logger (vllm-project#12416)
Signed-off-by: Mark McLoughlin <[email protected]>
markmc authored and NickLucche committed Feb 7, 2025
1 parent 75f84a1 commit bc9a9a2
Showing 3 changed files with 78 additions and 10 deletions.
41 changes: 35 additions & 6 deletions tests/entrypoints/openai/test_metrics.py
@@ -16,6 +16,24 @@
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"


@pytest.fixture(scope="module", params=[True, False])
def use_v1(request):
# Module-scoped variant of run_with_both_engines
#
# Use this fixture to run a test with both v0 and v1, and
# also to conditionalize the test logic e.g.
#
# def test_metrics_exist(use_v1, server, client):
# ...
# expected = EXPECTED_V1_METRICS if use_v1 else EXPECTED_METRICS
# for metric in expected:
# assert metric in response.text
#
# @skip_v1 wouldn't work here because this is a module-level
# fixture - per-function decorators would have no effect
yield request.param


@pytest.fixture(scope="module")
def default_server_args():
return [
@@ -36,10 +54,12 @@ def default_server_args():
"--enable-chunked-prefill",
"--disable-frontend-multiprocessing",
])
def server(default_server_args, request):
def server(use_v1, default_server_args, request):
if request.param:
default_server_args.append(request.param)
with RemoteOpenAIServer(MODEL_NAME, default_server_args) as remote_server:
env_dict = dict(VLLM_USE_V1='1' if use_v1 else '0')
with RemoteOpenAIServer(MODEL_NAME, default_server_args,
env_dict=env_dict) as remote_server:
yield remote_server


@@ -84,7 +104,9 @@ async def client(server):

@pytest.mark.asyncio
async def test_metrics_counts(server: RemoteOpenAIServer,
client: openai.AsyncClient):
client: openai.AsyncClient, use_v1: bool):
if use_v1:
pytest.skip("Skipping test on vllm V1")
for _ in range(_NUM_REQUESTS):
# sending a request triggers the metrics to be logged.
await client.completions.create(
@@ -174,10 +196,15 @@ async def test_metrics_counts(server: RemoteOpenAIServer,
"swap_space_bytes",
]

EXPECTED_METRICS_V1 = [
"vllm:num_requests_running",
"vllm:num_requests_waiting",
]


@pytest.mark.asyncio
async def test_metrics_exist(server: RemoteOpenAIServer,
client: openai.AsyncClient):
client: openai.AsyncClient, use_v1: bool):
# sending a request triggers the metrics to be logged.
await client.completions.create(model=MODEL_NAME,
prompt="Hello, my name is",
@@ -187,11 +214,13 @@ async def test_metrics_exist(server: RemoteOpenAIServer,
response = requests.get(server.url_for("metrics"))
assert response.status_code == HTTPStatus.OK

for metric in EXPECTED_METRICS:
for metric in (EXPECTED_METRICS_V1 if use_v1 else EXPECTED_METRICS):
assert metric in response.text


def test_metrics_exist_run_batch():
def test_metrics_exist_run_batch(use_v1: bool):
if use_v1:
pytest.skip("Skipping test on vllm V1")
input_batch = """{"custom_id": "request-0", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "You are a helpful assistant."}}""" # noqa: E501

base_url = "0.0.0.0"
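For orientation, a scrape of the server's /metrics endpoint under V1 should contain the two gauges listed in EXPECTED_METRICS_V1, rendered in the standard Prometheus text exposition format. The sample below is illustrative only: the model_name label value follows the served model name, and the numeric values depend on load at scrape time.

# HELP vllm:num_requests_running Number of requests in model execution batches.
# TYPE vllm:num_requests_running gauge
vllm:num_requests_running{model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0"} 0.0
# HELP vllm:num_requests_waiting Number of requests waiting to be processed.
# TYPE vllm:num_requests_waiting gauge
vllm:num_requests_waiting{model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0"} 0.0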
11 changes: 7 additions & 4 deletions vllm/v1/engine/async_llm.py
@@ -24,7 +24,8 @@
from vllm.v1.engine.output_processor import OutputProcessor
from vllm.v1.engine.processor import Processor
from vllm.v1.executor.abstract import Executor
from vllm.v1.metrics.loggers import LoggingStatLogger, StatLoggerBase
from vllm.v1.metrics.loggers import (LoggingStatLogger, PrometheusStatLogger,
StatLoggerBase)
from vllm.v1.metrics.stats import IterationStats, SchedulerStats

logger = init_logger(__name__)
@@ -46,13 +47,15 @@ def __init__(

assert start_engine_loop

self.model_config = vllm_config.model_config

self.log_requests = log_requests
self.log_stats = log_stats
self.stat_loggers: List[StatLoggerBase] = [
LoggingStatLogger(),
# TODO(rob): PrometheusStatLogger(),
PrometheusStatLogger(labels=dict(
model_name=self.model_config.served_model_name)),
]
self.model_config = vllm_config.model_config

# Tokenizer (+ ensure liveness if running in another process).
self.tokenizer = init_tokenizer_from_configs(
@@ -272,7 +275,7 @@ async def _run_output_handler(self):

# 4) Logging.
# TODO(rob): make into a coroutine and launch it in
# background thread once we add Prometheus.
# background thread once Prometheus overhead is non-trivial.
assert iteration_stats is not None
self._log_stats(
scheduler_stats=outputs.scheduler_stats,
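For context, the stat_loggers list built in __init__ is what the logging step above fans out to. A minimal sketch of that fan-out follows; the method name and scheduler_stats keyword come from the call site in _run_output_handler, but the body is assumed rather than taken from this diff, and the real method may also forward per-iteration stats.

# Sketch only: a plausible shape for AsyncLLM._log_stats, inferred from the call site above.
def _log_stats(self, scheduler_stats, iteration_stats=None):
    if not self.log_stats:
        return
    # Every configured logger (LoggingStatLogger, PrometheusStatLogger, ...)
    # receives the same per-step scheduler snapshot.
    for stat_logger in self.stat_loggers:
        stat_logger.log(scheduler_stats)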
36 changes: 36 additions & 0 deletions vllm/v1/metrics/loggers.py
@@ -1,5 +1,8 @@
import time
from abc import ABC, abstractmethod
from typing import Dict

import prometheus_client

from vllm.logger import init_logger
from vllm.v1.metrics.stats import SchedulerStats
@@ -36,3 +39,36 @@ def log(self, scheduler_stats: SchedulerStats):
scheduler_stats.num_running_reqs,
scheduler_stats.num_waiting_reqs,
)


class PrometheusStatLogger(StatLoggerBase):

def __init__(self, labels: Dict[str, str]):
self.labels = labels

labelnames = self.labels.keys()
labelvalues = self.labels.values()

self._unregister_vllm_metrics()

self.gauge_scheduler_running = prometheus_client.Gauge(
name="vllm:num_requests_running",
documentation="Number of requests in model execution batches.",
labelnames=labelnames).labels(*labelvalues)

self.gauge_scheduler_waiting = prometheus_client.Gauge(
name="vllm:num_requests_waiting",
documentation="Number of requests waiting to be processed.",
labelnames=labelnames).labels(*labelvalues)

def log(self, scheduler_stats: SchedulerStats):
"""Log to prometheus."""
self.gauge_scheduler_running.set(scheduler_stats.num_running_reqs)
self.gauge_scheduler_waiting.set(scheduler_stats.num_waiting_reqs)

@staticmethod
def _unregister_vllm_metrics():
# Unregister any existing vLLM collectors (for CI/CD)
for collector in list(prometheus_client.REGISTRY._collector_to_names):
if hasattr(collector, "_name") and "vllm" in collector._name:
prometheus_client.REGISTRY.unregister(collector)
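
To make the new logger concrete, a small standalone usage sketch follows. It assumes SchedulerStats can be constructed with the two fields read by log() (as shown in the diff above) and uses prometheus_client.generate_latest() to render the default registry; it is illustrative and not part of the commit.

# Illustrative usage of PrometheusStatLogger; not part of this commit.
import prometheus_client

from vllm.v1.metrics.loggers import PrometheusStatLogger
from vllm.v1.metrics.stats import SchedulerStats

# Labels mirror how AsyncLLM constructs the logger (served model name).
stat_logger = PrometheusStatLogger(
    labels={"model_name": "TinyLlama/TinyLlama-1.1B-Chat-v1.0"})

# Push one scheduler snapshot into the gauges (field names from the diff above).
stat_logger.log(SchedulerStats(num_running_reqs=2, num_waiting_reqs=5))

# Render the default registry in Prometheus text format; an HTTP /metrics
# endpoint would serve this same payload.
print(prometheus_client.generate_latest().decode())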
