
Commit 01ba927

[V1][Metrics] Add initial Prometheus logger (#12416)
Signed-off-by: Mark McLoughlin <[email protected]>
1 parent 103bd17 commit 01ba927

File tree

3 files changed: 78 additions, 10 deletions

tests/entrypoints/openai/test_metrics.py

Lines changed: 35 additions & 6 deletions
@@ -16,6 +16,24 @@
 MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"


+@pytest.fixture(scope="module", params=[True, False])
+def use_v1(request):
+    # Module-scoped variant of run_with_both_engines
+    #
+    # Use this fixture to run a test with both v0 and v1, and
+    # also to conditionalize the test logic e.g.
+    #
+    #   def test_metrics_exist(use_v1, server, client):
+    #       ...
+    #       expected = EXPECTED_V1_METRICS if use_v1 else EXPECTED_METRICS
+    #       for metric in expected:
+    #           assert metric in response.text
+    #
+    # @skip_v1 wouldn't work here because this is a module-level
+    # fixture - per-function decorators would have no effect
+    yield request.param
+
+
 @pytest.fixture(scope="module")
 def default_server_args():
     return [
@@ -36,10 +54,12 @@ def default_server_args():
         "--enable-chunked-prefill",
         "--disable-frontend-multiprocessing",
     ])
-def server(default_server_args, request):
+def server(use_v1, default_server_args, request):
     if request.param:
         default_server_args.append(request.param)
-    with RemoteOpenAIServer(MODEL_NAME, default_server_args) as remote_server:
+    env_dict = dict(VLLM_USE_V1='1' if use_v1 else '0')
+    with RemoteOpenAIServer(MODEL_NAME, default_server_args,
+                            env_dict=env_dict) as remote_server:
         yield remote_server


@@ -84,7 +104,9 @@ async def client(server):

 @pytest.mark.asyncio
 async def test_metrics_counts(server: RemoteOpenAIServer,
-                              client: openai.AsyncClient):
+                              client: openai.AsyncClient, use_v1: bool):
+    if use_v1:
+        pytest.skip("Skipping test on vllm V1")
     for _ in range(_NUM_REQUESTS):
         # sending a request triggers the metrics to be logged.
         await client.completions.create(
@@ -174,10 +196,15 @@ async def test_metrics_counts(server: RemoteOpenAIServer,
     "swap_space_bytes",
 ]

+EXPECTED_METRICS_V1 = [
+    "vllm:num_requests_running",
+    "vllm:num_requests_waiting",
+]
+

 @pytest.mark.asyncio
 async def test_metrics_exist(server: RemoteOpenAIServer,
-                             client: openai.AsyncClient):
+                             client: openai.AsyncClient, use_v1: bool):
     # sending a request triggers the metrics to be logged.
     await client.completions.create(model=MODEL_NAME,
                                     prompt="Hello, my name is",
@@ -187,11 +214,13 @@ async def test_metrics_exist(server: RemoteOpenAIServer,
     response = requests.get(server.url_for("metrics"))
     assert response.status_code == HTTPStatus.OK

-    for metric in EXPECTED_METRICS:
+    for metric in (EXPECTED_METRICS_V1 if use_v1 else EXPECTED_METRICS):
         assert metric in response.text


-def test_metrics_exist_run_batch():
+def test_metrics_exist_run_batch(use_v1: bool):
+    if use_v1:
+        pytest.skip("Skipping test on vllm V1")
     input_batch = """{"custom_id": "request-0", "method": "POST", "url": "/v1/embeddings", "body": {"model": "intfloat/e5-mistral-7b-instruct", "input": "You are a helpful assistant."}}"""  # noqa: E501

     base_url = "0.0.0.0"
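For context, the two new V1 gauges can also be checked against a running server by scraping the standard /metrics endpoint. A minimal sketch, assuming a vLLM OpenAI server started with VLLM_USE_V1=1 is already listening on http://localhost:8000; the helper name check_v1_metrics is hypothetical and not part of the commit:

# Hypothetical standalone check of the two V1 gauges added in this commit.
# Assumes a server started with VLLM_USE_V1=1 is listening on port 8000.
import requests
from prometheus_client.parser import text_string_to_metric_families

EXPECTED_METRICS_V1 = [
    "vllm:num_requests_running",
    "vllm:num_requests_waiting",
]


def check_v1_metrics(base_url: str = "http://localhost:8000") -> None:
    response = requests.get(f"{base_url}/metrics")
    response.raise_for_status()

    # Parse the Prometheus text exposition format into metric families.
    families = {
        family.name: family
        for family in text_string_to_metric_families(response.text)
    }

    for metric in EXPECTED_METRICS_V1:
        assert metric in families, f"missing metric: {metric}"
        # Each sample should carry the model_name label set by the logger.
        for sample in families[metric].samples:
            assert "model_name" in sample.labels


if __name__ == "__main__":
    check_v1_metrics()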

vllm/v1/engine/async_llm.py

Lines changed: 7 additions & 4 deletions
@@ -24,7 +24,8 @@
 from vllm.v1.engine.output_processor import OutputProcessor
 from vllm.v1.engine.processor import Processor
 from vllm.v1.executor.abstract import Executor
-from vllm.v1.metrics.loggers import LoggingStatLogger, StatLoggerBase
+from vllm.v1.metrics.loggers import (LoggingStatLogger, PrometheusStatLogger,
+                                     StatLoggerBase)
 from vllm.v1.metrics.stats import IterationStats, SchedulerStats

 logger = init_logger(__name__)
@@ -46,13 +47,15 @@ def __init__(

         assert start_engine_loop

+        self.model_config = vllm_config.model_config
+
         self.log_requests = log_requests
         self.log_stats = log_stats
         self.stat_loggers: List[StatLoggerBase] = [
             LoggingStatLogger(),
-            # TODO(rob): PrometheusStatLogger(),
+            PrometheusStatLogger(labels=dict(
+                model_name=self.model_config.served_model_name)),
         ]
-        self.model_config = vllm_config.model_config

         # Tokenizer (+ ensure liveness if running in another process).
         self.tokenizer = init_tokenizer_from_configs(
@@ -272,7 +275,7 @@ async def _run_output_handler(self):

                # 4) Logging.
                # TODO(rob): make into a coroutine and launch it in
-               # background thread once we add Prometheus.
+               # background thread once Prometheus overhead is non-trivial.
                assert iteration_stats is not None
                self._log_stats(
                    scheduler_stats=outputs.scheduler_stats,
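The engine keeps a list of StatLoggerBase instances and hands each one the same per-iteration scheduler snapshot. A minimal sketch of driving the two loggers directly outside the engine; the model name and the SchedulerStats field values below are illustrative assumptions, not taken from the commit, and the fan-out is analogous to, not a copy of, the _log_stats call above:

# Illustrative sketch only: drive both stat loggers outside the engine.
# The model name and stats values are made-up example inputs.
from vllm.v1.metrics.loggers import LoggingStatLogger, PrometheusStatLogger
from vllm.v1.metrics.stats import SchedulerStats

stat_loggers = [
    LoggingStatLogger(),
    PrometheusStatLogger(labels=dict(model_name="example-model")),
]

# One "iteration" worth of scheduler state, fanned out to every logger.
stats = SchedulerStats(num_running_reqs=3, num_waiting_reqs=1)
for stat_logger in stat_loggers:
    stat_logger.log(stats)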

vllm/v1/metrics/loggers.py

Lines changed: 36 additions & 0 deletions
@@ -1,5 +1,8 @@
 import time
 from abc import ABC, abstractmethod
+from typing import Dict
+
+import prometheus_client

 from vllm.logger import init_logger
 from vllm.v1.metrics.stats import SchedulerStats
@@ -36,3 +39,36 @@ def log(self, scheduler_stats: SchedulerStats):
             scheduler_stats.num_running_reqs,
             scheduler_stats.num_waiting_reqs,
         )
+
+
+class PrometheusStatLogger(StatLoggerBase):
+
+    def __init__(self, labels: Dict[str, str]):
+        self.labels = labels
+
+        labelnames = self.labels.keys()
+        labelvalues = self.labels.values()
+
+        self._unregister_vllm_metrics()
+
+        self.gauge_scheduler_running = prometheus_client.Gauge(
+            name="vllm:num_requests_running",
+            documentation="Number of requests in model execution batches.",
+            labelnames=labelnames).labels(*labelvalues)
+
+        self.gauge_scheduler_waiting = prometheus_client.Gauge(
+            name="vllm:num_requests_waiting",
+            documentation="Number of requests waiting to be processed.",
+            labelnames=labelnames).labels(*labelvalues)
+
+    def log(self, scheduler_stats: SchedulerStats):
+        """Log to prometheus."""
+        self.gauge_scheduler_running.set(scheduler_stats.num_running_reqs)
+        self.gauge_scheduler_waiting.set(scheduler_stats.num_waiting_reqs)
+
+    @staticmethod
+    def _unregister_vllm_metrics():
+        # Unregister any existing vLLM collectors (for CI/CD)
+        for collector in list(prometheus_client.REGISTRY._collector_to_names):
+            if hasattr(collector, "_name") and "vllm" in collector._name:
+                prometheus_client.REGISTRY.unregister(collector)
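For readers unfamiliar with prometheus_client: Gauge declares the metric once against the default REGISTRY, .labels() pre-binds the label values (here model_name), and .set() updates the current value. Re-declaring a metric name that is already registered raises a "Duplicated timeseries" ValueError, which is why _unregister_vllm_metrics clears stale vLLM collectors first, e.g. when tests build the engine repeatedly in one process. A minimal standalone sketch of the same pattern; the metric name, label value, and port are assumptions for the example:

# Standalone prometheus_client sketch of the pattern used above; the metric
# name, label value, and port are example assumptions.
import time

import prometheus_client

# Declare once, pre-bind the label value, then update via .set().
gauge_running = prometheus_client.Gauge(
    name="demo:num_requests_running",
    documentation="Number of requests in model execution batches.",
    labelnames=["model_name"]).labels("example-model")

if __name__ == "__main__":
    # Serve the default REGISTRY at http://localhost:8000/metrics.
    prometheus_client.start_http_server(8000)
    for value in (0, 2, 5, 1):
        gauge_running.set(value)
        time.sleep(1)

Scraping that endpoint then shows demo:num_requests_running{model_name="example-model"} with the most recent value, which is how the vllm:num_requests_running and vllm:num_requests_waiting gauges surface in the test above.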
