
Commit bf52471

markmc authored and GWS0428 committed
[V1][Metrics] Hook up IterationStats for Prometheus metrics (vllm-project#12478)
Signed-off-by: Mark McLoughlin <[email protected]>
1 parent 500d7cb commit bf52471

File tree:
tests/entrypoints/openai/test_metrics.py
vllm/v1/engine/async_llm.py
vllm/v1/metrics/loggers.py

3 files changed: +66 −12 lines


tests/entrypoints/openai/test_metrics.py

Lines changed: 5 additions & 2 deletions
@@ -105,8 +105,6 @@ async def client(server):
 @pytest.mark.asyncio
 async def test_metrics_counts(server: RemoteOpenAIServer,
                               client: openai.AsyncClient, use_v1: bool):
-    if use_v1:
-        pytest.skip("Skipping test on vllm V1")
     for _ in range(_NUM_REQUESTS):
         # sending a request triggers the metrics to be logged.
         await client.completions.create(
@@ -120,6 +118,9 @@ async def test_metrics_counts(server: RemoteOpenAIServer,
 
     # Loop over all expected metric_families
     for metric_family, suffix_values_list in EXPECTED_VALUES.items():
+        if use_v1 and metric_family not in EXPECTED_METRICS_V1:
+            continue
+
         found_metric = False
 
         # Check to see if the metric_family is found in the prom endpoint.
@@ -199,6 +200,8 @@ async def test_metrics_counts(server: RemoteOpenAIServer,
 EXPECTED_METRICS_V1 = [
     "vllm:num_requests_running",
     "vllm:num_requests_waiting",
+    "vllm:prompt_tokens_total",
+    "vllm:generation_tokens_total",
 ]
 
 
vllm/v1/engine/async_llm.py

Lines changed: 2 additions & 1 deletion
@@ -305,7 +305,8 @@ def _log_stats(
             return
 
         for logger in self.stat_loggers:
-            logger.log(scheduler_stats=scheduler_stats)
+            logger.log(scheduler_stats=scheduler_stats,
+                       iteration_stats=iteration_stats)
 
     def encode(
         self,
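The iteration_stats argument threaded through here is the per-engine-step IterationStats object imported from vllm.v1.metrics.stats. Only two of its fields are consumed by the loggers in this commit; a minimal sketch of that shape, inferred from the loggers.py changes below (the real class may carry more state), is:

    from dataclasses import dataclass

    @dataclass
    class IterationStats:
        # Only the fields referenced by the stat loggers in this commit;
        # the actual class in vllm/v1/metrics/stats.py may define more.
        num_prompt_tokens: int = 0       # prefill tokens processed this step
        num_generation_tokens: int = 0   # new tokens sampled this step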

vllm/v1/metrics/loggers.py

Lines changed: 59 additions & 9 deletions
@@ -1,11 +1,12 @@
 import time
 from abc import ABC, abstractmethod
-from typing import Dict
+from typing import Dict, List
 
+import numpy as np
 import prometheus_client
 
 from vllm.logger import init_logger
-from vllm.v1.metrics.stats import SchedulerStats
+from vllm.v1.metrics.stats import IterationStats, SchedulerStats
 
 logger = init_logger(__name__)
 
@@ -15,27 +16,61 @@
 class StatLoggerBase(ABC):
 
     @abstractmethod
-    def log(self, scheduler_stats: SchedulerStats):
+    def log(self, scheduler_stats: SchedulerStats,
+            iteration_stats: IterationStats):
         ...
 
 
 class LoggingStatLogger(StatLoggerBase):
 
     def __init__(self):
-        self.last_log_time = time.monotonic()
+        self._reset(time.monotonic())
 
-    def log(self, scheduler_stats: SchedulerStats):
-        """Log Stats to standard output."""
+    def _reset(self, now):
+        self.last_log_time = now
+
+        # Tracked stats over current local logging interval.
+        self.num_prompt_tokens: List[int] = []
+        self.num_generation_tokens: List[int] = []
 
+    def _local_interval_elapsed(self, now: float) -> bool:
         # Log every _LOCAL_LOGGING_INTERVAL_SEC.
+        elapsed_time = now - self.last_log_time
+        return elapsed_time > _LOCAL_LOGGING_INTERVAL_SEC
+
+    def _track_iteration_stats(self, iteration_stats: IterationStats):
+        # Save tracked stats for token counters.
+        self.num_prompt_tokens.append(iteration_stats.num_prompt_tokens)
+        self.num_generation_tokens.append(
+            iteration_stats.num_generation_tokens)
+
+    def _get_throughput(self, tracked_stats: List[int], now: float) -> float:
+        # Compute summary metrics for tracked stats
+        return float(np.sum(tracked_stats) / (now - self.last_log_time))
+
+    def log(self, scheduler_stats: SchedulerStats,
+            iteration_stats: IterationStats):
+        """Log Stats to standard output."""
+
+        self._track_iteration_stats(iteration_stats)
+
         now = time.monotonic()
-        if now - self.last_log_time < _LOCAL_LOGGING_INTERVAL_SEC:
+        if not self._local_interval_elapsed(now):
             return
-        self.last_log_time = now
+
+        prompt_throughput = self._get_throughput(self.num_prompt_tokens, now)
+        generation_throughput = self._get_throughput(
+            self.num_generation_tokens, now)
+
+        self._reset(now)
 
         # Format and print output.
         logger.info(
+            "Avg prompt throughput: %.1f tokens/s, "
+            "Avg generation throughput: %.1f tokens/s, "
             "Running: %d reqs, Waiting: %d reqs ",
+            prompt_throughput,
+            generation_throughput,
             scheduler_stats.num_running_reqs,
             scheduler_stats.num_waiting_reqs,
         )
@@ -61,11 +96,26 @@ def __init__(self, labels: Dict[str, str]):
             documentation="Number of requests waiting to be processed.",
             labelnames=labelnames).labels(*labelvalues)
 
-    def log(self, scheduler_stats: SchedulerStats):
+        self.counter_prompt_tokens = prometheus_client.Counter(
+            name="vllm:prompt_tokens_total",
+            documentation="Number of prefill tokens processed.",
+            labelnames=labelnames).labels(*labelvalues)
+
+        self.counter_generation_tokens = prometheus_client.Counter(
+            name="vllm:generation_tokens_total",
+            documentation="Number of generation tokens processed.",
+            labelnames=labelnames).labels(*labelvalues)
+
+    def log(self, scheduler_stats: SchedulerStats,
+            iteration_stats: IterationStats):
         """Log to prometheus."""
         self.gauge_scheduler_running.set(scheduler_stats.num_running_reqs)
         self.gauge_scheduler_waiting.set(scheduler_stats.num_waiting_reqs)
 
+        self.counter_prompt_tokens.inc(iteration_stats.num_prompt_tokens)
+        self.counter_generation_tokens.inc(
+            iteration_stats.num_generation_tokens)
+
     @staticmethod
     def _unregister_vllm_metrics():
         # Unregister any existing vLLM collectors (for CI/CD
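The throughput numbers printed by LoggingStatLogger are simply the per-iteration token counts accumulated since the last log line, summed and divided by the elapsed interval. A self-contained sketch of that arithmetic, with made-up numbers:

    import numpy as np

    # Tokens recorded per engine iteration since the last log (illustrative).
    num_prompt_tokens = [512, 0, 0, 256]
    num_generation_tokens = [4, 8, 8, 8]
    elapsed = 5.0  # seconds since last_log_time

    prompt_throughput = float(np.sum(num_prompt_tokens) / elapsed)          # 153.6
    generation_throughput = float(np.sum(num_generation_tokens) / elapsed)  # 5.6
    print(f"Avg prompt throughput: {prompt_throughput:.1f} tokens/s, "
          f"Avg generation throughput: {generation_throughput:.1f} tokens/s")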
