Commit 0f27d4f

[V1][Metrics] Hook up IterationStats

Follow-on from vllm-project#12416.

Pass IterationStats to the stat loggers, and log it in both the logging and Prometheus loggers. For the logging stat logger, we need to calculate throughput based on the number of tokens seen in the current logging interval. In the Prometheus logger, we just need to record the prompt and generation token counts in counters.

Note: v0 had a vllm:tokens_total counter registered that apparently was never logged to, so I've omitted it in v1.

Signed-off-by: Mark McLoughlin <[email protected]>
1 parent 3f1fc74 commit 0f27d4f

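The loggers below read exactly two fields of IterationStats. For context, a minimal sketch of the assumed shape; the real class lives in vllm/v1/metrics/stats.py and likely carries more than this:

    from dataclasses import dataclass

    @dataclass
    class IterationStats:
        """Assumed shape; only these two fields are used by the diffs below."""
        num_prompt_tokens: int = 0
        num_generation_tokens: int = 0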
File tree: 3 files changed, +66 -12 lines changed

tests/entrypoints/openai/test_metrics.py
Lines changed: 5 additions & 2 deletions

@@ -105,8 +105,6 @@ async def client(server):
 @pytest.mark.asyncio
 async def test_metrics_counts(server: RemoteOpenAIServer,
                               client: openai.AsyncClient, use_v1: bool):
-    if use_v1:
-        pytest.skip("Skipping test on vllm V1")
     for _ in range(_NUM_REQUESTS):
         # sending a request triggers the metrics to be logged.
         await client.completions.create(
@@ -120,6 +118,9 @@ async def test_metrics_counts(server: RemoteOpenAIServer,
 
     # Loop over all expected metric_families
     for metric_family, suffix_values_list in EXPECTED_VALUES.items():
+        if use_v1 and metric_family not in EXPECTED_METRICS_V1:
+            continue
+
         found_metric = False
 
         # Check to see if the metric_family is found in the prom endpoint.
@@ -199,6 +200,8 @@ async def test_metrics_counts(server: RemoteOpenAIServer,
 EXPECTED_METRICS_V1 = [
     "vllm:num_requests_running",
     "vllm:num_requests_waiting",
+    "vllm:prompt_tokens_total",
+    "vllm:generation_tokens_total",
 ]
 
 
vllm/v1/engine/async_llm.py
Lines changed: 2 additions & 1 deletion

@@ -305,7 +305,8 @@ def _log_stats(
             return
 
         for logger in self.stat_loggers:
-            logger.log(scheduler_stats=scheduler_stats)
+            logger.log(scheduler_stats=scheduler_stats,
+                       iteration_stats=iteration_stats)
 
     def encode(
         self,
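Since StatLoggerBase.log() now takes both arguments (see the loggers.py diff below), every logger in self.stat_loggers must accept the new keyword. A minimal, hypothetical conforming logger:

    from vllm.v1.metrics.loggers import StatLoggerBase
    from vllm.v1.metrics.stats import IterationStats, SchedulerStats

    class NoopStatLogger(StatLoggerBase):
        """Hypothetical logger that satisfies the updated interface."""

        def log(self, scheduler_stats: SchedulerStats,
                iteration_stats: IterationStats) -> None:
            # Accept both stats objects; deliberately record nothing.
            pass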
vllm/v1/metrics/loggers.py
Lines changed: 59 additions & 9 deletions

@@ -1,11 +1,12 @@
 import time
 from abc import ABC, abstractmethod
-from typing import Dict
+from typing import Dict, List
 
+import numpy as np
 import prometheus_client
 
 from vllm.logger import init_logger
-from vllm.v1.metrics.stats import SchedulerStats
+from vllm.v1.metrics.stats import IterationStats, SchedulerStats
 
 logger = init_logger(__name__)
 
@@ -15,27 +16,61 @@
 class StatLoggerBase(ABC):
 
     @abstractmethod
-    def log(self, scheduler_stats: SchedulerStats):
+    def log(self, scheduler_stats: SchedulerStats,
+            iteration_stats: IterationStats):
         ...
 
 
 class LoggingStatLogger(StatLoggerBase):
 
     def __init__(self):
-        self.last_log_time = time.monotonic()
+        self._reset(time.monotonic())
 
-    def log(self, scheduler_stats: SchedulerStats):
-        """Log Stats to standard output."""
+    def _reset(self, now):
+        self.last_log_time = now
+
+        # Tracked stats over current local logging interval.
+        self.num_prompt_tokens: List[int] = []
+        self.num_generation_tokens: List[int] = []
 
+    def _local_interval_elapsed(self, now: float) -> bool:
         # Log every _LOCAL_LOGGING_INTERVAL_SEC.
+        elapsed_time = now - self.last_log_time
+        return elapsed_time > _LOCAL_LOGGING_INTERVAL_SEC
+
+    def _track_iteration_stats(self, iteration_stats: IterationStats):
+        # Save tracked stats for token counters.
+        self.num_prompt_tokens.append(iteration_stats.num_prompt_tokens)
+        self.num_generation_tokens.append(
+            iteration_stats.num_generation_tokens)
+
+    def _get_throughput(self, tracked_stats: List[int], now: float) -> float:
+        # Compute summary metrics for tracked stats
+        return float(np.sum(tracked_stats) / (now - self.last_log_time))
+
+    def log(self, scheduler_stats: SchedulerStats,
+            iteration_stats: IterationStats):
+        """Log Stats to standard output."""
+
+        self._track_iteration_stats(iteration_stats)
+
         now = time.monotonic()
-        if now - self.last_log_time < _LOCAL_LOGGING_INTERVAL_SEC:
+        if not self._local_interval_elapsed(now):
             return
-        self.last_log_time = now
+
+        prompt_throughput = self._get_throughput(self.num_prompt_tokens, now)
+        generation_throughput = self._get_throughput(
+            self.num_generation_tokens, now)
+
+        self._reset(now)
 
         # Format and print output.
         logger.info(
+            "Avg prompt throughput: %.1f tokens/s, "
+            "Avg generation throughput: %.1f tokens/s, "
             "Running: %d reqs, Waiting: %d reqs ",
+            prompt_throughput,
+            generation_throughput,
             scheduler_stats.num_running_reqs,
             scheduler_stats.num_waiting_reqs,
         )
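To make the throughput math concrete: _get_throughput sums the per-iteration token counts tracked since the last log and divides by the elapsed interval. A standalone sketch with made-up numbers:

    import numpy as np

    # Hypothetical interval: three iterations were tracked over 10 seconds.
    num_prompt_tokens = [1024, 896, 1080]
    elapsed_time = 10.0

    # Same computation as LoggingStatLogger._get_throughput.
    print(float(np.sum(num_prompt_tokens) / elapsed_time))  # 300.0 tokens/s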
@@ -61,11 +96,26 @@ def __init__(self, labels: Dict[str, str]):
             documentation="Number of requests waiting to be processed.",
             labelnames=labelnames).labels(*labelvalues)
 
-    def log(self, scheduler_stats: SchedulerStats):
+        self.counter_prompt_tokens = prometheus_client.Counter(
+            name="vllm:prompt_tokens_total",
+            documentation="Number of prefill tokens processed.",
+            labelnames=labelnames).labels(*labelvalues)
+
+        self.counter_generation_tokens = prometheus_client.Counter(
+            name="vllm:generation_tokens_total",
+            documentation="Number of generation tokens processed.",
+            labelnames=labelnames).labels(*labelvalues)
+
+    def log(self, scheduler_stats: SchedulerStats,
+            iteration_stats: IterationStats):
         """Log to prometheus."""
         self.gauge_scheduler_running.set(scheduler_stats.num_running_reqs)
         self.gauge_scheduler_waiting.set(scheduler_stats.num_waiting_reqs)
 
+        self.counter_prompt_tokens.inc(iteration_stats.num_prompt_tokens)
+        self.counter_generation_tokens.inc(
+            iteration_stats.num_generation_tokens)
+
     @staticmethod
     def _unregister_vllm_metrics():
         # Unregister any existing vLLM collectors (for CI/CD
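After this change, a scrape of the /metrics endpoint should include the two new counters in the Prometheus text exposition format, along the lines of the following. The model_name label and the totals here are made up for illustration; actual labels come from the labels dict passed to the logger's constructor:

    # HELP vllm:prompt_tokens_total Number of prefill tokens processed.
    # TYPE vllm:prompt_tokens_total counter
    vllm:prompt_tokens_total{model_name="meta-llama/Llama-3.1-8B"} 3000.0
    # HELP vllm:generation_tokens_total Number of generation tokens processed.
    # TYPE vllm:generation_tokens_total counter
    vllm:generation_tokens_total{model_name="meta-llama/Llama-3.1-8B"} 12800.0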