@@ -1,11 +1,12 @@
 import time
 from abc import ABC, abstractmethod
-from typing import Dict
+from typing import Dict, List
 
+import numpy as np
 import prometheus_client
 
 from vllm.logger import init_logger
-from vllm.v1.metrics.stats import SchedulerStats
+from vllm.v1.metrics.stats import IterationStats, SchedulerStats
 
 logger = init_logger(__name__)
 
@@ -15,27 +16,61 @@
 class StatLoggerBase(ABC):
 
     @abstractmethod
-    def log(self, scheduler_stats: SchedulerStats):
+    def log(self, scheduler_stats: SchedulerStats,
+            iteration_stats: IterationStats):
         ...
 
 
 class LoggingStatLogger(StatLoggerBase):
 
     def __init__(self):
-        self.last_log_time = time.monotonic()
+        self._reset(time.monotonic())
 
-    def log(self, scheduler_stats: SchedulerStats):
-        """Log Stats to standard output."""
+    def _reset(self, now):
+        self.last_log_time = now
+
+        # Tracked stats over current local logging interval.
+        self.num_prompt_tokens: List[int] = []
+        self.num_generation_tokens: List[int] = []
 
+    def _local_interval_elapsed(self, now: float) -> bool:
         # Log every _LOCAL_LOGGING_INTERVAL_SEC.
+        elapsed_time = now - self.last_log_time
+        return elapsed_time > _LOCAL_LOGGING_INTERVAL_SEC
+
+    def _track_iteration_stats(self, iteration_stats: IterationStats):
+        # Save tracked stats for token counters.
+        self.num_prompt_tokens.append(iteration_stats.num_prompt_tokens)
+        self.num_generation_tokens.append(
+            iteration_stats.num_generation_tokens)
+
+    def _get_throughput(self, tracked_stats: List[int], now: float) -> float:
+        # Compute summary metrics for tracked stats
+        return float(np.sum(tracked_stats) / (now - self.last_log_time))
+
+    def log(self, scheduler_stats: SchedulerStats,
+            iteration_stats: IterationStats):
+        """Log Stats to standard output."""
+
+        self._track_iteration_stats(iteration_stats)
+
         now = time.monotonic()
-        if now - self.last_log_time < _LOCAL_LOGGING_INTERVAL_SEC:
+        if not self._local_interval_elapsed(now):
             return
-        self.last_log_time = now
+
+        prompt_throughput = self._get_throughput(self.num_prompt_tokens, now)
+        generation_throughput = self._get_throughput(
+            self.num_generation_tokens, now)
+
+        self._reset(now)
 
         # Format and print output.
         logger.info(
+            "Avg prompt throughput: %.1f tokens/s, "
+            "Avg generation throughput: %.1f tokens/s, "
             "Running: %d reqs, Waiting: %d reqs ",
+            prompt_throughput,
+            generation_throughput,
             scheduler_stats.num_running_reqs,
             scheduler_stats.num_waiting_reqs,
         )
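
As an aside (not part of the patch): a minimal, standalone sketch of the interval-throughput bookkeeping that LoggingStatLogger now performs. The token counts and the 5.0-second interval are assumptions for illustration; the real _LOCAL_LOGGING_INTERVAL_SEC constant is defined in lines collapsed out of this diff.

    # Standalone sketch of the throughput math in _get_throughput above.
    # interval_sec and the token counts are illustrative assumptions.
    import numpy as np

    interval_sec = 5.0                  # stands in for _LOCAL_LOGGING_INTERVAL_SEC
    num_prompt_tokens = [128, 256, 64]  # one entry appended per engine iteration

    # Same computation as _get_throughput: total tokens over elapsed time.
    throughput = float(np.sum(num_prompt_tokens) / interval_sec)
    print(f"Avg prompt throughput: {throughput:.1f} tokens/s")  # -> 89.6

Keeping a plain list of per-iteration counts and summing only at log time keeps the per-iteration cost to a single append; the reduction is paid once per logging interval.
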
@@ -61,11 +96,26 @@ def __init__(self, labels: Dict[str, str]):
             documentation="Number of requests waiting to be processed.",
             labelnames=labelnames).labels(*labelvalues)
 
-    def log(self, scheduler_stats: SchedulerStats):
+        self.counter_prompt_tokens = prometheus_client.Counter(
+            name="vllm:prompt_tokens_total",
+            documentation="Number of prefill tokens processed.",
+            labelnames=labelnames).labels(*labelvalues)
+
+        self.counter_generation_tokens = prometheus_client.Counter(
+            name="vllm:generation_tokens_total",
+            documentation="Number of generation tokens processed.",
+            labelnames=labelnames).labels(*labelvalues)
+
+    def log(self, scheduler_stats: SchedulerStats,
+            iteration_stats: IterationStats):
         """Log to prometheus."""
         self.gauge_scheduler_running.set(scheduler_stats.num_running_reqs)
         self.gauge_scheduler_waiting.set(scheduler_stats.num_waiting_reqs)
 
+        self.counter_prompt_tokens.inc(iteration_stats.num_prompt_tokens)
+        self.counter_generation_tokens.inc(
+            iteration_stats.num_generation_tokens)
+
     @staticmethod
     def _unregister_vllm_metrics():
         # Unregister any existing vLLM collectors (for CI/CD
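
As a usage illustration of the counter pattern added above: the prometheus_client calls below (Counter, .labels(), .inc(), start_http_server) are real library APIs, but the label name, label value, and port are assumptions for the sketch, not taken from this diff.

    # Hedged sketch: a labelled vLLM-style token counter exposed for scraping.
    import prometheus_client

    counter_prompt_tokens = prometheus_client.Counter(
        name="vllm:prompt_tokens_total",
        documentation="Number of prefill tokens processed.",
        labelnames=["model_name"]).labels("example-model")

    counter_prompt_tokens.inc(512)  # one iteration's prompt-token count

    # Serve /metrics so a Prometheus server can scrape the counter.
    prometheus_client.start_http_server(8000)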