Skip to content

Commit 704ef75

Browse files
committed
add bucket for metrics request_latency,time_to_first_token,time_per_output_token
Signed-off-by: Kay Yan <[email protected]>
1 parent 2039c63 commit 704ef75

File tree

2 files changed, with 8 additions and 6 deletions.

vllm/engine/metrics.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -151,22 +151,23 @@ def __init__(self, labelnames: List[str], vllm_config: VllmConfig):
151 151   labelnames=labelnames,
152 152   buckets=[
153 153   0.001, 0.005, 0.01, 0.02, 0.04, 0.06, 0.08, 0.1, 0.25, 0.5,
154     - 0.75, 1.0, 2.5, 5.0, 7.5, 10.0
    154 + 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, 20.0, 40.0, 80.0, 160.0, 640.0,
    155 + 2560.0
155 156   ])
156 157   self.histogram_time_per_output_token = self._histogram_cls(
157 158   name="vllm:time_per_output_token_seconds",
158 159   documentation="Histogram of time per output token in seconds.",
159 160   labelnames=labelnames,
160 161   buckets=[
161 162   0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75,
162     - 1.0, 2.5
    163 + 1.0, 2.5, 5.0, 7.5, 10.0, 20.0, 40.0, 80.0
163 164   ])
164 165
165 166   # Request stats
166 167   # Latency
167 168   request_latency_buckets = [
168 169   0.3, 0.5, 0.8, 1.0, 1.5, 2.0, 2.5, 5.0, 10.0, 15.0, 20.0, 30.0,
169     - 40.0, 50.0, 60.0
    170 + 40.0, 50.0, 60.0, 120.0, 240.0, 480.0, 960.0, 1920.0, 7680.0
170 171   ]
171 172   self.histogram_e2e_time_request = self._histogram_cls(
172 173   name="vllm:e2e_request_latency_seconds",

vllm/v1/metrics/loggers.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -239,7 +239,8 @@ def __init__(self, vllm_config: VllmConfig, engine_index: int = 0):
239 239   documentation="Histogram of time to first token in seconds.",
240 240   buckets=[
241 241   0.001, 0.005, 0.01, 0.02, 0.04, 0.06, 0.08, 0.1, 0.25, 0.5,
242     - 0.75, 1.0, 2.5, 5.0, 7.5, 10.0
    242 + 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, 20.0, 40.0, 80.0, 160.0,
    243 + 640.0, 2560.0
243 244   ],
244 245   labelnames=labelnames).labels(*labelvalues)
245 246

@@ -249,13 +250,13 @@ def __init__(self, vllm_config: VllmConfig, engine_index: int = 0):
249 250   documentation="Histogram of time per output token in seconds.",
250 251   buckets=[
251 252   0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5,
252     - 0.75, 1.0, 2.5
    253 + 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, 20.0, 40.0, 80.0
253 254   ],
254 255   labelnames=labelnames).labels(*labelvalues)
255 256
256 257   request_latency_buckets = [
257 258   0.3, 0.5, 0.8, 1.0, 1.5, 2.0, 2.5, 5.0, 10.0, 15.0, 20.0, 30.0,
258     - 40.0, 50.0, 60.0
    259 + 40.0, 50.0, 60.0, 120.0, 240.0, 480.0, 960.0, 1920.0, 7680.0
259 260   ]
260 261   self.histogram_e2e_time_request = \
261 262   prometheus_client.Histogram(

0 commit comments

Comments
 (0)