Skip to content

Commit c50fed5

Browse files
committed
add request metrics
1 parent 1ad93e4 commit c50fed5

File tree

5 files changed

+384
-0
lines changed

5 files changed

+384
-0
lines changed

pkg/ext-proc/metrics/metrics.go

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
package metrics
2+
3+
import (
4+
"sync"
5+
"time"
6+
7+
compbasemetrics "k8s.io/component-base/metrics"
8+
"k8s.io/component-base/metrics/legacyregistry"
9+
)
10+
11+
const (
12+
// LLMServiceModelComponent is the Subsystem value shared by every metric
// declared in this package, so it is the common prefix of the exported
// metric names (for example llmservice_model_request_total).
LLMServiceModelComponent = "llmservice_model"
13+
)
14+
15+
var (
16+
// requestCounter counts requests. Like the two histograms below it is
// labelled by LLM service name, model name and target model name.
requestCounter = compbasemetrics.NewCounterVec(
17+
&compbasemetrics.CounterOpts{
18+
Subsystem: LLMServiceModelComponent,
19+
Name: "request_total",
20+
Help: "Counter of LLM service requests broken out for each model and target model.",
21+
StabilityLevel: compbasemetrics.ALPHA,
22+
},
23+
[]string{"llmservice_name", "model_name", "target_model_name"},
24+
)
25+
26+
// requestLatencies is a histogram of request durations in seconds. The
// bucket upper bounds run from 0.005 s (5 ms) to 3600 s (1 hour).
requestLatencies = compbasemetrics.NewHistogramVec(
27+
&compbasemetrics.HistogramOpts{
28+
Subsystem: LLMServiceModelComponent,
29+
Name: "request_duration_seconds",
30+
Help: "LLM service response latency distribution in seconds for each model and target model.",
31+
Buckets: []float64{0.005, 0.025, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3,
32+
4, 5, 6, 8, 10, 15, 20, 30, 45, 60, 120, 180, 240, 300, 360, 480, 600, 900, 1200, 1800, 2700, 3600},
33+
StabilityLevel: compbasemetrics.ALPHA,
34+
},
35+
[]string{"llmservice_name", "model_name", "target_model_name"},
36+
)
37+
38+
// requestSizes is a histogram of request sizes in bytes.
requestSizes = compbasemetrics.NewHistogramVec(
39+
&compbasemetrics.HistogramOpts{
40+
Subsystem: LLMServiceModelComponent,
41+
Name: "request_sizes",
42+
Help: "LLM service requests size distribution in bytes for each model and target model.",
43+
// Buckets double at every step, from 64 bytes up to 2^30 bytes (1 GiB).
44+
Buckets: []float64{
45+
64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, // 64 B up to 64 KiB
46+
131072, 262144, 524288, 1048576, 2097152, 4194304, 8388608, // 128 KiB up to 8 MiB
47+
16777216, 33554432, 67108864, 134217728, 268435456, 536870912, 1073741824, // 16 MiB up to 1 GiB
48+
},
49+
StabilityLevel: compbasemetrics.ALPHA,
50+
},
51+
[]string{"llmservice_name", "model_name", "target_model_name"},
52+
)
53+
)
54+
55+
// registerMetrics guards Register so that the registration body runs only once.
var registerMetrics sync.Once
56+
57+
// Register registers the request counter, the latency histogram and the size
// histogram with the legacy registry. The registration runs inside a
// sync.Once, so only the first call does any work; later calls are no-ops.
58+
func Register() {
59+
registerMetrics.Do(func() {
60+
legacyregistry.MustRegister(requestCounter)
61+
legacyregistry.MustRegister(requestLatencies)
62+
legacyregistry.MustRegister(requestSizes)
63+
})
64+
}
65+
66+
// MonitorRequest records one request against all three request metrics, using
// llmserviceName, modelName and targetModelName as the label values. It
// increments the request counter, observes elapsed (converted to seconds) in
// the latency histogram, and observes reqSize in the size histogram, whose
// help text describes the value as bytes.
67+
func MonitorRequest(llmserviceName, modelName, targetModelName string, reqSize int, elapsed time.Duration) {
68+
// The latency histogram is declared in seconds, so convert the Duration once.
elapsedSeconds := elapsed.Seconds()
69+
requestCounter.WithLabelValues(llmserviceName, modelName, targetModelName).Inc()
70+
requestLatencies.WithLabelValues(llmserviceName, modelName, targetModelName).Observe(elapsedSeconds)
71+
requestSizes.WithLabelValues(llmserviceName, modelName, targetModelName).Observe(float64(reqSize))
72+
}

pkg/ext-proc/metrics/metrics_test.go

+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
package metrics
2+
3+
import (
4+
"os"
5+
"testing"
6+
"time"
7+
8+
"k8s.io/component-base/metrics/legacyregistry"
9+
"k8s.io/component-base/metrics/testutil"
10+
)
11+
12+
// Metric names built from the shared subsystem prefix plus each metric's Name.
// TestMonitorRequest passes them to testutil.GatherAndCompare as the metric
// name to compare against the corresponding golden file.
const RequestTotalMetric = LLMServiceModelComponent + "_request_total"
13+
const RequestLatenciesMetric = LLMServiceModelComponent + "_request_duration_seconds"
14+
const RequestSizesMetric = LLMServiceModelComponent + "_request_sizes"
15+
16+
func TestMonitorRequest(t *testing.T) {
17+
type requests struct {
18+
llmserviceName string
19+
modelName string
20+
targetModelName string
21+
reqSize int
22+
elapsed time.Duration
23+
}
24+
scenarios := []struct {
25+
name string
26+
reqs []requests
27+
}{{
28+
name: "multiple requests",
29+
reqs: []requests{
30+
{
31+
llmserviceName: "s10",
32+
modelName: "m10",
33+
targetModelName: "t10",
34+
reqSize: 1200,
35+
elapsed: time.Millisecond * 10,
36+
},
37+
{
38+
llmserviceName: "s10",
39+
modelName: "m10",
40+
targetModelName: "t10",
41+
reqSize: 500,
42+
elapsed: time.Millisecond * 1600,
43+
},
44+
{
45+
llmserviceName: "s10",
46+
modelName: "m10",
47+
targetModelName: "t11",
48+
reqSize: 2480,
49+
elapsed: time.Millisecond * 60,
50+
},
51+
{
52+
llmserviceName: "s20",
53+
modelName: "m20",
54+
targetModelName: "t20",
55+
reqSize: 80,
56+
elapsed: time.Millisecond * 120,
57+
},
58+
},
59+
}}
60+
Register()
61+
for _, scenario := range scenarios {
62+
t.Run(scenario.name, func(t *testing.T) {
63+
for _, req := range scenario.reqs {
64+
MonitorRequest(req.llmserviceName, req.modelName, req.targetModelName, req.reqSize, req.elapsed)
65+
}
66+
wantRequestTotal, err := os.Open("testdata/request_total_metric")
67+
defer func() {
68+
if err := wantRequestTotal.Close(); err != nil {
69+
t.Error(err)
70+
}
71+
}()
72+
if err != nil {
73+
t.Fatal(err)
74+
}
75+
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, wantRequestTotal, RequestTotalMetric); err != nil {
76+
t.Error(err)
77+
}
78+
wantRequestLatencies, err := os.Open("testdata/request_duration_seconds_metric")
79+
defer func() {
80+
if err := wantRequestLatencies.Close(); err != nil {
81+
t.Error(err)
82+
}
83+
}()
84+
if err != nil {
85+
t.Fatal(err)
86+
}
87+
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, wantRequestLatencies, RequestLatenciesMetric); err != nil {
88+
t.Error(err)
89+
}
90+
wantRequestSizes, err := os.Open("testdata/request_sizes_metric")
91+
defer func() {
92+
if err := wantRequestSizes.Close(); err != nil {
93+
t.Error(err)
94+
}
95+
}()
96+
if err != nil {
97+
t.Fatal(err)
98+
}
99+
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, wantRequestSizes, RequestSizesMetric); err != nil {
100+
t.Error(err)
101+
}
102+
103+
})
104+
}
105+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
# HELP llmservice_model_request_duration_seconds [ALPHA] LLM service response latency distribution in seconds for each model and target model.
2+
# TYPE llmservice_model_request_duration_seconds histogram
3+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="0.005"} 0
4+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="0.025"} 1
5+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="0.05"} 1
6+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="0.1"} 1
7+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="0.2"} 1
8+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="0.4"} 1
9+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="0.6"} 1
10+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="0.8"} 1
11+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="1.0"} 1
12+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="1.25"} 1
13+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="1.5"} 1
14+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="2"} 2
15+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="3"} 2
16+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="4"} 2
17+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="5"} 2
18+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="6"} 2
19+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="8"} 2
20+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="10"} 2
21+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="15"} 2
22+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="20"} 2
23+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="30"} 2
24+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="45"} 2
25+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="60"} 2
26+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="120"} 2
27+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="180"} 2
28+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="240"} 2
29+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="300"} 2
30+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="360"} 2
31+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="480"} 2
32+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="600"} 2
33+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="900"} 2
34+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="1200"} 2
35+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="1800"} 2
36+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="2700"} 2
37+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="3600"} 2
38+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10", model_name="m10", target_model_name="t10", le="+Inf"} 2
39+
llmservice_model_request_duration_seconds_sum{llmservice_name="s10", model_name="m10", target_model_name="t10"} 1.61
40+
llmservice_model_request_duration_seconds_count{llmservice_name="s10", model_name="m10", target_model_name="t10"} 2
41+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="0.005"} 0
42+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="0.025"} 0
43+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="0.05"} 0
44+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="0.1"} 1
45+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="0.2"} 1
46+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="0.4"} 1
47+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="0.6"} 1
48+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="0.8"} 1
49+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="1"} 1
50+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="1.25"} 1
51+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="1.5"} 1
52+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="2"} 1
53+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="3"} 1
54+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="4"} 1
55+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="5"} 1
56+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="6"} 1
57+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="8"} 1
58+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="10"} 1
59+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="15"} 1
60+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="20"} 1
61+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="30"} 1
62+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="45"} 1
63+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="60"} 1
64+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="120"} 1
65+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="180"} 1
66+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="240"} 1
67+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="300"} 1
68+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="360"} 1
69+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="480"} 1
70+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="600"} 1
71+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="900"} 1
72+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="1200"} 1
73+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="1800"} 1
74+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="2700"} 1
75+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="3600"} 1
76+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s10",model_name="m10",target_model_name="t11",le="+Inf"} 1
77+
llmservice_model_request_duration_seconds_sum{llmservice_name="s10",model_name="m10",target_model_name="t11"} 0.06
78+
llmservice_model_request_duration_seconds_count{llmservice_name="s10",model_name="m10",target_model_name="t11"} 1
79+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="0.005"} 0
80+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="0.025"} 0
81+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="0.05"} 0
82+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="0.1"} 0
83+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="0.2"} 1
84+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="0.4"} 1
85+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="0.6"} 1
86+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="0.8"} 1
87+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="1"} 1
88+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="1.25"} 1
89+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="1.5"} 1
90+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="2"} 1
91+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="3"} 1
92+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="4"} 1
93+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="5"} 1
94+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="6"} 1
95+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="8"} 1
96+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="10"} 1
97+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="15"} 1
98+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="20"} 1
99+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="30"} 1
100+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="45"} 1
101+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="60"} 1
102+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="120"} 1
103+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="180"} 1
104+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="240"} 1
105+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="300"} 1
106+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="360"} 1
107+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="480"} 1
108+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="600"} 1
109+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="900"} 1
110+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="1200"} 1
111+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="1800"} 1
112+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="2700"} 1
113+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="3600"} 1
114+
llmservice_model_request_duration_seconds_bucket{llmservice_name="s20",model_name="m20",target_model_name="t20",le="+Inf"} 1
115+
llmservice_model_request_duration_seconds_sum{llmservice_name="s20",model_name="m20",target_model_name="t20"} 0.12
116+
llmservice_model_request_duration_seconds_count{llmservice_name="s20",model_name="m20",target_model_name="t20"} 1

0 commit comments

Comments
 (0)