Skip to content

Commit bb98cdc

Browse files
committed
add request metrics
1 parent 918960c commit bb98cdc

File tree

8 files changed

+394
-3
lines changed

8 files changed

+394
-3
lines changed

go.mod

+3
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ require (
2121
k8s.io/apimachinery v0.31.3
2222
k8s.io/client-go v0.31.3
2323
k8s.io/code-generator v0.31.3
24+
k8s.io/component-base v0.31.3
2425
k8s.io/klog/v2 v2.130.1
2526
sigs.k8s.io/controller-runtime v0.19.3
2627
sigs.k8s.io/structured-merge-diff/v4 v4.4.3
@@ -35,6 +36,7 @@ require (
3536
github.com/Masterminds/sprig/v3 v3.2.3 // indirect
3637
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect
3738
github.com/beorn7/perks v1.0.1 // indirect
39+
github.com/blang/semver/v4 v4.0.0 // indirect
3840
github.com/bufbuild/protocompile v0.14.1 // indirect
3941
github.com/census-instrumentation/opencensus-proto v0.4.1 // indirect
4042
github.com/cespare/xxhash/v2 v2.3.0 // indirect
@@ -63,6 +65,7 @@ require (
6365
github.com/josharian/intern v1.0.0 // indirect
6466
github.com/json-iterator/go v1.1.12 // indirect
6567
github.com/klauspost/compress v1.17.9 // indirect
68+
github.com/kylelemons/godebug v1.1.0 // indirect
6669
github.com/mailru/easyjson v0.7.7 // indirect
6770
github.com/mitchellh/copystructure v1.0.0 // indirect
6871
github.com/mitchellh/reflectwalk v1.0.1 // indirect

go.sum

+4
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafo
1515
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
1616
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
1717
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
18+
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
19+
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
1820
github.com/bojand/ghz v0.120.0 h1:6F4wsmZVwFg5UnD+/R+IABWk6sKE/0OKIBdUQUZnOdo=
1921
github.com/bojand/ghz v0.120.0/go.mod h1:HfECuBZj1v02XObGnRuoZgyB1PR24/25dIYiJIMjJnE=
2022
github.com/bufbuild/protocompile v0.14.1 h1:iA73zAf/fyljNjQKwYzUHD6AD4R8KMasmwa/FBatYVw=
@@ -264,6 +266,8 @@ k8s.io/client-go v0.31.3 h1:CAlZuM+PH2cm+86LOBemaJI/lQ5linJ6UFxKX/SoG+4=
264266
k8s.io/client-go v0.31.3/go.mod h1:2CgjPUTpv3fE5dNygAr2NcM8nhHzXvxB8KL5gYc3kJs=
265267
k8s.io/code-generator v0.31.3 h1:Pj0fYOBms+ZrsulLi4DMsCEx1jG8fWKRLy44onHsLBI=
266268
k8s.io/code-generator v0.31.3/go.mod h1:/umCIlT84g1+Yu5ZXtP1KGSRTnGiIzzX5AzUAxsNlts=
269+
k8s.io/component-base v0.31.3 h1:DMCXXVx546Rfvhj+3cOm2EUxhS+EyztH423j+8sOwhQ=
270+
k8s.io/component-base v0.31.3/go.mod h1:xME6BHfUOafRgT0rGVBGl7TuSg8Z9/deT7qq6w7qjIU=
267271
k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70 h1:NGrVE502P0s0/1hudf8zjgwki1X/TByhmAoILTarmzo=
268272
k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70/go.mod h1:VH3AT8AaQOqiGjMF9p0/IM1Dj+82ZwjfxUP1IxaHE+8=
269273
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=

pkg/ext-proc/metrics/metrics.go

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
package metrics
2+
3+
import (
4+
"sync"
5+
"time"
6+
7+
compbasemetrics "k8s.io/component-base/metrics"
8+
"k8s.io/component-base/metrics/legacyregistry"
9+
)
10+
11+
// LLMServiceModelComponent is the subsystem under which the LLM service
// model metrics in this package are defined.
const LLMServiceModelComponent = "llmservice_model"
14+
15+
var (
16+
requestCounter = compbasemetrics.NewCounterVec(
17+
&compbasemetrics.CounterOpts{
18+
Subsystem: LLMServiceModelComponent,
19+
Name: "request_total",
20+
Help: "Counter of LLM service requests broken out for each model and target model.",
21+
StabilityLevel: compbasemetrics.ALPHA,
22+
},
23+
[]string{"llmservice_name", "model_name", "target_model_name"},
24+
)
25+
26+
requestLatencies = compbasemetrics.NewHistogramVec(
27+
&compbasemetrics.HistogramOpts{
28+
Subsystem: LLMServiceModelComponent,
29+
Name: "request_duration_seconds",
30+
Help: "LLM service response latency distribution in seconds for each model and target model.",
31+
Buckets: []float64{0.005, 0.025, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3,
32+
4, 5, 6, 8, 10, 15, 20, 30, 45, 60, 120, 180, 240, 300, 360, 480, 600, 900, 1200, 1800, 2700, 3600},
33+
StabilityLevel: compbasemetrics.ALPHA,
34+
},
35+
[]string{"llmservice_name", "model_name", "target_model_name"},
36+
)
37+
38+
requestSizes = compbasemetrics.NewHistogramVec(
39+
&compbasemetrics.HistogramOpts{
40+
Subsystem: LLMServiceModelComponent,
41+
Name: "request_sizes",
42+
Help: "LLM service requests size distribution in bytes for each model and target model.",
43+
// Use buckets ranging from 1000 bytes (1KB) to 10^9 bytes (1GB).
44+
Buckets: []float64{
45+
64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, // More fine-grained up to 64KB
46+
131072, 262144, 524288, 1048576, 2097152, 4194304, 8388608, // Exponential up to 8MB
47+
16777216, 33554432, 67108864, 134217728, 268435456, 536870912, 1073741824, // Exponential up to 1GB
48+
},
49+
StabilityLevel: compbasemetrics.ALPHA,
50+
},
51+
[]string{"llmservice_name", "model_name", "target_model_name"},
52+
)
53+
)
54+
55+
var registerMetrics sync.Once
56+
57+
// Register all metrics.
58+
func Register() {
59+
registerMetrics.Do(func() {
60+
legacyregistry.MustRegister(requestCounter)
61+
legacyregistry.MustRegister(requestLatencies)
62+
legacyregistry.MustRegister(requestSizes)
63+
})
64+
}
65+
66+
// MonitorRequest handles monitoring requests.
67+
func MonitorRequest(llmserviceName, modelName, targetModelName string, reqSize int, elapsed time.Duration) {
68+
elapsedSeconds := elapsed.Seconds()
69+
requestCounter.WithLabelValues(llmserviceName, modelName, targetModelName).Inc()
70+
requestLatencies.WithLabelValues(llmserviceName, modelName, targetModelName).Observe(elapsedSeconds)
71+
requestSizes.WithLabelValues(llmserviceName, modelName, targetModelName).Observe(float64(reqSize))
72+
}

pkg/ext-proc/metrics/metrics_test.go

+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
package metrics
2+
3+
import (
4+
"os"
5+
"testing"
6+
"time"
7+
8+
"k8s.io/component-base/metrics/legacyregistry"
9+
"k8s.io/component-base/metrics/testutil"
10+
)
11+
12+
const RequestTotalMetric = LLMServiceModelComponent + "_request_total"
13+
const RequestLatenciesMetric = LLMServiceModelComponent + "_request_duration_seconds"
14+
const RequestSizesMetric = LLMServiceModelComponent + "_request_sizes"
15+
16+
func TestMonitorRequest(t *testing.T) {
17+
type requests struct {
18+
llmserviceName string
19+
modelName string
20+
targetModelName string
21+
reqSize int
22+
elapsed time.Duration
23+
}
24+
scenarios := []struct {
25+
name string
26+
reqs []requests
27+
}{{
28+
name: "multiple requests",
29+
reqs: []requests{
30+
{
31+
llmserviceName: "s10",
32+
modelName: "m10",
33+
targetModelName: "t10",
34+
reqSize: 1200,
35+
elapsed: time.Millisecond * 10,
36+
},
37+
{
38+
llmserviceName: "s10",
39+
modelName: "m10",
40+
targetModelName: "t10",
41+
reqSize: 500,
42+
elapsed: time.Millisecond * 1600,
43+
},
44+
{
45+
llmserviceName: "s10",
46+
modelName: "m10",
47+
targetModelName: "t11",
48+
reqSize: 2480,
49+
elapsed: time.Millisecond * 60,
50+
},
51+
{
52+
llmserviceName: "s20",
53+
modelName: "m20",
54+
targetModelName: "t20",
55+
reqSize: 80,
56+
elapsed: time.Millisecond * 120,
57+
},
58+
},
59+
}}
60+
Register()
61+
for _, scenario := range scenarios {
62+
t.Run(scenario.name, func(t *testing.T) {
63+
for _, req := range scenario.reqs {
64+
MonitorRequest(req.llmserviceName, req.modelName, req.targetModelName, req.reqSize, req.elapsed)
65+
}
66+
wantRequestTotal, err := os.Open("testdata/request_total_metric")
67+
defer func() {
68+
if err := wantRequestTotal.Close(); err != nil {
69+
t.Error(err)
70+
}
71+
}()
72+
if err != nil {
73+
t.Fatal(err)
74+
}
75+
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, wantRequestTotal, RequestTotalMetric); err != nil {
76+
t.Error(err)
77+
}
78+
wantRequestLatencies, err := os.Open("testdata/request_duration_seconds_metric")
79+
defer func() {
80+
if err := wantRequestLatencies.Close(); err != nil {
81+
t.Error(err)
82+
}
83+
}()
84+
if err != nil {
85+
t.Fatal(err)
86+
}
87+
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, wantRequestLatencies, RequestLatenciesMetric); err != nil {
88+
t.Error(err)
89+
}
90+
wantRequestSizes, err := os.Open("testdata/request_sizes_metric")
91+
defer func() {
92+
if err := wantRequestSizes.Close(); err != nil {
93+
t.Error(err)
94+
}
95+
}()
96+
if err != nil {
97+
t.Fatal(err)
98+
}
99+
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, wantRequestSizes, RequestSizesMetric); err != nil {
100+
t.Error(err)
101+
}
102+
103+
})
104+
}
105+
}

0 commit comments

Comments
 (0)