@@ -11,9 +11,11 @@ import (
11
11
12
12
// Subsystem names used to namespace the metrics declared in this file.
const (
	// InferenceModelComponent is the Subsystem value for inference model metrics.
	InferenceModelComponent = "inference_model"
	// InferencePoolComponent is the Subsystem value for inference pool metrics.
	InferencePoolComponent = "inference_pool"
)
15
16
16
17
var (
18
+ // Inference Model Metrics
17
19
requestCounter = compbasemetrics .NewCounterVec (
18
20
& compbasemetrics.CounterOpts {
19
21
Subsystem : InferenceModelComponent ,
88
90
},
89
91
[]string {"model_name" , "target_model_name" },
90
92
)
93
+
94
+ // Inference Pool Metrics
95
+ inferencePoolAvgKVCache = compbasemetrics .NewGaugeVec (
96
+ & compbasemetrics.GaugeOpts {
97
+ Subsystem : InferencePoolComponent ,
98
+ Name : "average_kv_cache_utilization" ,
99
+ Help : "The average kv cache utilization for an inference server pool." ,
100
+ StabilityLevel : compbasemetrics .ALPHA ,
101
+ },
102
+ []string {"name" },
103
+ )
104
+
105
+ inferencePoolAvgQueueSize = compbasemetrics .NewGaugeVec (
106
+ & compbasemetrics.GaugeOpts {
107
+ Subsystem : InferencePoolComponent ,
108
+ Name : "average_queue_size" ,
109
+ Help : " The average number of requests pending in the model server queue." ,
110
+ StabilityLevel : compbasemetrics .ALPHA ,
111
+ },
112
+ []string {"name" },
113
+ )
91
114
)
92
115
93
116
var registerMetrics sync.Once
@@ -101,6 +124,9 @@ func Register() {
101
124
legacyregistry .MustRegister (responseSizes )
102
125
legacyregistry .MustRegister (inputTokens )
103
126
legacyregistry .MustRegister (outputTokens )
127
+
128
+ legacyregistry .MustRegister (inferencePoolAvgKVCache )
129
+ legacyregistry .MustRegister (inferencePoolAvgQueueSize )
104
130
})
105
131
}
106
132
@@ -143,3 +169,11 @@ func RecordOutputTokens(modelName, targetModelName string, size int) {
143
169
outputTokens .WithLabelValues (modelName , targetModelName ).Observe (float64 (size ))
144
170
}
145
171
}
172
+
173
+ func RecordInferencePoolAvgKVCache (name string , utilization float64 ) {
174
+ inferencePoolAvgKVCache .WithLabelValues (name ).Set (utilization )
175
+ }
176
+
177
+ func RecordInferencePoolAvgQueueSize (name string , utilization float64 ) {
178
+ inferencePoolAvgQueueSize .WithLabelValues (name ).Set (utilization )
179
+ }
0 commit comments