@@ -95,15 +95,15 @@ func TestKubeInferenceModelRequest(t *testing.T) {
95
95
req : utiltesting .GenerateRequest (logger , "test1" , "my-model" ),
96
96
// pod-1 will be picked because it has relatively low queue size and low KV cache.
97
97
pods : map [backendmetrics.Pod ]* backendmetrics.Metrics {
98
- fakePod (0 ): & backendmetrics. Metrics {
98
+ fakePod (0 ): {
99
99
WaitingQueueSize : 3 ,
100
100
KVCacheUsagePercent : 0.2 ,
101
101
},
102
- fakePod (1 ): & backendmetrics. Metrics {
102
+ fakePod (1 ): {
103
103
WaitingQueueSize : 0 ,
104
104
KVCacheUsagePercent : 0.1 ,
105
105
},
106
- fakePod (2 ): & backendmetrics. Metrics {
106
+ fakePod (2 ): {
107
107
WaitingQueueSize : 10 ,
108
108
KVCacheUsagePercent : 0.2 ,
109
109
},
@@ -137,23 +137,23 @@ func TestKubeInferenceModelRequest(t *testing.T) {
137
137
// pod-1 will be picked because it has relatively low queue size, with the requested
138
138
// model being active, and has low KV cache.
139
139
pods : map [backendmetrics.Pod ]* backendmetrics.Metrics {
140
- fakePod (0 ): & backendmetrics. Metrics {
140
+ fakePod (0 ): {
141
141
WaitingQueueSize : 0 ,
142
142
KVCacheUsagePercent : 0.2 ,
143
143
ActiveModels : map [string ]int {
144
144
"foo" : 1 ,
145
145
"bar" : 1 ,
146
146
},
147
147
},
148
- fakePod (1 ): & backendmetrics. Metrics {
148
+ fakePod (1 ): {
149
149
WaitingQueueSize : 0 ,
150
150
KVCacheUsagePercent : 0.1 ,
151
151
ActiveModels : map [string ]int {
152
152
"foo" : 1 ,
153
153
"sql-lora-1fdg2" : 1 ,
154
154
},
155
155
},
156
- fakePod (2 ): & backendmetrics. Metrics {
156
+ fakePod (2 ): {
157
157
WaitingQueueSize : 10 ,
158
158
KVCacheUsagePercent : 0.2 ,
159
159
ActiveModels : map [string ]int {
@@ -192,23 +192,23 @@ func TestKubeInferenceModelRequest(t *testing.T) {
192
192
// as it's above the affinity for queue size. Also is critical, so we should
193
193
// still honor request despite all queues > 5
194
194
pods : map [backendmetrics.Pod ]* backendmetrics.Metrics {
195
- fakePod (0 ): & backendmetrics. Metrics {
195
+ fakePod (0 ): {
196
196
WaitingQueueSize : 10 ,
197
197
KVCacheUsagePercent : 0.2 ,
198
198
ActiveModels : map [string ]int {
199
199
"foo" : 1 ,
200
200
"bar" : 1 ,
201
201
},
202
202
},
203
- fakePod (1 ): & backendmetrics. Metrics {
203
+ fakePod (1 ): {
204
204
WaitingQueueSize : 200 ,
205
205
KVCacheUsagePercent : 0.1 ,
206
206
ActiveModels : map [string ]int {
207
207
"foo" : 1 ,
208
208
"sql-lora-1fdg2" : 1 ,
209
209
},
210
210
},
211
- fakePod (2 ): & backendmetrics. Metrics {
211
+ fakePod (2 ): {
212
212
WaitingQueueSize : 6 ,
213
213
KVCacheUsagePercent : 0.2 ,
214
214
ActiveModels : map [string ]int {
@@ -245,7 +245,7 @@ func TestKubeInferenceModelRequest(t *testing.T) {
245
245
// no pods will be picked as all pods are either above kv threshold,
246
246
// queue threshold, or both.
247
247
pods : map [backendmetrics.Pod ]* backendmetrics.Metrics {
248
- fakePod (0 ): & backendmetrics. Metrics {
248
+ fakePod (0 ): {
249
249
WaitingQueueSize : 6 ,
250
250
KVCacheUsagePercent : 0.2 ,
251
251
ActiveModels : map [string ]int {
@@ -254,15 +254,15 @@ func TestKubeInferenceModelRequest(t *testing.T) {
254
254
"sql-lora-1fdg3" : 1 ,
255
255
},
256
256
},
257
- fakePod (1 ): & backendmetrics. Metrics {
257
+ fakePod (1 ): {
258
258
WaitingQueueSize : 0 ,
259
259
KVCacheUsagePercent : 0.85 ,
260
260
ActiveModels : map [string ]int {
261
261
"foo" : 1 ,
262
262
"sql-lora-1fdg3" : 1 ,
263
263
},
264
264
},
265
- fakePod (2 ): & backendmetrics. Metrics {
265
+ fakePod (2 ): {
266
266
WaitingQueueSize : 10 ,
267
267
KVCacheUsagePercent : 0.9 ,
268
268
ActiveModels : map [string ]int {
@@ -287,7 +287,7 @@ func TestKubeInferenceModelRequest(t *testing.T) {
287
287
req : utiltesting .GenerateRequest (logger , "test5" , "sql-lora-sheddable" ),
288
288
// pod 0 will be picked as all other pods are above threshold
289
289
pods : map [backendmetrics.Pod ]* backendmetrics.Metrics {
290
- fakePod (0 ): & backendmetrics. Metrics {
290
+ fakePod (0 ): {
291
291
WaitingQueueSize : 4 ,
292
292
KVCacheUsagePercent : 0.2 ,
293
293
ActiveModels : map [string ]int {
@@ -296,15 +296,15 @@ func TestKubeInferenceModelRequest(t *testing.T) {
296
296
"sql-lora-1fdg3" : 1 ,
297
297
},
298
298
},
299
- fakePod (1 ): & backendmetrics. Metrics {
299
+ fakePod (1 ): {
300
300
WaitingQueueSize : 0 ,
301
301
KVCacheUsagePercent : 0.85 ,
302
302
ActiveModels : map [string ]int {
303
303
"foo" : 1 ,
304
304
"sql-lora-1fdg3" : 1 ,
305
305
},
306
306
},
307
- fakePod (2 ): & backendmetrics. Metrics {
307
+ fakePod (2 ): {
308
308
WaitingQueueSize : 10 ,
309
309
KVCacheUsagePercent : 0.9 ,
310
310
ActiveModels : map [string ]int {
0 commit comments