forked from kubernetes-sigs/gateway-api-inference-extension
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtypes.go
52 lines (44 loc) · 1.17 KB
/
types.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
// Package backend is a library for interacting with backend model servers, for example by probing their metrics.
package backend
import "fmt"
// PodSet is a collection of pods stored as a map keyed by Pod value.
// NOTE(review): membership is presumably signalled by a true value; confirm
// with callers whether a key mapped to false is ever stored.
type PodSet map[Pod]bool
// Pod identifies a single backend pod. It holds only string fields, so Pod
// values are comparable and can serve as map keys.
type Pod struct {
	// Namespace and Name together identify the pod.
	Namespace, Name string
	// Address is the address associated with the pod.
	// NOTE(review): the expected format (bare IP, host:port, ...) is not
	// visible in this file; confirm with callers.
	Address string
}
// String renders the pod as "<Namespace>.<Name>". Address is not part of the
// result.
func (p Pod) String() string {
	ns, name := p.Namespace, p.Name
	return ns + "." + name
}
// Metrics groups the load and cache figures tracked for one backend model
// server.
type Metrics struct {
	// CachedModels is a set of models (including LoRA adapters) that are
	// currently cached to GPU.
	// NOTE(review): keys are presumably model names; the meaning of the int
	// value is not visible in this file, so confirm with the producer.
	CachedModels map[string]int
	// RunningQueueSize and WaitingQueueSize are queue lengths.
	// NOTE(review): presumably counts of running and waiting requests; confirm.
	RunningQueueSize, WaitingQueueSize int
	// KVCacheUsagePercent is the KV-cache utilisation.
	// NOTE(review): unclear whether the scale is 0-1 or 0-100; confirm.
	KVCacheUsagePercent float64
	// KvCacheMaxTokenCapacity is presumably the maximum number of tokens the
	// KV cache can hold (TODO confirm).
	KvCacheMaxTokenCapacity int
}
// PodMetrics pairs a Pod with its Metrics by embedding both types, so their
// fields (for example Name or WaitingQueueSize) are promoted and can be
// accessed directly on a PodMetrics value.
type PodMetrics struct {
Pod
Metrics
}
// String formats the embedded pod and metrics on one line as
// "Pod: <pod>; Metrics: <metrics>", rendering each with the %+v verb. Because
// Pod has its own String method, fmt renders the pod through that method.
func (pm *PodMetrics) String() string {
	const layout = "Pod: %+v; Metrics: %+v"
	return fmt.Sprintf(layout, pm.Pod, pm.Metrics)
}
// Clone returns a copy of pm that shares no mutable state with the receiver.
//
// The embedded Pod and Metrics are duplicated with a single struct copy, so
// every value field, including any added to those types later, is carried
// over without having to be listed here. CachedModels is a map and would
// otherwise be shared, so it is rebuilt entry by entry.
//
// The clone's CachedModels is always non-nil, even when the receiver's map is
// nil or empty, so callers can write to it without a nil-map panic. Calling
// Clone on a nil receiver returns nil instead of panicking.
func (pm *PodMetrics) Clone() *PodMetrics {
	if pm == nil {
		return nil
	}

	clone := *pm // copies all fields; CachedModels still aliases pm's map here

	// Replace the aliased map with an independent copy.
	clone.CachedModels = make(map[string]int, len(pm.CachedModels))
	for model, v := range pm.CachedModels {
		clone.CachedModels[model] = v
	}

	return &clone
}