Skip to content

Commit 2426579

Browse files
Tal-orbertinatto
authored andcommitted
UPSTREAM: <carry>: advertise shared cpus for mixed cpus feature
Kubelet should advertise the shared cpus as extended resources. This has the benefit of limiting the number of containers that can request access to the shared cpus. For more information see - openshift/enhancements#1396 Signed-off-by: Talor Itzhak <[email protected]>
1 parent fcbeaee commit 2426579

File tree

4 files changed

+152
-0
lines changed

4 files changed

+152
-0
lines changed

pkg/kubelet/kubelet.go

+4
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ import (
113113
"k8s.io/kubernetes/pkg/kubelet/server"
114114
servermetrics "k8s.io/kubernetes/pkg/kubelet/server/metrics"
115115
serverstats "k8s.io/kubernetes/pkg/kubelet/server/stats"
116+
"k8s.io/kubernetes/pkg/kubelet/sharedcpus"
116117
"k8s.io/kubernetes/pkg/kubelet/stats"
117118
"k8s.io/kubernetes/pkg/kubelet/status"
118119
"k8s.io/kubernetes/pkg/kubelet/sysctl"
@@ -681,6 +682,9 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
681682
if managed.IsEnabled() {
682683
klog.InfoS("Pinned Workload Management Enabled")
683684
}
685+
if sharedcpus.IsEnabled() {
686+
klog.InfoS("Mixed CPUs Workload Enabled")
687+
}
684688

685689
if kubeDeps.KubeClient != nil {
686690
klet.runtimeClassManager = runtimeclass.NewManager(kubeDeps.KubeClient)

pkg/kubelet/kubelet_node_status.go

+22
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ import (
4646
"k8s.io/kubernetes/pkg/kubelet/events"
4747
"k8s.io/kubernetes/pkg/kubelet/managed"
4848
"k8s.io/kubernetes/pkg/kubelet/nodestatus"
49+
"k8s.io/kubernetes/pkg/kubelet/sharedcpus"
4950
taintutil "k8s.io/kubernetes/pkg/util/taints"
5051
volutil "k8s.io/kubernetes/pkg/volume/util"
5152
)
@@ -136,6 +137,7 @@ func (kl *Kubelet) tryRegisterWithAPIServer(node *v1.Node) bool {
136137
if managed.IsEnabled() {
137138
requiresUpdate = kl.addManagementNodeCapacity(node, existingNode) || requiresUpdate
138139
}
140+
requiresUpdate = kl.reconcileSharedCPUsNodeCapacity(node, existingNode) || requiresUpdate
139141
if requiresUpdate {
140142
if _, _, err := nodeutil.PatchNodeStatus(kl.kubeClient.CoreV1(), types.NodeName(kl.nodeName), originalNode, existingNode); err != nil {
141143
klog.ErrorS(err, "Unable to reconcile node with API server,error updating node", "node", klog.KObj(node))
@@ -165,6 +167,25 @@ func (kl *Kubelet) addManagementNodeCapacity(initialNode, existingNode *v1.Node)
165167
return true
166168
}
167169

170+
func (kl *Kubelet) reconcileSharedCPUsNodeCapacity(initialNode, existingNode *v1.Node) bool {
171+
updateDefaultResources(initialNode, existingNode)
172+
sharedCPUsResourceName := sharedcpus.GetResourceName()
173+
// delete resources in case they exist and feature has been disabled
174+
if !sharedcpus.IsEnabled() {
175+
if _, ok := existingNode.Status.Capacity[sharedCPUsResourceName]; ok {
176+
delete(existingNode.Status.Capacity, sharedCPUsResourceName)
177+
return true
178+
}
179+
return false
180+
}
181+
q := resource.NewQuantity(sharedcpus.GetConfig().ContainersLimit, resource.DecimalSI)
182+
if existingCapacity, ok := existingNode.Status.Capacity[sharedCPUsResourceName]; ok && existingCapacity.Equal(*q) {
183+
return false
184+
}
185+
existingNode.Status.Capacity[sharedCPUsResourceName] = *q
186+
return true
187+
}
188+
168189
// reconcileHugePageResource will update huge page capacity for each page size and remove huge page sizes no longer supported
169190
func (kl *Kubelet) reconcileHugePageResource(initialNode, existingNode *v1.Node) bool {
170191
requiresUpdate := updateDefaultResources(initialNode, existingNode)
@@ -459,6 +480,7 @@ func (kl *Kubelet) initialNode(ctx context.Context) (*v1.Node, error) {
459480
if managed.IsEnabled() {
460481
kl.addManagementNodeCapacity(node, node)
461482
}
483+
kl.reconcileSharedCPUsNodeCapacity(node, node)
462484

463485
kl.setNodeStatus(ctx, node)
464486

pkg/kubelet/sharedcpus/sharedcpus.go

+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/*
2+
Copyright 2023 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package sharedcpus
18+
19+
import (
20+
"encoding/json"
21+
"errors"
22+
"os"
23+
24+
corev1 "k8s.io/api/core/v1"
25+
"k8s.io/klog/v2"
26+
)
27+
28+
const (
	// configFileName is the path of the JSON file read by parseConfig.
	configFileName = "/etc/kubernetes/openshift-workload-mixed-cpus"
	// sharedCpusResourceName is the resource name handed out by GetResourceName.
	sharedCpusResourceName = "workload.openshift.io/enable-shared-cpus"
)

// Package-level state; both variables keep their zero value unless
// parseConfig reads and decodes the configuration file successfully.
var (
	config            Config
	sharedCpusEnabled bool
)

type (
	// Config is the decoded content of the shared CPUs configuration file.
	// The embedded struct is decoded from the "shared_cpus" JSON object and
	// its fields are promoted, so callers read cfg.ContainersLimit directly.
	Config struct {
		sharedCpus `json:"shared_cpus"`
	}

	// sharedCpus holds the settings found under the "shared_cpus" key.
	sharedCpus struct {
		// ContainersLimit specifies the number of containers that are
		// allowed to access the shared CPU pool.
		ContainersLimit int64 `json:"containers_limit"`
	}
)
46+
47+
// init runs parseConfig once during package initialization, so the
// configuration file is read before any caller queries this package.
func init() {
48+
parseConfig()
49+
}
50+
51+
// IsEnabled reports whether parseConfig managed to read and decode the
// shared CPUs configuration file; it is false otherwise.
func IsEnabled() bool {
52+
return sharedCpusEnabled
53+
}
54+
55+
// GetResourceName returns the shared CPUs resource name constant as a
// corev1.ResourceName.
func GetResourceName() corev1.ResourceName {
56+
return sharedCpusResourceName
57+
}
58+
59+
// GetConfig returns a copy of the package-level configuration. The copy is
// the zero Config when no configuration was loaded.
func GetConfig() Config {
60+
return config
61+
}
62+
63+
func parseConfig() {
64+
b, err := os.ReadFile(configFileName)
65+
if err != nil {
66+
if errors.Is(err, os.ErrNotExist) {
67+
return
68+
}
69+
klog.ErrorS(err, "Failed to read configuration file for shared cpus", "fileName", configFileName)
70+
return
71+
}
72+
cfg, err := parseConfigData(b)
73+
if err != nil {
74+
return
75+
}
76+
config = *cfg
77+
sharedCpusEnabled = true
78+
}
79+
80+
func parseConfigData(data []byte) (*Config, error) {
81+
cfg := &Config{}
82+
err := json.Unmarshal(data, cfg)
83+
if err != nil {
84+
klog.ErrorS(err, "Failed to parse configuration file for shared cpus", "fileContent", string(data))
85+
}
86+
return cfg, err
87+
}
+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
package sharedcpus
2+
3+
import "testing"
4+
5+
func TestParseConfigData(t *testing.T) {
6+
testCases := []struct {
7+
data []byte
8+
expectedToBeParsed bool
9+
containerLimitValue int64
10+
}{
11+
{
12+
data: []byte(`{
13+
"shared_cpus": {
14+
"containers_limit": 15
15+
}
16+
}`),
17+
expectedToBeParsed: true,
18+
containerLimitValue: 15,
19+
},
20+
{
21+
data: []byte(`{
22+
"shared_cpus": {
23+
"abc": "25"
24+
}
25+
}`),
26+
expectedToBeParsed: false,
27+
containerLimitValue: 0,
28+
},
29+
}
30+
for _, tc := range testCases {
31+
cfg, err := parseConfigData(tc.data)
32+
if err != nil && tc.expectedToBeParsed {
33+
t.Errorf("shared cpus data expected to be parsed")
34+
}
35+
if cfg.ContainersLimit != tc.containerLimitValue {
36+
t.Errorf("shared cpus ContainersLimit is different than expected: want: %d; got %d", tc.containerLimitValue, cfg.ContainersLimit)
37+
}
38+
}
39+
}

0 commit comments

Comments
 (0)