Skip to content

Commit c58db8c

Browse files
Tal-or authored and bertinatto committed
UPSTREAM: <carry>: advertise shared cpus for mixed cpus feature
Kubelet should advertise the shared cpus as extended resources. This has the benefit of limiting the number of containers that can request access to the shared cpus. For more information see - openshift/enhancements#1396 Signed-off-by: Talor Itzhak <[email protected]>
1 parent d33c96e commit c58db8c

File tree

4 files changed

+152
-0
lines changed

4 files changed

+152
-0
lines changed

Diff for: pkg/kubelet/kubelet.go

+4
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ import (
116116
"k8s.io/kubernetes/pkg/kubelet/server"
117117
servermetrics "k8s.io/kubernetes/pkg/kubelet/server/metrics"
118118
serverstats "k8s.io/kubernetes/pkg/kubelet/server/stats"
119+
"k8s.io/kubernetes/pkg/kubelet/sharedcpus"
119120
"k8s.io/kubernetes/pkg/kubelet/stats"
120121
"k8s.io/kubernetes/pkg/kubelet/status"
121122
"k8s.io/kubernetes/pkg/kubelet/sysctl"
@@ -717,6 +718,9 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
717718
if managed.IsEnabled() {
718719
klog.InfoS("Pinned Workload Management Enabled")
719720
}
721+
if sharedcpus.IsEnabled() {
722+
klog.InfoS("Mixed CPUs Workload Enabled")
723+
}
720724

721725
if kubeDeps.KubeClient != nil {
722726
klet.runtimeClassManager = runtimeclass.NewManager(kubeDeps.KubeClient)

Diff for: pkg/kubelet/kubelet_node_status.go

+22
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ import (
4545
"k8s.io/kubernetes/pkg/kubelet/events"
4646
"k8s.io/kubernetes/pkg/kubelet/managed"
4747
"k8s.io/kubernetes/pkg/kubelet/nodestatus"
48+
"k8s.io/kubernetes/pkg/kubelet/sharedcpus"
4849
taintutil "k8s.io/kubernetes/pkg/util/taints"
4950
volutil "k8s.io/kubernetes/pkg/volume/util"
5051
)
@@ -135,6 +136,7 @@ func (kl *Kubelet) tryRegisterWithAPIServer(node *v1.Node) bool {
135136
if managed.IsEnabled() {
136137
requiresUpdate = kl.addManagementNodeCapacity(node, existingNode) || requiresUpdate
137138
}
139+
requiresUpdate = kl.reconcileSharedCPUsNodeCapacity(node, existingNode) || requiresUpdate
138140
if requiresUpdate {
139141
if _, _, err := nodeutil.PatchNodeStatus(kl.kubeClient.CoreV1(), types.NodeName(kl.nodeName), originalNode, existingNode); err != nil {
140142
klog.ErrorS(err, "Unable to reconcile node with API server,error updating node", "node", klog.KObj(node))
@@ -164,6 +166,25 @@ func (kl *Kubelet) addManagementNodeCapacity(initialNode, existingNode *v1.Node)
164166
return true
165167
}
166168

169+
// reconcileSharedCPUsNodeCapacity keeps the shared-CPUs resource entry in
// existingNode.Status.Capacity in sync with the state of the sharedcpus package.
//
// When the feature is disabled, the entry is removed if it is present.
// When the feature is enabled, the entry is set to the configured
// ContainersLimit unless it already holds an equal quantity.
//
// It returns true only when this function itself added, changed or removed the
// shared-CPUs entry; otherwise it returns false. Only Status.Capacity is
// touched by the code below.
//
// NOTE(review): the boolean returned by updateDefaultResources is discarded
// here, whereas reconcileHugePageResource uses it as its initial
// requiresUpdate value — confirm that dropping it is intentional.
func (kl *Kubelet) reconcileSharedCPUsNodeCapacity(initialNode, existingNode *v1.Node) bool {
170+
updateDefaultResources(initialNode, existingNode)
171+
sharedCPUsResourceName := sharedcpus.GetResourceName()
172+
// Feature disabled: drop a stale capacity entry if one exists and report
// whether anything was actually removed.
173+
if !sharedcpus.IsEnabled() {
174+
if _, ok := existingNode.Status.Capacity[sharedCPUsResourceName]; ok {
175+
delete(existingNode.Status.Capacity, sharedCPUsResourceName)
176+
return true
177+
}
178+
return false
179+
}
180+
// Feature enabled: the advertised quantity is the configured containers limit.
q := resource.NewQuantity(sharedcpus.GetConfig().ContainersLimit, resource.DecimalSI)
181+
// Nothing to do when the node already advertises the same quantity.
if existingCapacity, ok := existingNode.Status.Capacity[sharedCPUsResourceName]; ok && existingCapacity.Equal(*q) {
182+
return false
183+
}
184+
existingNode.Status.Capacity[sharedCPUsResourceName] = *q
185+
return true
186+
}
187+
167188
// reconcileHugePageResource will update huge page capacity for each page size and remove huge page sizes no longer supported
168189
func (kl *Kubelet) reconcileHugePageResource(initialNode, existingNode *v1.Node) bool {
169190
requiresUpdate := updateDefaultResources(initialNode, existingNode)
@@ -458,6 +479,7 @@ func (kl *Kubelet) initialNode(ctx context.Context) (*v1.Node, error) {
458479
if managed.IsEnabled() {
459480
kl.addManagementNodeCapacity(node, node)
460481
}
482+
kl.reconcileSharedCPUsNodeCapacity(node, node)
461483

462484
kl.setNodeStatus(ctx, node)
463485

Diff for: pkg/kubelet/sharedcpus/sharedcpus.go

+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/*
2+
Copyright 2023 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package sharedcpus
18+
19+
import (
20+
"encoding/json"
21+
"errors"
22+
"os"
23+
24+
corev1 "k8s.io/api/core/v1"
25+
"k8s.io/klog/v2"
26+
)
27+
28+
const (
29+
// configFileName is the path of the file that parseConfig reads.
configFileName = "/etc/kubernetes/openshift-workload-mixed-cpus"
30+
// sharedCpusResourceName is the resource name returned by GetResourceName.
sharedCpusResourceName = "workload.openshift.io/enable-shared-cpus"
31+
)
32+
33+
var (
34+
// config holds the parsed configuration; it keeps its zero value unless
// parseConfig successfully reads and parses the configuration file.
config Config
35+
// sharedCpusEnabled is set to true by parseConfig only after the
// configuration file has been read and parsed without error.
sharedCpusEnabled bool
36+
)
37+
38+
// Config is the top-level structure of the shared-CPUs configuration file.
// The embedded sharedCpus value is decoded from the "shared_cpus" JSON key.
type Config struct {
39+
sharedCpus `json:"shared_cpus"`
40+
}
41+
42+
// sharedCpus carries the settings found under the "shared_cpus" JSON key.
type sharedCpus struct {
43+
// ContainersLimit specifies the number of containers that are allowed to access the shared CPU pool.
44+
ContainersLimit int64 `json:"containers_limit"`
45+
}
46+
47+
// init loads the shared-CPUs configuration at package initialization by
// calling parseConfig.
func init() {
48+
parseConfig()
49+
}
50+
51+
// IsEnabled returns sharedCpusEnabled, which parseConfig sets to true only
// after the configuration file was read and parsed successfully.
func IsEnabled() bool {
52+
return sharedCpusEnabled
53+
}
54+
55+
// GetResourceName returns the sharedCpusResourceName constant as a
// corev1.ResourceName.
func GetResourceName() corev1.ResourceName {
55+
return sharedCpusResourceName
56+
}
58+
59+
// GetConfig returns a copy of the package-level configuration. The result is
// the zero Config when parseConfig did not store a parsed configuration.
func GetConfig() Config {
59+
return config
60+
}
62+
63+
// parseConfig reads configFileName and, on success, stores the parsed result
// in config and sets sharedCpusEnabled to true. On any read or parse failure
// it returns without modifying either package variable.
func parseConfig() {
64+
b, err := os.ReadFile(configFileName)
65+
if err != nil {
66+
// A missing file is not logged; the feature simply stays disabled.
if errors.Is(err, os.ErrNotExist) {
67+
return
68+
}
69+
// Any other read error is logged, and the feature stays disabled.
klog.ErrorS(err, "Failed to read configuration file for shared cpus", "fileName", configFileName)
70+
return
71+
}
72+
cfg, err := parseConfigData(b)
73+
if err != nil {
74+
// parseConfigData has already logged the parse error.
return
75+
}
76+
config = *cfg
77+
sharedCpusEnabled = true
78+
}
79+
80+
// parseConfigData decodes data as JSON into a new Config.
//
// It always returns a non-nil *Config, together with the error (if any)
// reported by json.Unmarshal. When unmarshalling fails, the error is also
// logged along with the raw content of data.
func parseConfigData(data []byte) (*Config, error) {
81+
cfg := &Config{}
82+
err := json.Unmarshal(data, cfg)
83+
if err != nil {
84+
klog.ErrorS(err, "Failed to parse configuration file for shared cpus", "fileContent", string(data))
85+
}
86+
// cfg is returned even on error; callers must check err before using it.
return cfg, err
87+
}

Diff for: pkg/kubelet/sharedcpus/sharedcpus_test.go

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
package sharedcpus
2+
3+
import "testing"
4+
5+
// TestParseConfigData feeds raw JSON documents to parseConfigData and checks
// the resulting ContainersLimit for each case.
func TestParseConfigData(t *testing.T) {
6+
testCases := []struct {
7+
data []byte
8+
expectedToBeParsed bool
9+
containerLimitValue int64
10+
}{
11+
// Document with a numeric containers_limit under shared_cpus.
{
12+
data: []byte(`{
13+
"shared_cpus": {
14+
"containers_limit": 15
15+
}
16+
}`),
17+
expectedToBeParsed: true,
18+
containerLimitValue: 15,
19+
},
20+
// Document without a containers_limit key; ContainersLimit is expected to stay 0.
{
21+
data: []byte(`{
22+
"shared_cpus": {
23+
"abc": "25"
24+
}
25+
}`),
26+
expectedToBeParsed: false,
27+
containerLimitValue: 0,
28+
},
29+
}
30+
for _, tc := range testCases {
31+
cfg, err := parseConfigData(tc.data)
32+
// NOTE(review): only the (err != nil && expectedToBeParsed) combination is
// reported; a case with expectedToBeParsed == false never fails when
// err == nil — confirm that this is intended.
if err != nil && tc.expectedToBeParsed {
33+
t.Errorf("shared cpus data expected to be parsed")
34+
}
35+
if cfg.ContainersLimit != tc.containerLimitValue {
36+
t.Errorf("shared cpus ContainersLimit is different than expected: want: %d; got %d", tc.containerLimitValue, cfg.ContainersLimit)
37+
}
38+
}
39+
}

0 commit comments

Comments
 (0)