
Commit 210b97b

Tal-or authored and bertinatto committed
UPSTREAM: <carry>: add new admission for handling shared cpus
Adding a new mutation plugin that handles the following:

1. In case of a `workload.openshift.io/enable-shared-cpus` request, it adds an annotation to hint the runtime about the request. The runtime is not aware of extended resources, hence we need the annotation.
2. It validates the pod's QoS class and returns an error if it is not a guaranteed QoS class.
3. It validates that no more than a single resource is being requested.
4. It validates that the pod is deployed in a namespace that has the mixedcpus-workloads-allowed annotation.

For more information see openshift/enhancements#1396

Signed-off-by: Talor Itzhak <[email protected]>

UPSTREAM: <carry>: Update management webhook pod admission logic

Updating the logic for pod admission to allow a pod creation with workload partitioning annotations to run in a namespace that has no workload allow annotations. The pod will be stripped of its workload annotations and treated as if it were a normal pod; a warning annotation will be placed on the pod to note the behavior.

Signed-off-by: ehila <[email protected]>

UPSTREAM: <carry>: add support for cpu limits into management workloads

Added support to allow workload partitioning to use the CPU limits for a container. To allow the runtime to make better decisions around workload CPU quotas, we pass down the CPU limit as part of the `cpulimit` value in the annotation. CRI-O will take that information and calculate the quota per node. This should support situations where workloads might have different CPU period overrides assigned.

Updated kubelet for static pods and the admission webhook for regular pods to support CPU limits.

Updated unit tests to reflect the changes.

Signed-off-by: ehila <[email protected]>
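For illustration only (this snippet is not part of the commit), a minimal Go sketch of the kind of container spec the new mixedcpus admission plugin is meant to act on: a Guaranteed-QoS container requesting exactly one `workload.openshift.io/enable-shared-cpus` unit. The image name is hypothetical, and the namespace-level allow annotation that the plugin checks is not shown in this diff, so it is omitted here.

// Sketch only: a Guaranteed QoS container requesting the shared-cpus extended
// resource that the new mixedcpus admission plugin reacts to. CPU and memory
// requests equal limits (Guaranteed), and exactly one shared-cpus unit is requested.
package main

import (
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func sharedCPUsContainer() corev1.Container {
	return corev1.Container{
		Name:  "app",
		Image: "registry.example.com/app:latest", // hypothetical image
		Resources: corev1.ResourceRequirements{
			Requests: corev1.ResourceList{
				corev1.ResourceCPU:    resource.MustParse("2"),
				corev1.ResourceMemory: resource.MustParse("2Gi"),
				// extended resource named in the commit message; the runtime is
				// hinted via an annotation added by the admission plugin
				corev1.ResourceName("workload.openshift.io/enable-shared-cpus"): resource.MustParse("1"),
			},
			Limits: corev1.ResourceList{
				corev1.ResourceCPU:    resource.MustParse("2"),
				corev1.ResourceMemory: resource.MustParse("2Gi"),
				corev1.ResourceName("workload.openshift.io/enable-shared-cpus"): resource.MustParse("1"),
			},
		},
	}
}

func main() { _ = sharedCPUsContainer() }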
1 parent 97777c3 commit 210b97b

File tree

8 files changed: +660 −65 lines changed

openshift-kube-apiserver/admission/admissionenablement/register.go

+3

@@ -5,6 +5,7 @@ import (
 	"k8s.io/apiserver/pkg/admission"
 	"k8s.io/apiserver/pkg/admission/plugin/resourcequota"
 	mutatingwebhook "k8s.io/apiserver/pkg/admission/plugin/webhook/mutating"
+	"k8s.io/kubernetes/openshift-kube-apiserver/admission/autoscaling/mixedcpus"
 
 	"github.com/openshift/apiserver-library-go/pkg/admission/imagepolicy"
 	imagepolicyapiv1 "github.com/openshift/apiserver-library-go/pkg/admission/imagepolicy/apis/imagepolicy/v1"
@@ -32,6 +33,7 @@ func RegisterOpenshiftKubeAdmissionPlugins(plugins *admission.Plugins) {
 	ingressadmission.Register(plugins)
 	managementcpusoverride.Register(plugins)
 	managednode.Register(plugins)
+	mixedcpus.Register(plugins)
 	projectnodeenv.Register(plugins)
 	quotaclusterresourceoverride.Register(plugins)
 	quotaclusterresourcequota.Register(plugins)
@@ -74,6 +76,7 @@ var (
 		hostassignment.PluginName,          // "route.openshift.io/RouteHostAssignment"
 		csiinlinevolumesecurity.PluginName, // "storage.openshift.io/CSIInlineVolumeSecurity"
 		managednode.PluginName,             // "autoscaling.openshift.io/ManagedNode"
+		mixedcpus.PluginName,               // "autoscaling.openshift.io/MixedCPUs"
 	}
 
 	// openshiftAdmissionPluginsForKubeAfterResourceQuota are the plugins to add after ResourceQuota plugin
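The mixedcpus plugin body itself is not part of this diff; the registration calls above follow the standard k8s.io/apiserver admission pattern. A minimal sketch of what such a Register function generally looks like (the examplePlugin type here is hypothetical; the plugin name string is taken from the comment above):

// Sketch of the usual Register pattern for an admission plugin, assuming the
// upstream k8s.io/apiserver admission API. A real plugin would also implement
// the mutation/validation interfaces; only the wiring is shown here.
package mixedcpus

import (
	"io"

	"k8s.io/apiserver/pkg/admission"
)

const PluginName = "autoscaling.openshift.io/MixedCPUs"

type examplePlugin struct {
	*admission.Handler
}

// Register wires the plugin factory into the shared plugin registry, exactly
// like the mixedcpus.Register(plugins) call added in this file.
func Register(plugins *admission.Plugins) {
	plugins.Register(PluginName, func(config io.Reader) (admission.Interface, error) {
		return &examplePlugin{Handler: admission.NewHandler(admission.Create)}, nil
	})
}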

openshift-kube-apiserver/admission/autoscaling/managementcpusoverride/admission.go

+54 −34

@@ -71,6 +71,14 @@ func Register(plugins *admission.Plugins) {
 	})
 }
 
+type resourceAnnotation struct {
+	// CPUShares contains resource annotation value cpushares key
+	CPUShares uint64 `json:"cpushares,omitempty"`
+	// CPULimit contains the cpu limit in millicores to be used by the container runtime to calculate
+	// quota
+	CPULimit int64 `json:"cpulimit,omitempty"`
+}
+
 // managementCPUsOverride presents admission plugin that should replace pod container CPU requests with a new management resource.
 // It applies to all pods that:
 // 1. are in an allowed namespace
@@ -217,6 +225,13 @@ func (a *managementCPUsOverride) Admit(ctx context.Context, attr admission.Attri
 		return err
 	}
 
+	if _, found := ns.Annotations[namespaceAllowedAnnotation]; !found && len(workloadType) > 0 {
+		pod.Annotations[workloadAdmissionWarning] = fmt.Sprintf(
+			"skipping pod CPUs requests modifications because the %s namespace is not annotated with %s to allow workload partitioning",
+			ns.GetName(), namespaceAllowedAnnotation)
+		return nil
+	}
+
 	if !doesNamespaceAllowWorkloadType(ns.Annotations, workloadType) {
 		return admission.NewForbidden(attr, fmt.Errorf("%s the pod namespace %q does not allow the workload type %s", PluginName, ns.Name, workloadType))
 	}
@@ -245,13 +260,6 @@ func (a *managementCPUsOverride) Admit(ctx context.Context, attr admission.Attri
 		return nil
 	}
 
-	// we should skip mutation of the pod that has container with both CPU limit and request because once we will remove
-	// the request, the defaulter will set the request back with the CPU limit value
-	if podHasBothCPULimitAndRequest(allContainers) {
-		pod.Annotations[workloadAdmissionWarning] = "skip pod CPUs requests modifications because pod container has both CPU limit and request"
-		return nil
-	}
-
 	// before we update the pod available under admission attributes, we need to verify that deletion of the CPU request
 	// will not change the pod QoS class, otherwise skip pod mutation
 	// 1. Copy the pod
@@ -353,6 +361,14 @@ func updateContainersResources(containers []coreapi.Container, podAnnotations ma
 			continue
 		}
 
+		resourceAnno := resourceAnnotation{}
+
+		if c.Resources.Limits != nil {
+			if value, ok := c.Resources.Limits[coreapi.ResourceCPU]; ok {
+				resourceAnno.CPULimit = value.MilliValue()
+			}
+		}
+
 		if c.Resources.Requests != nil {
 			if _, ok := c.Resources.Requests[coreapi.ResourceCPU]; !ok {
 				continue
@@ -361,9 +377,20 @@ func updateContainersResources(containers []coreapi.Container, podAnnotations ma
 			cpuRequest := c.Resources.Requests[coreapi.ResourceCPU]
 			cpuRequestInMilli := cpuRequest.MilliValue()
 
-			cpuShares := cm.MilliCPUToShares(cpuRequestInMilli)
-			podAnnotations[cpusharesAnnotationKey] = fmt.Sprintf(`{"%s": %d}`, containerResourcesAnnotationValueKeyCPUShares, cpuShares)
+			// Casting to uint64, Linux build returns uint64, noop Darwin build returns int64
+			resourceAnno.CPUShares = uint64(cm.MilliCPUToShares(cpuRequestInMilli))
+
+			// This should not error but if something does go wrong we default to string creation of just CPU Shares
+			// and add a warning annotation
+			resourceAnnoString, err := json.Marshal(resourceAnno)
+			if err != nil {
+				podAnnotations[workloadAdmissionWarning] = fmt.Sprintf("failed to marshal cpu resources, using fallback: err: %s", err.Error())
+				podAnnotations[cpusharesAnnotationKey] = fmt.Sprintf(`{"%s": %d}`, containerResourcesAnnotationValueKeyCPUShares, resourceAnno.CPUShares)
+			} else {
+				podAnnotations[cpusharesAnnotationKey] = string(resourceAnnoString)
+			}
 			delete(c.Resources.Requests, coreapi.ResourceCPU)
+			delete(c.Resources.Limits, coreapi.ResourceCPU)
 
 			if c.Resources.Limits == nil {
 				c.Resources.Limits = coreapi.ResourceList{}
@@ -378,7 +405,7 @@ func updateContainersResources(containers []coreapi.Container, podAnnotations ma
 	}
 }
 
-func isGuaranteed(containers []coreapi.Container) bool {
+func IsGuaranteed(containers []coreapi.Container) bool {
 	for _, c := range containers {
 		// only memory and CPU resources are relevant to decide pod QoS class
 		for _, r := range []coreapi.ResourceName{coreapi.ResourceMemory, coreapi.ResourceCPU} {
@@ -425,7 +452,7 @@ func isBestEffort(containers []coreapi.Container) bool {
 }
 
 func getPodQoSClass(containers []coreapi.Container) coreapi.PodQOSClass {
-	if isGuaranteed(containers) {
+	if IsGuaranteed(containers) {
 		return coreapi.PodQOSGuaranteed
 	}
 
@@ -449,10 +476,13 @@ func podHasBothCPULimitAndRequest(containers []coreapi.Container) bool {
 	return false
 }
 
+// doesNamespaceAllowWorkloadType will return false when a workload type does not match any present ones.
 func doesNamespaceAllowWorkloadType(annotations map[string]string, workloadType string) bool {
 	v, found := annotations[namespaceAllowedAnnotation]
+	// When a namespace contains no annotation for workloads we infer that to mean all workload types are allowed.
+	// The mutation hook will strip all workload annotation from pods that contain them in that circumstance.
 	if !found {
-		return false
+		return true
 	}
 
 	for _, t := range strings.Split(v, ",") {
@@ -559,17 +589,20 @@ func (a *managementCPUsOverride) Validate(ctx context.Context, attr admission.At
 		allErrs = append(allErrs, getPodInvalidWorkloadAnnotationError(pod.Annotations, err.Error()))
 	}
 
-	workloadResourceAnnotations := map[string]map[string]int{}
+	workloadResourceAnnotations := resourceAnnotation{}
+	hasWorkloadAnnotation := false
 	for k, v := range pod.Annotations {
 		if !strings.HasPrefix(k, containerResourcesAnnotationPrefix) {
 			continue
 		}
+		hasWorkloadAnnotation = true
 
-		resourceAnnotationValue := map[string]int{}
-		if err := json.Unmarshal([]byte(v), &resourceAnnotationValue); err != nil {
+		// Custom decoder to print invalid fields for resources
+		decoder := json.NewDecoder(strings.NewReader(v))
+		decoder.DisallowUnknownFields()
+		if err := decoder.Decode(&workloadResourceAnnotations); err != nil {
 			allErrs = append(allErrs, getPodInvalidWorkloadAnnotationError(pod.Annotations, err.Error()))
 		}
-		workloadResourceAnnotations[k] = resourceAnnotationValue
 	}
 
 	containersWorkloadResources := map[string]*coreapi.Container{}
@@ -586,9 +619,9 @@ func (a *managementCPUsOverride) Validate(ctx context.Context, attr admission.At
 		}
 	}
 
-	// the pod does not have workload annotation
-	if len(workloadType) == 0 {
-		if len(workloadResourceAnnotations) > 0 {
+	switch {
+	case len(workloadType) == 0: // the pod does not have workload annotation
+		if hasWorkloadAnnotation {
 			allErrs = append(allErrs, getPodInvalidWorkloadAnnotationError(pod.Annotations, "the pod without workload annotation can not have resource annotation"))
 		}
 
@@ -599,21 +632,8 @@ func (a *managementCPUsOverride) Validate(ctx context.Context, attr admission.At
 
 			allErrs = append(allErrs, field.Invalid(field.NewPath("spec.containers.resources.requests"), c.Resources.Requests, fmt.Sprintf("the pod without workload annotations can not have containers with workload resources %q", resourceName)))
 		}
-	} else {
-		if !doesNamespaceAllowWorkloadType(ns.Annotations, workloadType) { // pod has workload annotation, but the pod does not have workload annotation
-			allErrs = append(allErrs, getPodInvalidWorkloadAnnotationError(pod.Annotations, fmt.Sprintf("the pod can not have workload annotation, when the namespace %q does not allow it", ns.Name)))
-		}
-
-		for _, v := range workloadResourceAnnotations {
-			if len(v) > 1 {
-				allErrs = append(allErrs, field.Invalid(field.NewPath("metadata.annotations"), pod.Annotations, "the pod resource annotation value can not have more than one key"))
-			}
-
-			// the pod should not have any resource annotations with the value that includes keys different from cpushares
-			if _, ok := v[containerResourcesAnnotationValueKeyCPUShares]; len(v) == 1 && !ok {
-				allErrs = append(allErrs, field.Invalid(field.NewPath("metadata.annotations"), pod.Annotations, "the pod resource annotation value should have only cpushares key"))
-			}
-		}
+	case !doesNamespaceAllowWorkloadType(ns.Annotations, workloadType): // pod has workload annotation, but the namespace does not allow specified workload
+		allErrs = append(allErrs, getPodInvalidWorkloadAnnotationError(pod.Annotations, fmt.Sprintf("the namespace %q does not allow the workload type %s", ns.Name, workloadType)))
 	}
 
 	if len(allErrs) == 0 {
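To make the new annotation shape concrete, here is a small standalone sketch (not part of the commit) of the marshaling step added to updateContainersResources above. The struct mirrors the resourceAnnotation type from this diff, and the shares math replicates the kubelet MilliCPUToShares conversion inline, so a container with a 250m CPU request and a 500m CPU limit now yields `{"cpushares":256,"cpulimit":500}`, matching the updated test expectations.

// Sketch: reproduce the annotation value written by updateContainersResources
// after this change. The struct mirrors the resourceAnnotation type added in
// admission.go; milliCPUToShares copies the kubelet conversion formula.
package main

import (
	"encoding/json"
	"fmt"
)

type resourceAnnotation struct {
	CPUShares uint64 `json:"cpushares,omitempty"`
	CPULimit  int64  `json:"cpulimit,omitempty"`
}

// milliCPUToShares mirrors the kubelet conversion: 1000 millicores map to 1024
// shares, with a floor of 2 shares.
func milliCPUToShares(milliCPU int64) uint64 {
	const (
		sharesPerCPU  = 1024
		milliCPUToCPU = 1000
		minShares     = 2
	)
	if milliCPU == 0 {
		return minShares
	}
	shares := (milliCPU * sharesPerCPU) / milliCPUToCPU
	if shares < minShares {
		return minShares
	}
	return uint64(shares)
}

func main() {
	// A container with requests.cpu=250m and limits.cpu=500m, as in the updated tests.
	anno := resourceAnnotation{
		CPUShares: milliCPUToShares(250), // 256
		CPULimit:  500,                   // millicores
	}
	b, err := json.Marshal(anno)
	if err != nil {
		// Fallback path from the diff: emit only cpushares (a warning annotation is also set).
		fmt.Printf(`{"cpushares": %d}`+"\n", anno.CPUShares)
		return
	}
	fmt.Println(string(b)) // {"cpushares":256,"cpulimit":500}
}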

openshift-kube-apiserver/admission/autoscaling/managementcpusoverride/admission_test.go

+55 −30

@@ -84,12 +84,12 @@ func TestAdmit(t *testing.T) {
 	}{
 		{
 			name:               "should return admission error when the pod namespace does not allow the workload type",
-			pod:                testManagedPod("500m", "250m", "500Mi", "250Mi"),
+			pod:                testManagedPodWithWorkloadAnnotation("500m", "250m", "500Mi", "250Mi", "non-existent"),
 			expectedCpuRequest: resource.MustParse("250m"),
-			namespace:          testNamespace(),
+			namespace:          testManagedNamespace(),
 			nodes:              []*corev1.Node{testNodeWithManagementResource()},
 			infra:              testClusterSNOInfra(),
-			expectedError:      fmt.Errorf("the pod namespace %q does not allow the workload type management", "namespace"),
+			expectedError:      fmt.Errorf("the pod namespace %q does not allow the workload type non-existent", "managed-namespace"),
 		},
 		{
 			name: "should ignore pods that do not have managed annotation",
@@ -167,14 +167,33 @@
 			expectedError: fmt.Errorf(`failed to get workload annotation effect: the workload annotation value map["test":"test"] does not have "effect" key`),
 			infra:         testClusterSNOInfra(),
 		},
+		{
+			name: "should return admission warning when the pod has workload annotation but the namespace does not",
+			pod: testManagedPodWithAnnotations(
+				"500m",
+				"250m",
+				"500Mi",
+				"250Mi",
+				map[string]string{
+					fmt.Sprintf("%s%s", podWorkloadTargetAnnotationPrefix, workloadTypeManagement): `{"test": "test"}`,
+				},
+			),
+			expectedCpuRequest: resource.MustParse("250m"),
+			expectedAnnotations: map[string]string{
+				workloadAdmissionWarning: "skipping pod CPUs requests modifications because the namespace namespace is not annotated with workload.openshift.io/allowed to allow workload partitioning",
+			},
+			namespace: testNamespace(),
+			nodes:     []*corev1.Node{testNodeWithManagementResource()},
+			infra:     testClusterSNOInfra(),
+		},
 		{
 			name:               "should delete CPU requests and update workload CPU annotations for the burstable pod with managed annotation",
 			pod:                testManagedPod("", "250m", "500Mi", "250Mi"),
 			expectedCpuRequest: resource.Quantity{},
 			namespace:          testManagedNamespace(),
 			expectedAnnotations: map[string]string{
-				fmt.Sprintf("%s%s", containerResourcesAnnotationPrefix, "test"):     fmt.Sprintf(`{"%s": 256}`, containerResourcesAnnotationValueKeyCPUShares),
-				fmt.Sprintf("%s%s", containerResourcesAnnotationPrefix, "initTest"): fmt.Sprintf(`{"%s": 256}`, containerResourcesAnnotationValueKeyCPUShares),
+				fmt.Sprintf("%s%s", containerResourcesAnnotationPrefix, "test"):     fmt.Sprintf(`{"%s":256}`, containerResourcesAnnotationValueKeyCPUShares),
+				fmt.Sprintf("%s%s", containerResourcesAnnotationPrefix, "initTest"): fmt.Sprintf(`{"%s":256}`, containerResourcesAnnotationValueKeyCPUShares),
 				fmt.Sprintf("%s%s", podWorkloadTargetAnnotationPrefix, workloadTypeManagement): fmt.Sprintf(`{"%s":"%s"}`, podWorkloadAnnotationEffect, workloadEffectPreferredDuringScheduling),
 			},
 			nodes: []*corev1.Node{testNodeWithManagementResource()},
@@ -217,12 +236,14 @@
 			infra: testClusterSNOInfra(),
 		},
 		{
-			name:               "should ignore pod when one of pod containers have both CPU limit and request",
+			name:               "should not ignore pod when one of pod containers have both CPU limit and request",
 			pod:                testManagedPod("500m", "250m", "500Mi", ""),
-			expectedCpuRequest: resource.MustParse("250m"),
+			expectedCpuRequest: resource.Quantity{},
 			namespace:          testManagedNamespace(),
 			expectedAnnotations: map[string]string{
-				workloadAdmissionWarning: fmt.Sprintf("skip pod CPUs requests modifications because pod container has both CPU limit and request"),
+				fmt.Sprintf("%s%s", containerResourcesAnnotationPrefix, "test"):     fmt.Sprintf(`{"%s":256,"cpulimit":500}`, containerResourcesAnnotationValueKeyCPUShares),
+				fmt.Sprintf("%s%s", containerResourcesAnnotationPrefix, "initTest"): fmt.Sprintf(`{"%s":256,"cpulimit":500}`, containerResourcesAnnotationValueKeyCPUShares),
+				fmt.Sprintf("%s%s", podWorkloadTargetAnnotationPrefix, workloadTypeManagement): fmt.Sprintf(`{"%s":"%s"}`, podWorkloadAnnotationEffect, workloadEffectPreferredDuringScheduling),
 			},
 			nodes: []*corev1.Node{testNodeWithManagementResource()},
 			infra: testClusterSNOInfra(),
@@ -239,12 +260,12 @@
 			infra: testClusterSNOInfra(),
 		},
 		{
-			name:               "should not mutate the pod when at least one node does not have management resources",
+			name:               "should not mutate the pod when cpu partitioning is not set to AllNodes",
 			pod:                testManagedPod("500m", "250m", "500Mi", "250Mi"),
 			expectedCpuRequest: resource.MustParse("250m"),
 			namespace:          testManagedNamespace(),
 			nodes:              []*corev1.Node{testNode()},
-			infra:              testClusterSNOInfra(),
+			infra:              testClusterInfraWithoutWorkloadPartitioning(),
 		},
 		{
 			name: "should return admission error when the cluster does not have any nodes",
@@ -407,7 +428,7 @@ func TestValidate(t *testing.T) {
 			),
 			namespace:     testManagedNamespace(),
 			nodes:         []*corev1.Node{testNodeWithManagementResource()},
-			expectedError: fmt.Errorf("he pod resource annotation value should have only cpushares key"),
+			expectedError: fmt.Errorf("json: unknown field \"cpuset\""),
 		},
 		{
 			name: "should return invalid error when the pod does not have workload annotation, but has resource annotation",
@@ -437,16 +458,28 @@
 			expectedError: fmt.Errorf("the pod without workload annotations can not have containers with workload resources %q", "management.workload.openshift.io/cores"),
 		},
 		{
-			name: "should return invalid error when the pod has workload annotation, but the pod namespace does not have allowed annotation",
-			pod: testManagedPod(
+			name: "should return invalid error when the pod has workload annotation, but the pod namespace does not have allowed workload type",
+			pod: testManagedPodWithWorkloadAnnotation(
 				"500m",
 				"250m",
 				"500Mi",
 				"250Mi",
+				"non-existent",
 			),
-			namespace:     testNamespace(),
+			namespace:     testManagedNamespace(),
 			nodes:         []*corev1.Node{testNodeWithManagementResource()},
-			expectedError: fmt.Errorf("the pod can not have workload annotation, when the namespace %q does not allow it", "namespace"),
+			expectedError: fmt.Errorf("the namespace %q does not allow the workload type %s", "managed-namespace", "non-existent"),
+		},
+		{
+			name: "should not return any errors when the pod has workload annotation, but the pod namespace has no annotations",
+			pod: testManagedPod(
+				"500m",
+				"250m",
+				"500Mi",
+				"250Mi",
+			),
+			namespace: testNamespace(),
+			nodes:     []*corev1.Node{testNodeWithManagementResource()},
 		},
 		{
 			name: "should not return any errors when the pod and namespace valid",
@@ -532,19 +565,12 @@ func testManagedStaticPod(cpuLimit, cpuRequest, memoryLimit, memoryRequest strin
 	}
 }
 
 func testManagedPod(cpuLimit, cpuRequest, memoryLimit, memoryRequest string) *kapi.Pod {
-	pod := testPod(cpuLimit, cpuRequest, memoryLimit, memoryRequest)
-
-	pod.Annotations = map[string]string{}
-	for _, c := range pod.Spec.InitContainers {
-		cpusetAnnotation := fmt.Sprintf("%s%s", containerResourcesAnnotationPrefix, c.Name)
-		pod.Annotations[cpusetAnnotation] = `{"cpuset": "0-1"}`
-	}
-	for _, c := range pod.Spec.Containers {
-		cpusetAnnotation := fmt.Sprintf("%s%s", containerResourcesAnnotationPrefix, c.Name)
-		pod.Annotations[cpusetAnnotation] = `{"cpuset": "0-1"}`
-	}
+	return testManagedPodWithWorkloadAnnotation(cpuLimit, cpuRequest, memoryLimit, memoryRequest, workloadTypeManagement)
+}
 
-	managementWorkloadAnnotation := fmt.Sprintf("%s%s", podWorkloadTargetAnnotationPrefix, workloadTypeManagement)
+func testManagedPodWithWorkloadAnnotation(cpuLimit, cpuRequest, memoryLimit, memoryRequest string, workloadType string) *kapi.Pod {
+	pod := testPod(cpuLimit, cpuRequest, memoryLimit, memoryRequest)
+	managementWorkloadAnnotation := fmt.Sprintf("%s%s", podWorkloadTargetAnnotationPrefix, workloadType)
 	pod.Annotations = map[string]string{
 		managementWorkloadAnnotation: fmt.Sprintf(`{"%s":"%s"}`, podWorkloadAnnotationEffect, workloadEffectPreferredDuringScheduling),
 	}
@@ -675,9 +701,8 @@ func testClusterSNOInfra() *configv1.Infrastructure {
 	}
 }
 
-func testClusterInfraWithoutTopologyFields() *configv1.Infrastructure {
+func testClusterInfraWithoutWorkloadPartitioning() *configv1.Infrastructure {
 	infra := testClusterSNOInfra()
-	infra.Status.ControlPlaneTopology = ""
-	infra.Status.InfrastructureTopology = ""
+	infra.Status.CPUPartitioning = configv1.CPUPartitioningNone
 	return infra
 }
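The updated TestValidate expectation above (`json: unknown field "cpuset"`) follows directly from the strict decoder introduced in Validate; a quick standalone sketch (not part of the commit) of that behavior:

// Sketch: the strict decoding used by Validate after this change. An old-style
// resource annotation value carrying a "cpuset" key is now rejected because the
// decoder only knows the cpushares/cpulimit fields.
package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

type resourceAnnotation struct {
	CPUShares uint64 `json:"cpushares,omitempty"`
	CPULimit  int64  `json:"cpulimit,omitempty"`
}

func main() {
	legacyValue := `{"cpuset": "0-1"}` // the value the old test helper used to set

	decoder := json.NewDecoder(strings.NewReader(legacyValue))
	decoder.DisallowUnknownFields()

	var anno resourceAnnotation
	if err := decoder.Decode(&anno); err != nil {
		fmt.Println(err) // json: unknown field "cpuset"
	}
}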
