From a564e0969555946056c846157fee98b66e24d40f Mon Sep 17 00:00:00 2001
From: fabriziopandini
Date: Fri, 25 Oct 2024 14:48:26 +0200
Subject: [PATCH 1/2] Add v1beta2 condition to MD controller

---
 api/v1beta1/machinedeployment_types.go        |  123 ++
 api/v1beta1/v1beta2_condition_consts.go       |   29 +-
 .../api/v1beta1/v1beta2_condition_consts.go   |    4 +-
 .../kubeadm/internal/controllers/status.go    |    5 +-
 .../controllers/machine/machine_controller.go |    2 +-
 .../machine/machine_controller_status.go      |    4 +-
 .../machinedeployment_controller.go           |  107 +-
 .../machinedeployment_controller_test.go      |   14 +-
 .../machinedeployment_status.go               |  502 +++++++
 .../machinedeployment_status_test.go          | 1158 +++++++++++++++++
 .../machinedeployment_sync.go                 |    3 +
 .../machinedeployment/mdutil/util.go          |   36 +-
 .../machineset/machineset_controller.go       |    2 +-
 .../machineset_controller_status.go           |    6 +-
 14 files changed, 1917 insertions(+), 78 deletions(-)
 create mode 100644 internal/controllers/machinedeployment/machinedeployment_status.go
 create mode 100644 internal/controllers/machinedeployment/machinedeployment_status_test.go

diff --git a/api/v1beta1/machinedeployment_types.go b/api/v1beta1/machinedeployment_types.go
index fe4d4a198edd..5594f829fcf3 100644
--- a/api/v1beta1/machinedeployment_types.go
+++ b/api/v1beta1/machinedeployment_types.go
@@ -77,6 +77,129 @@ const (
 	MachineDeploymentUniqueLabel = "machine-template-hash"
 )
 
+// MachineDeployment's Available condition and corresponding reasons that will be used in v1Beta2 API version.
+const (
+	// MachineDeploymentAvailableV1Beta2Condition is true if the MachineDeployment is not deleted, and it has minimum
+	// availability according to parameters specified in the deployment strategy, e.g. if using the RollingUpdate strategy,
+	// availableReplicas must be greater than or equal to desired replicas - MaxUnavailable replicas.
+	MachineDeploymentAvailableV1Beta2Condition = AvailableV1Beta2Condition
+
+	// MachineDeploymentAvailableWaitingForReplicasSetV1Beta2Reason surfaces when the .spec.replicas
+	// field of the MachineDeployment is not set.
+	MachineDeploymentAvailableWaitingForReplicasSetV1Beta2Reason = WaitingForReplicasSetV1Beta2Reason
+
+	// MachineDeploymentAvailableWaitingForAvailableReplicasSetV1Beta2Reason surfaces when the .status.v1beta2.availableReplicas
+	// field of the MachineDeployment is not set.
+	MachineDeploymentAvailableWaitingForAvailableReplicasSetV1Beta2Reason = "WaitingForAvailableReplicasSet"
+
+	// MachineDeploymentAvailableV1Beta2Reason surfaces when a MachineDeployment is available.
+	MachineDeploymentAvailableV1Beta2Reason = AvailableV1Beta2Reason
+
+	// MachineDeploymentNotAvailableV1Beta2Reason surfaces when a MachineDeployment is not available.
+	MachineDeploymentNotAvailableV1Beta2Reason = NotAvailableV1Beta2Reason
+
+	// MachineDeploymentAvailableInternalErrorV1Beta2Reason surfaces unexpected failures when computing the Available condition.
+	MachineDeploymentAvailableInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason
+)
+
+// MachineDeployment's MachinesReady condition and corresponding reasons that will be used in v1Beta2 API version.
+const (
+	// MachineDeploymentMachinesReadyV1Beta2Condition surfaces details of issues on the controlled machines, if any.
+	MachineDeploymentMachinesReadyV1Beta2Condition = MachinesReadyV1Beta2Condition
+
+	// MachineDeploymentMachinesReadyNoReplicasV1Beta2Reason surfaces when no machines exist for the MachineDeployment.
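+	// Note: when no machines exist, the MachinesReady condition is set to true, because there are no machine issues to surface.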
+	MachineDeploymentMachinesReadyNoReplicasV1Beta2Reason = NoReplicasV1Beta2Reason
+
+	// MachineDeploymentMachinesReadyInternalErrorV1Beta2Reason surfaces unexpected failures when listing machines
+	// or aggregating machines' conditions.
+	MachineDeploymentMachinesReadyInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason
+)
+
+// MachineDeployment's MachinesUpToDate condition and corresponding reasons that will be used in v1Beta2 API version.
+const (
+	// MachineDeploymentMachinesUpToDateV1Beta2Condition surfaces details of controlled machines not up to date, if any.
+	MachineDeploymentMachinesUpToDateV1Beta2Condition = MachinesUpToDateV1Beta2Condition
+
+	// MachineDeploymentMachinesUpToDateNoReplicasV1Beta2Reason surfaces when no machines exist for the MachineDeployment.
+	MachineDeploymentMachinesUpToDateNoReplicasV1Beta2Reason = NoReplicasV1Beta2Reason
+
+	// MachineDeploymentMachinesUpToDateInternalErrorV1Beta2Reason surfaces unexpected failures when listing machines
+	// or aggregating status.
+	MachineDeploymentMachinesUpToDateInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason
+)
+
+// MachineDeployment's ScalingUp condition and corresponding reasons that will be used in v1Beta2 API version.
+const (
+	// MachineDeploymentScalingUpV1Beta2Condition is true if actual replicas < desired replicas.
+	MachineDeploymentScalingUpV1Beta2Condition = ScalingUpV1Beta2Condition
+
+	// MachineDeploymentScalingUpV1Beta2Reason surfaces when actual replicas < desired replicas.
+	MachineDeploymentScalingUpV1Beta2Reason = ScalingUpV1Beta2Reason
+
+	// MachineDeploymentNotScalingUpV1Beta2Reason surfaces when actual replicas >= desired replicas.
+	MachineDeploymentNotScalingUpV1Beta2Reason = NotScalingUpV1Beta2Reason
+
+	// MachineDeploymentScalingUpInternalErrorV1Beta2Reason surfaces unexpected failures when listing machines.
+	MachineDeploymentScalingUpInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason
+
+	// MachineDeploymentScalingUpWaitingForReplicasSetV1Beta2Reason surfaces when the .spec.replicas
+	// field of the MachineDeployment is not set.
+	MachineDeploymentScalingUpWaitingForReplicasSetV1Beta2Reason = WaitingForReplicasSetV1Beta2Reason
+)
+
+// MachineDeployment's ScalingDown condition and corresponding reasons that will be used in v1Beta2 API version.
+const (
+	// MachineDeploymentScalingDownV1Beta2Condition is true if actual replicas > desired replicas.
+	MachineDeploymentScalingDownV1Beta2Condition = ScalingDownV1Beta2Condition
+
+	// MachineDeploymentScalingDownV1Beta2Reason surfaces when actual replicas > desired replicas.
+	MachineDeploymentScalingDownV1Beta2Reason = ScalingDownV1Beta2Reason
+
+	// MachineDeploymentNotScalingDownV1Beta2Reason surfaces when actual replicas <= desired replicas.
+	MachineDeploymentNotScalingDownV1Beta2Reason = NotScalingDownV1Beta2Reason
+
+	// MachineDeploymentScalingDownInternalErrorV1Beta2Reason surfaces unexpected failures when listing machines.
+	MachineDeploymentScalingDownInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason
+
+	// MachineDeploymentScalingDownWaitingForReplicasSetV1Beta2Reason surfaces when the .spec.replicas
+	// field of the MachineDeployment is not set.
+	MachineDeploymentScalingDownWaitingForReplicasSetV1Beta2Reason = WaitingForReplicasSetV1Beta2Reason
+)
+
+// MachineDeployment's Remediating condition and corresponding reasons that will be used in v1Beta2 API version.
+const (
+	// MachineDeploymentRemediatingV1Beta2Condition surfaces details about ongoing remediation of the controlled machines, if any.
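+	// Note: MachineDeployment only remediates machines with HealthCheckSucceeded set to false and with the OwnerRemediated condition set to false.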
+	MachineDeploymentRemediatingV1Beta2Condition = RemediatingV1Beta2Condition
+
+	// MachineDeploymentRemediatingV1Beta2Reason surfaces when the MachineDeployment has at least one machine with HealthCheckSucceeded set to false
+	// and with the OwnerRemediated condition set to false.
+	MachineDeploymentRemediatingV1Beta2Reason = RemediatingV1Beta2Reason
+
+	// MachineDeploymentNotRemediatingV1Beta2Reason surfaces when the MachineDeployment does not have any machine with HealthCheckSucceeded set to false
+	// and with the OwnerRemediated condition set to false.
+	MachineDeploymentNotRemediatingV1Beta2Reason = NotRemediatingV1Beta2Reason
+
+	// MachineDeploymentRemediatingInternalErrorV1Beta2Reason surfaces unexpected failures when computing the Remediating condition.
+	MachineDeploymentRemediatingInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason
+)
+
+// MachineDeployment's Deleting condition and corresponding reasons that will be used in v1Beta2 API version.
+const (
+	// MachineDeploymentDeletingV1Beta2Condition surfaces details about ongoing deletion of the controlled machines.
+	MachineDeploymentDeletingV1Beta2Condition = DeletingV1Beta2Condition
+
+	// MachineDeploymentDeletingDeletionTimestampNotSetV1Beta2Reason surfaces when the MachineDeployment is not deleting because the
+	// DeletionTimestamp is not set.
+	MachineDeploymentDeletingDeletionTimestampNotSetV1Beta2Reason = DeletionTimestampNotSetV1Beta2Reason
+
+	// MachineDeploymentDeletingDeletionTimestampSetV1Beta2Reason surfaces when the MachineDeployment is deleting because the
+	// DeletionTimestamp is set.
+	MachineDeploymentDeletingDeletionTimestampSetV1Beta2Reason = DeletionTimestampSetV1Beta2Reason
+
+	// MachineDeploymentDeletingInternalErrorV1Beta2Reason surfaces unexpected failures when deleting a MachineDeployment.
+	MachineDeploymentDeletingInternalErrorV1Beta2Reason = InternalErrorV1Beta2Reason
+)
+
 // ANCHOR: MachineDeploymentSpec
 
 // MachineDeploymentSpec defines the desired state of MachineDeployment.
diff --git a/api/v1beta1/v1beta2_condition_consts.go b/api/v1beta1/v1beta2_condition_consts.go
index bc8561c70f7a..df7146608bad 100644
--- a/api/v1beta1/v1beta2_condition_consts.go
+++ b/api/v1beta1/v1beta2_condition_consts.go
@@ -90,6 +90,9 @@ const (
 	// AvailableV1Beta2Reason applies to a condition surfacing object availability.
 	AvailableV1Beta2Reason = "Available"
 
+	// NotAvailableV1Beta2Reason applies to a condition surfacing an object not satisfying availability prerequisites.
+	NotAvailableV1Beta2Reason = "NotAvailable"
+
 	// ScalingUpV1Beta2Reason surfaces when an object is scaling up.
 	ScalingUpV1Beta2Reason = "ScalingUp"
 
@@ -170,32 +173,6 @@
 	InspectionFailedV1Beta2Reason = "InspectionFailed"
 )
 
-// Conditions that will be used for the MachineDeployment object in v1Beta2 API version.
-const (
-	// MachineDeploymentAvailableV1Beta2Condition is true if the MachineDeployment is not deleted, and it has minimum
-	// availability according to parameters specified in the deployment strategy, e.g. If using RollingUpgrade strategy,
-	// availableReplicas must be greater or equal than desired replicas - MaxUnavailable replicas.
-	MachineDeploymentAvailableV1Beta2Condition = AvailableV1Beta2Condition
-
-	// MachineDeploymentMachinesReadyV1Beta2Condition surfaces detail of issues on the controlled machines, if any.
-	MachineDeploymentMachinesReadyV1Beta2Condition = MachinesReadyV1Beta2Condition
-
-	// MachineDeploymentMachinesUpToDateV1Beta2Condition surfaces details of controlled machines not up to date, if any.
- MachineDeploymentMachinesUpToDateV1Beta2Condition = MachinesUpToDateV1Beta2Condition - - // MachineDeploymentScalingUpV1Beta2Condition is true if available replicas < desired replicas. - MachineDeploymentScalingUpV1Beta2Condition = ScalingUpV1Beta2Condition - - // MachineDeploymentScalingDownV1Beta2Condition is true if replicas > desired replicas. - MachineDeploymentScalingDownV1Beta2Condition = ScalingDownV1Beta2Condition - - // MachineDeploymentRemediatingV1Beta2Condition details about ongoing remediation of the controlled machines, if any. - MachineDeploymentRemediatingV1Beta2Condition = RemediatingV1Beta2Condition - - // MachineDeploymentDeletingV1Beta2Condition surfaces details about ongoing deletion of the controlled machines. - MachineDeploymentDeletingV1Beta2Condition = DeletingV1Beta2Condition -) - // Conditions that will be used for the Cluster object in v1Beta2 API version. const ( // ClusterAvailableV1Beta2Condition is true if the Cluster's is not deleted, and RemoteConnectionProbe, InfrastructureReady, diff --git a/controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go b/controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go index 3b0db9bbc186..96d72001fd2f 100644 --- a/controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go +++ b/controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go @@ -154,11 +154,11 @@ const ( // Note: KubeadmControlPlane only remediates machines with HealthCheckSucceeded set to false and with the OwnerRemediated condition set to false. KubeadmControlPlaneRemediatingV1Beta2Condition = clusterv1.RemediatingV1Beta2Condition - // KubeadmControlPlaneRemediatingV1Beta2Reason surfaces when kcp has at least one machine with HealthCheckSucceeded set to false + // KubeadmControlPlaneRemediatingV1Beta2Reason surfaces when the KubeadmControlPlane has at least one machine with HealthCheckSucceeded set to false // and with the OwnerRemediated condition set to false. KubeadmControlPlaneRemediatingV1Beta2Reason = clusterv1.RemediatingV1Beta2Reason - // KubeadmControlPlaneNotRemediatingV1Beta2Reason surfaces when kcp does not have any machine with HealthCheckSucceeded set to false + // KubeadmControlPlaneNotRemediatingV1Beta2Reason surfaces when the KubeadmControlPlane does not have any machine with HealthCheckSucceeded set to false // and with the OwnerRemediated condition set to false. KubeadmControlPlaneNotRemediatingV1Beta2Reason = clusterv1.NotRemediatingV1Beta2Reason diff --git a/controlplane/kubeadm/internal/controllers/status.go b/controlplane/kubeadm/internal/controllers/status.go index 721f4b1abe48..0ab1d370f911 100644 --- a/controlplane/kubeadm/internal/controllers/status.go +++ b/controlplane/kubeadm/internal/controllers/status.go @@ -37,8 +37,8 @@ import ( clog "sigs.k8s.io/cluster-api/util/log" ) -// updateStatus is called after every reconcilitation loop in a defer statement to always make sure we have the -// resource status subresourcs up-to-date. +// updateStatus is called after every reconciliation loop in a defer statement to always make sure we have the +// KubeadmControlPlane status up-to-date. func (r *KubeadmControlPlaneReconciler) updateStatus(ctx context.Context, controlPlane *internal.ControlPlane) error { selector := collections.ControlPlaneSelectorForCluster(controlPlane.Cluster.Name) // Copy label selector to its status counterpart in string format. 
@@ -380,7 +380,6 @@ func setRemediatingCondition(ctx context.Context, kcp *controlplanev1.KubeadmCon
 		return
 	}
 
-	// TODO: Bring together externally remediated machines and owner remediated machines
 	remediatingCondition, err := v1beta2conditions.NewAggregateCondition(
 		machinesToBeRemediated.UnsortedList(), clusterv1.MachineOwnerRemediatedV1Beta2Condition,
 		v1beta2conditions.TargetConditionType(controlplanev1.KubeadmControlPlaneRemediatingV1Beta2Condition),
diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go
index c4c7beafefc3..fa84ca3551d9 100644
--- a/internal/controllers/machine/machine_controller.go
+++ b/internal/controllers/machine/machine_controller.go
@@ -225,7 +225,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
 	}
 
 	defer func() {
-		r.reconcileStatus(ctx, s)
+		r.updateStatus(ctx, s)
 
 		// Always attempt to patch the object and status after each reconciliation.
 		// Patch ObservedGeneration only if the reconciliation completed successfully
diff --git a/internal/controllers/machine/machine_controller_status.go b/internal/controllers/machine/machine_controller_status.go
index d063402584bd..c641f624700d 100644
--- a/internal/controllers/machine/machine_controller_status.go
+++ b/internal/controllers/machine/machine_controller_status.go
@@ -37,13 +37,13 @@ import (
 	v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2"
 )
 
-// reconcileStatus reconciles Machine's status during the entire lifecycle of the machine.
+// updateStatus updates Machine's status.
 // This implies that the code in this function should account for several edge cases e.g. machine being partially provisioned,
 // machine being partially deleted but also for running machines being disrupted e.g. by deleting the node.
 // Additionally, this func should ensure that the conditions managed by this controller are always set in order to
 // comply with the recommendation in the Kubernetes API guidelines.
 // Note: v1beta1 conditions are not managed by this func.
-func (r *Reconciler) reconcileStatus(ctx context.Context, s *scope) {
+func (r *Reconciler) updateStatus(ctx context.Context, s *scope) {
 	// Update status from the Bootstrap Config external resource.
 	// Note: some of the status fields derived from the Bootstrap Config are managed in reconcileBootstrap, e.g. status.BootstrapReady, etc.
 	// here we are taking care only of the delta (condition).
diff --git a/internal/controllers/machinedeployment/machinedeployment_controller.go b/internal/controllers/machinedeployment/machinedeployment_controller.go
index 820e5f551ce6..f3ec111f1a1f 100644
--- a/internal/controllers/machinedeployment/machinedeployment_controller.go
+++ b/internal/controllers/machinedeployment/machinedeployment_controller.go
@@ -154,7 +154,16 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
 		return ctrl.Result{}, err
 	}
 
+	s := &scope{
+		machineDeployment: deployment,
+		cluster:           cluster,
+	}
+
 	defer func() {
+		if err := r.updateStatus(ctx, s); err != nil {
+			reterr = kerrors.NewAggregate([]error{reterr, err})
+		}
+
 		// Always attempt to patch the object and status after each reconciliation.
 		// Patch ObservedGeneration only if the reconciliation completed successfully
 		patchOpts := []patch.Option{}
@@ -168,14 +177,19 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
 
 	// Handle deletion reconciliation loop.
if !deployment.DeletionTimestamp.IsZero() { - return ctrl.Result{}, r.reconcileDelete(ctx, deployment) + return ctrl.Result{}, r.reconcileDelete(ctx, s) } - err = r.reconcile(ctx, cluster, deployment) - if err != nil { - r.recorder.Eventf(deployment, corev1.EventTypeWarning, "ReconcileError", "%v", err) - } - return ctrl.Result{}, err + return ctrl.Result{}, r.reconcile(ctx, s) +} + +type scope struct { + machineDeployment *clusterv1.MachineDeployment + cluster *clusterv1.Cluster + machineSets []*clusterv1.MachineSet + bootstrapTemplateNotFound bool + infrastructureTemplateNotFound bool + getAndAdoptMachineSetsForDeploymentSucceeded bool } func patchMachineDeployment(ctx context.Context, patchHelper *patch.Helper, md *clusterv1.MachineDeployment, options ...patch.Option) error { @@ -196,10 +210,13 @@ func patchMachineDeployment(ctx context.Context, patchHelper *patch.Helper, md * return patchHelper.Patch(ctx, md, options...) } -func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, md *clusterv1.MachineDeployment) error { +func (r *Reconciler) reconcile(ctx context.Context, s *scope) error { log := ctrl.LoggerFrom(ctx) log.V(4).Info("Reconcile MachineDeployment") + md := s.machineDeployment + cluster := s.cluster + // Reconcile and retrieve the Cluster object. if md.Labels == nil { md.Labels = make(map[string]string) @@ -221,19 +238,11 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, UID: cluster.UID, })) - // Make sure to reconcile the external infrastructure reference. - if err := reconcileExternalTemplateReference(ctx, r.Client, cluster, &md.Spec.Template.Spec.InfrastructureRef); err != nil { + if err := r.getTemplatesAndSetOwner(ctx, s); err != nil { return err } - // Make sure to reconcile the external bootstrap reference, if any. - if md.Spec.Template.Spec.Bootstrap.ConfigRef != nil { - if err := reconcileExternalTemplateReference(ctx, r.Client, cluster, md.Spec.Template.Spec.Bootstrap.ConfigRef); err != nil { - return err - } - } - msList, err := r.getAndAdoptMachineSetsForDeployment(ctx, md) - if err != nil { + if err := r.getAndAdoptMachineSetsForDeployment(ctx, s); err != nil { return err } @@ -242,8 +251,8 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, // This logic is needed to add the `cluster.x-k8s.io/deployment-name` label to MachineSets // which were created before the `cluster.x-k8s.io/deployment-name` label was added // to all MachineSets created by a MachineDeployment or if a user manually removed the label. - for idx := range msList { - machineSet := msList[idx] + for idx := range s.machineSets { + machineSet := s.machineSets[idx] if name, ok := machineSet.Labels[clusterv1.MachineDeploymentNameLabel]; ok && name == md.Name { continue } @@ -264,15 +273,15 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, // "capi-machinedeployment" and then we would not be able to e.g. drop labels and annotations. // Note: We are cleaning up managed fields for all MachineSets, so we're able to remove this code in a few // Cluster API releases. If we do this only for selected MachineSets, we would have to keep this code forever. 
- for idx := range msList { - machineSet := msList[idx] + for idx := range s.machineSets { + machineSet := s.machineSets[idx] if err := ssa.CleanUpManagedFieldsForSSAAdoption(ctx, r.Client, machineSet, machineDeploymentManagerName); err != nil { return errors.Wrapf(err, "failed to clean up managedFields of MachineSet %s", klog.KObj(machineSet)) } } if md.Spec.Paused { - return r.sync(ctx, md, msList) + return r.sync(ctx, md, s.machineSets) } if md.Spec.Strategy == nil { @@ -283,33 +292,32 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, if md.Spec.Strategy.RollingUpdate == nil { return errors.Errorf("missing MachineDeployment settings for strategy type: %s", md.Spec.Strategy.Type) } - return r.rolloutRolling(ctx, md, msList) + return r.rolloutRolling(ctx, md, s.machineSets) } if md.Spec.Strategy.Type == clusterv1.OnDeleteMachineDeploymentStrategyType { - return r.rolloutOnDelete(ctx, md, msList) + return r.rolloutOnDelete(ctx, md, s.machineSets) } return errors.Errorf("unexpected deployment strategy type: %s", md.Spec.Strategy.Type) } -func (r *Reconciler) reconcileDelete(ctx context.Context, md *clusterv1.MachineDeployment) error { +func (r *Reconciler) reconcileDelete(ctx context.Context, s *scope) error { log := ctrl.LoggerFrom(ctx) - msList, err := r.getAndAdoptMachineSetsForDeployment(ctx, md) - if err != nil { + if err := r.getAndAdoptMachineSetsForDeployment(ctx, s); err != nil { return err } // If all the descendant machinesets are deleted, then remove the machinedeployment's finalizer. - if len(msList) == 0 { - controllerutil.RemoveFinalizer(md, clusterv1.MachineDeploymentFinalizer) + if len(s.machineSets) == 0 { + controllerutil.RemoveFinalizer(s.machineDeployment, clusterv1.MachineDeploymentFinalizer) return nil } - log.Info("Waiting for MachineSets to be deleted", "MachineSets", clog.ObjNamesString(msList)) + log.Info("Waiting for MachineSets to be deleted", "MachineSets", clog.ObjNamesString(s.machineSets)) // else delete owned machinesets. - for _, ms := range msList { + for _, ms := range s.machineSets { if ms.DeletionTimestamp.IsZero() { log.Info("Deleting MachineSet", "MachineSet", klog.KObj(ms)) if err := r.Client.Delete(ctx, ms); err != nil && !apierrors.IsNotFound(err) { @@ -322,13 +330,15 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, md *clusterv1.MachineD } // getAndAdoptMachineSetsForDeployment returns a list of MachineSets associated with a MachineDeployment. -func (r *Reconciler) getAndAdoptMachineSetsForDeployment(ctx context.Context, md *clusterv1.MachineDeployment) ([]*clusterv1.MachineSet, error) { +func (r *Reconciler) getAndAdoptMachineSetsForDeployment(ctx context.Context, s *scope) error { log := ctrl.LoggerFrom(ctx) + md := s.machineDeployment + // List all MachineSets to find those we own but that no longer match our selector. machineSets := &clusterv1.MachineSetList{} if err := r.Client.List(ctx, machineSets, client.InNamespace(md.Namespace)); err != nil { - return nil, err + return err } filtered := make([]*clusterv1.MachineSet, 0, len(machineSets.Items)) @@ -372,7 +382,9 @@ func (r *Reconciler) getAndAdoptMachineSetsForDeployment(ctx context.Context, md filtered = append(filtered, ms) } - return filtered, nil + s.getAndAdoptMachineSetsForDeploymentSucceeded = true + s.machineSets = filtered + return nil } // adoptOrphan sets the MachineDeployment as a controller OwnerReference to the MachineSet. 
@@ -447,13 +459,38 @@ func (r *Reconciler) MachineSetToDeployments(ctx context.Context, o client.Objec return result } +// getTemplatesAndSetOwner reconciles the templates referenced by a MachineDeployment ensuring they are owned by the Cluster. +func (r *Reconciler) getTemplatesAndSetOwner(ctx context.Context, s *scope) error { + md := s.machineDeployment + cluster := s.cluster + + // Make sure to reconcile the external infrastructure reference. + if err := reconcileExternalTemplateReference(ctx, r.Client, cluster, &md.Spec.Template.Spec.InfrastructureRef); err != nil { + if !apierrors.IsNotFound(err) { + return err + } + s.infrastructureTemplateNotFound = true + } + // Make sure to reconcile the external bootstrap reference, if any. + if md.Spec.Template.Spec.Bootstrap.ConfigRef != nil { + if err := reconcileExternalTemplateReference(ctx, r.Client, cluster, md.Spec.Template.Spec.Bootstrap.ConfigRef); err != nil { + if !apierrors.IsNotFound(err) { + return err + } + s.bootstrapTemplateNotFound = true + } + } + return nil +} + func reconcileExternalTemplateReference(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, ref *corev1.ObjectReference) error { if !strings.HasSuffix(ref.Kind, clusterv1.TemplateSuffix) { return nil } if err := utilconversion.UpdateReferenceAPIContract(ctx, c, ref); err != nil { - return err + // We want to surface the NotFound error only for the referenced object, so we use a generic error in case CRD is not found. + return errors.New(err.Error()) } obj, err := external.Get(ctx, c, ref, cluster.Namespace) diff --git a/internal/controllers/machinedeployment/machinedeployment_controller_test.go b/internal/controllers/machinedeployment/machinedeployment_controller_test.go index 2b5aecad8020..f4faa52b631b 100644 --- a/internal/controllers/machinedeployment/machinedeployment_controller_test.go +++ b/internal/controllers/machinedeployment/machinedeployment_controller_test.go @@ -968,11 +968,14 @@ func TestGetMachineSetsForDeployment(t *testing.T) { recorder: record.NewFakeRecorder(32), } - got, err := r.getAndAdoptMachineSetsForDeployment(ctx, &tc.machineDeployment) + s := &scope{ + machineDeployment: &tc.machineDeployment, + } + err := r.getAndAdoptMachineSetsForDeployment(ctx, s) g.Expect(err).ToNot(HaveOccurred()) - g.Expect(got).To(HaveLen(len(tc.expected))) - for idx, res := range got { + g.Expect(s.machineSets).To(HaveLen(len(tc.expected))) + for idx, res := range s.machineSets { g.Expect(res.Name).To(Equal(tc.expected[idx].Name)) g.Expect(res.Namespace).To(Equal(tc.expected[idx].Namespace)) } @@ -1065,7 +1068,10 @@ func TestReconciler_reconcileDelete(t *testing.T) { recorder: record.NewFakeRecorder(32), } - err := r.reconcileDelete(ctx, tt.machineDeployment) + s := &scope{ + machineDeployment: tt.machineDeployment, + } + err := r.reconcileDelete(ctx, s) if tt.expectError { g.Expect(err).To(HaveOccurred()) } else { diff --git a/internal/controllers/machinedeployment/machinedeployment_status.go b/internal/controllers/machinedeployment/machinedeployment_status.go new file mode 100644 index 000000000000..ef078e73a1cd --- /dev/null +++ b/internal/controllers/machinedeployment/machinedeployment_status.go @@ -0,0 +1,502 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package machinedeployment
+
+import (
+	"context"
+	"fmt"
+	"sort"
+	"strings"
+	"time"
+
+	"github.com/pkg/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+	"sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil"
+	"sigs.k8s.io/cluster-api/util/collections"
+	v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2"
+	clog "sigs.k8s.io/cluster-api/util/log"
+)
+
+func (r *Reconciler) updateStatus(ctx context.Context, s *scope) (retErr error) {
+	// Get all Machines controlled by this MachineDeployment.
+	var machines, machinesToBeRemediated, unHealthyMachines collections.Machines
+	if selectorMap, err := metav1.LabelSelectorAsMap(&s.machineDeployment.Spec.Selector); err == nil {
+		machineList := &clusterv1.MachineList{}
+		if err := r.Client.List(ctx, machineList, client.InNamespace(s.machineDeployment.Namespace), client.MatchingLabels(selectorMap)); err != nil {
+			retErr = errors.Wrap(err, "failed to list machines")
+		}
+		machines = collections.FromMachineList(machineList)
+		machinesToBeRemediated = machines.Filter(collections.IsUnhealthyAndOwnerRemediated)
+		unHealthyMachines = machines.Filter(collections.IsUnhealthy)
+	} else {
+		retErr = errors.Wrap(err, "failed to convert label selector to a map")
+	}
+
+	// If the controller failed to read MachineSets, do not update replica counters.
+	if s.getAndAdoptMachineSetsForDeploymentSucceeded {
+		setReplicas(s.machineDeployment, s.machineSets)
+	}
+
+	setAvailableCondition(ctx, s.machineDeployment, s.getAndAdoptMachineSetsForDeploymentSucceeded)
+
+	setScalingUpCondition(ctx, s.machineDeployment, s.machineSets, s.bootstrapTemplateNotFound, s.infrastructureTemplateNotFound, s.getAndAdoptMachineSetsForDeploymentSucceeded)
+	setScalingDownCondition(ctx, s.machineDeployment, s.machineSets, machines, s.getAndAdoptMachineSetsForDeploymentSucceeded)
+
+	setMachinesReadyCondition(ctx, s.machineDeployment, machines)
+	setMachinesUpToDateCondition(ctx, s.machineDeployment, machines)
+
+	setRemediatingCondition(ctx, s.machineDeployment, machinesToBeRemediated, unHealthyMachines)
+
+	setDeletingCondition(ctx, s.machineDeployment, s.machineSets, machines, s.getAndAdoptMachineSetsForDeploymentSucceeded)
+
+	return retErr
+}
+
+// setReplicas sets replicas in the v1beta2 status.
+// Note: this controller computes replicas several times during a reconcile, because those counters are
+// used by low-level operations to take decisions, but those decisions might in turn impact the very same counters,
+// e.g. scaling up a MachineSet is based on the counters and it changes the MachineSet's replica number;
+// as a consequence it is required to compute the counters again before scaling down MachineSets,
+// and again before computing the overall availability of the MachineDeployment.
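+// Note: the counters below are aggregated from the MachineSets controlled by this MachineDeployment;
+// a counter is left nil when it cannot be computed yet, e.g. when the corresponding MachineSet counters are not set.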
+func setReplicas(machineDeployment *clusterv1.MachineDeployment, machineSets []*clusterv1.MachineSet) { + if machineDeployment.Status.V1Beta2 == nil { + machineDeployment.Status.V1Beta2 = &clusterv1.MachineDeploymentV1Beta2Status{} + } + + machineDeployment.Status.V1Beta2.ReadyReplicas = mdutil.GetV1Beta2ReadyReplicaCountForMachineSets(machineSets) + machineDeployment.Status.V1Beta2.AvailableReplicas = mdutil.GetV1Beta2AvailableReplicaCountForMachineSets(machineSets) + machineDeployment.Status.V1Beta2.UpToDateReplicas = mdutil.GetV1Beta2UptoDateReplicaCountForMachineSets(machineSets) +} + +func setAvailableCondition(_ context.Context, machineDeployment *clusterv1.MachineDeployment, getAndAdoptMachineSetsForDeploymentSucceeded bool) { + // If we got unexpected errors in listing the machine sets (this should never happen), surface them. + if !getAndAdoptMachineSetsForDeploymentSucceeded { + v1beta2conditions.Set(machineDeployment, metav1.Condition{ + Type: clusterv1.MachineDeploymentAvailableV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentAvailableInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }) + return + } + + // Surface if .spec.replicas is not yet set (this should never happen). + if machineDeployment.Spec.Replicas == nil { + v1beta2conditions.Set(machineDeployment, metav1.Condition{ + Type: clusterv1.MachineDeploymentAvailableV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentAvailableWaitingForReplicasSetV1Beta2Reason, + }) + return + } + + // Surface if .status.v1beta2.availableReplicas is not yet set. + if machineDeployment.Status.V1Beta2 == nil || machineDeployment.Status.V1Beta2.AvailableReplicas == nil { + v1beta2conditions.Set(machineDeployment, metav1.Condition{ + Type: clusterv1.MachineDeploymentAvailableV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentAvailableWaitingForAvailableReplicasSetV1Beta2Reason, + }) + return + } + + // minReplicasNeeded will be equal to md.Spec.Replicas when the strategy is not RollingUpdateMachineDeploymentStrategyType. 
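+	// e.g. given spec.replicas=5 and a RollingUpdate strategy with maxUnavailable=1, minReplicasNeeded is 4,
+	// so the condition remains true as long as at least 4 replicas are available.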
+ minReplicasNeeded := *(machineDeployment.Spec.Replicas) - mdutil.MaxUnavailable(*machineDeployment) + + if *machineDeployment.Status.V1Beta2.AvailableReplicas >= minReplicasNeeded { + v1beta2conditions.Set(machineDeployment, metav1.Condition{ + Type: clusterv1.MachineDeploymentAvailableV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineDeploymentAvailableV1Beta2Reason, + }) + return + } + + message := fmt.Sprintf("%d available replicas, at least %d required", *machineDeployment.Status.V1Beta2.AvailableReplicas, minReplicasNeeded) + if machineDeployment.Spec.Strategy != nil && mdutil.IsRollingUpdate(machineDeployment) && machineDeployment.Spec.Strategy.RollingUpdate != nil { + message += fmt.Sprintf(" (spec.strategy.rollout.maxUnavailable is %s)", machineDeployment.Spec.Strategy.RollingUpdate.MaxUnavailable) + } + v1beta2conditions.Set(machineDeployment, metav1.Condition{ + Type: clusterv1.MachineDeploymentAvailableV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineDeploymentNotAvailableV1Beta2Reason, + Message: message, + }) +} + +func setScalingUpCondition(_ context.Context, machineDeployment *clusterv1.MachineDeployment, machineSets []*clusterv1.MachineSet, bootstrapObjectNotFound, infrastructureObjectNotFound, getAndAdoptMachineSetsForDeploymentSucceeded bool) { + // If we got unexpected errors in listing the machine sets (this should never happen), surface them. + if !getAndAdoptMachineSetsForDeploymentSucceeded { + v1beta2conditions.Set(machineDeployment, metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingUpV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentScalingUpInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }) + return + } + + // Surface if .spec.replicas is not yet set (this should never happen). + if machineDeployment.Spec.Replicas == nil { + v1beta2conditions.Set(machineDeployment, metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingUpV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentScalingUpWaitingForReplicasSetV1Beta2Reason, + }) + return + } + + desiredReplicas := *machineDeployment.Spec.Replicas + if !machineDeployment.DeletionTimestamp.IsZero() { + desiredReplicas = 0 + } + currentReplicas := mdutil.GetReplicaCountForMachineSets(machineSets) + + missingReferencesMessage := calculateMissingReferencesMessage(machineDeployment, bootstrapObjectNotFound, infrastructureObjectNotFound) + + if currentReplicas >= desiredReplicas { + var message string + if missingReferencesMessage != "" { + message = fmt.Sprintf("Scaling up would be blocked %s", missingReferencesMessage) + } + v1beta2conditions.Set(machineDeployment, metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingUpV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineDeploymentNotScalingUpV1Beta2Reason, + Message: message, + }) + return + } + + // Scaling up. 
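+	// Note: when a referenced bootstrap or infrastructure template is missing, the message below also surfaces
+	// that the scale up is blocked (see calculateMissingReferencesMessage).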
+ message := fmt.Sprintf("Scaling up from %d to %d replicas", currentReplicas, desiredReplicas) + if missingReferencesMessage != "" { + message += fmt.Sprintf(" is blocked %s", missingReferencesMessage) + } + v1beta2conditions.Set(machineDeployment, metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingUpV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineDeploymentScalingUpV1Beta2Reason, + Message: message, + }) +} + +func setScalingDownCondition(_ context.Context, machineDeployment *clusterv1.MachineDeployment, machineSets []*clusterv1.MachineSet, machines collections.Machines, getAndAdoptMachineSetsForDeploymentSucceeded bool) { + // If we got unexpected errors in listing the machines sets (this should never happen), surface them. + if !getAndAdoptMachineSetsForDeploymentSucceeded { + v1beta2conditions.Set(machineDeployment, metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingDownV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentScalingDownInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }) + return + } + + // Surface if .spec.replicas is not yet set (this should never happen). + if machineDeployment.Spec.Replicas == nil { + v1beta2conditions.Set(machineDeployment, metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingDownV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentScalingDownWaitingForReplicasSetV1Beta2Reason, + }) + return + } + + desiredReplicas := *machineDeployment.Spec.Replicas + if !machineDeployment.DeletionTimestamp.IsZero() { + desiredReplicas = 0 + } + currentReplicas := mdutil.GetReplicaCountForMachineSets(machineSets) + + // Scaling down. + if currentReplicas > desiredReplicas { + message := fmt.Sprintf("Scaling down from %d to %d replicas", currentReplicas, desiredReplicas) + staleMessage := aggregateStaleMachines(machines) + if staleMessage != "" { + message += fmt.Sprintf(" and %s", staleMessage) + } + v1beta2conditions.Set(machineDeployment, metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingDownV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineDeploymentScalingDownV1Beta2Reason, + Message: message, + }) + return + } + + // Not scaling down. + v1beta2conditions.Set(machineDeployment, metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingDownV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineDeploymentNotScalingDownV1Beta2Reason, + }) +} + +func setMachinesReadyCondition(ctx context.Context, machineDeployment *clusterv1.MachineDeployment, machines collections.Machines) { + log := ctrl.LoggerFrom(ctx) + // If we got unexpected errors in listing the machines (this should never happen), surface them. 
+ if machines == nil { + v1beta2conditions.Set(machineDeployment, metav1.Condition{ + Type: clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentMachinesReadyInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }) + return + } + + if len(machines) == 0 { + v1beta2conditions.Set(machineDeployment, metav1.Condition{ + Type: clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineDeploymentMachinesReadyNoReplicasV1Beta2Reason, + }) + return + } + + readyCondition, err := v1beta2conditions.NewAggregateCondition( + machines.UnsortedList(), clusterv1.MachineReadyV1Beta2Condition, + v1beta2conditions.TargetConditionType(clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition), + ) + if err != nil { + log.Error(err, "Failed to aggregate Machine's Ready conditions") + v1beta2conditions.Set(machineDeployment, metav1.Condition{ + Type: clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentMachinesReadyInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }) + return + } + + v1beta2conditions.Set(machineDeployment, *readyCondition) +} + +func setMachinesUpToDateCondition(ctx context.Context, machineDeployment *clusterv1.MachineDeployment, machines collections.Machines) { + log := ctrl.LoggerFrom(ctx) + // If we got unexpected errors in listing the machines (this should never happen), surface them. + if machines == nil { + v1beta2conditions.Set(machineDeployment, metav1.Condition{ + Type: clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentMachinesUpToDateInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }) + return + } + + if len(machines) == 0 { + v1beta2conditions.Set(machineDeployment, metav1.Condition{ + Type: clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineDeploymentMachinesUpToDateNoReplicasV1Beta2Reason, + }) + return + } + + upToDateCondition, err := v1beta2conditions.NewAggregateCondition( + machines.UnsortedList(), clusterv1.MachineUpToDateV1Beta2Condition, + v1beta2conditions.TargetConditionType(clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition), + ) + if err != nil { + log.Error(err, "Failed to aggregate Machine's UpToDate conditions") + v1beta2conditions.Set(machineDeployment, metav1.Condition{ + Type: clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentMachinesUpToDateInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }) + return + } + + v1beta2conditions.Set(machineDeployment, *upToDateCondition) +} + +func setRemediatingCondition(ctx context.Context, machineDeployment *clusterv1.MachineDeployment, machinesToBeRemediated, unhealthyMachines collections.Machines) { + if machinesToBeRemediated == nil || unhealthyMachines == nil { + v1beta2conditions.Set(machineDeployment, metav1.Condition{ + Type: clusterv1.MachineDeploymentRemediatingV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentRemediatingInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }) + return + } + + if len(machinesToBeRemediated) == 0 { + message := 
aggregateUnhealthyMachines(unhealthyMachines)
+		v1beta2conditions.Set(machineDeployment, metav1.Condition{
+			Type:    clusterv1.MachineDeploymentRemediatingV1Beta2Condition,
+			Status:  metav1.ConditionFalse,
+			Reason:  clusterv1.MachineDeploymentNotRemediatingV1Beta2Reason,
+			Message: message,
+		})
+		return
+	}
+
+	remediatingCondition, err := v1beta2conditions.NewAggregateCondition(
+		machinesToBeRemediated.UnsortedList(), clusterv1.MachineOwnerRemediatedV1Beta2Condition,
+		v1beta2conditions.TargetConditionType(clusterv1.MachineDeploymentRemediatingV1Beta2Condition),
+	)
+	if err != nil {
+		v1beta2conditions.Set(machineDeployment, metav1.Condition{
+			Type:    clusterv1.MachineDeploymentRemediatingV1Beta2Condition,
+			Status:  metav1.ConditionUnknown,
+			Reason:  clusterv1.MachineDeploymentRemediatingInternalErrorV1Beta2Reason,
+			Message: "Please check controller logs for errors",
+		})
+
+		log := ctrl.LoggerFrom(ctx)
+		log.Error(err, fmt.Sprintf("Failed to aggregate Machine's %s conditions", clusterv1.MachineOwnerRemediatedV1Beta2Condition))
+		return
+	}
+
+	v1beta2conditions.Set(machineDeployment, metav1.Condition{
+		Type:    remediatingCondition.Type,
+		Status:  metav1.ConditionTrue,
+		Reason:  clusterv1.MachineDeploymentRemediatingV1Beta2Reason,
+		Message: remediatingCondition.Message,
+	})
+}
+
+func setDeletingCondition(_ context.Context, machineDeployment *clusterv1.MachineDeployment, machineSets []*clusterv1.MachineSet, machines collections.Machines, getAndAdoptMachineSetsForDeploymentSucceeded bool) {
+	// If we got unexpected errors in listing the machine sets or machines (this should never happen), surface them.
+	if !getAndAdoptMachineSetsForDeploymentSucceeded || machines == nil {
+		v1beta2conditions.Set(machineDeployment, metav1.Condition{
+			Type:    clusterv1.MachineDeploymentDeletingV1Beta2Condition,
+			Status:  metav1.ConditionUnknown,
+			Reason:  clusterv1.MachineDeploymentDeletingInternalErrorV1Beta2Reason,
+			Message: "Please check controller logs for errors",
+		})
+		return
+	}
+
+	if machineDeployment.DeletionTimestamp.IsZero() {
+		v1beta2conditions.Set(machineDeployment, metav1.Condition{
+			Type:   clusterv1.MachineDeploymentDeletingV1Beta2Condition,
+			Status: metav1.ConditionFalse,
+			Reason: clusterv1.MachineDeploymentDeletingDeletionTimestampNotSetV1Beta2Reason,
+		})
+		return
+	}
+
+	message := ""
+	if len(machines) > 0 {
+		message = fmt.Sprintf("Deleting %d replicas", len(machines))
+		staleMessage := aggregateStaleMachines(machines)
+		if staleMessage != "" {
+			message += fmt.Sprintf(" and %s", staleMessage)
+		}
+	}
+	if len(machines) == 0 && len(machineSets) > 0 {
+		// Note: this should not happen, or it should only happen for a very short time while the finalizer is removed.
+ message = fmt.Sprintf("Deleting %d MachineSets", len(machineSets)) + } + v1beta2conditions.Set(machineDeployment, metav1.Condition{ + Type: clusterv1.MachineDeploymentDeletingV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineDeploymentDeletingDeletionTimestampSetV1Beta2Reason, + Message: message, + }) +} + +func calculateMissingReferencesMessage(machineDeployment *clusterv1.MachineDeployment, bootstrapTemplateNotFound, infraMachineTemplateNotFound bool) string { + missingObjects := []string{} + if bootstrapTemplateNotFound { + missingObjects = append(missingObjects, machineDeployment.Spec.Template.Spec.Bootstrap.ConfigRef.Kind) + } + if infraMachineTemplateNotFound { + missingObjects = append(missingObjects, machineDeployment.Spec.Template.Spec.InfrastructureRef.Kind) + } + + if len(missingObjects) == 0 { + return "" + } + + if len(missingObjects) == 1 { + return fmt.Sprintf("because %s does not exist", missingObjects[0]) + } + + return fmt.Sprintf("because %s do not exist", strings.Join(missingObjects, " and ")) +} + +func aggregateStaleMachines(machines collections.Machines) string { + if len(machines) == 0 { + return "" + } + + machineNames := []string{} + for _, machine := range machines { + if !machine.GetDeletionTimestamp().IsZero() && time.Since(machine.GetDeletionTimestamp().Time) > time.Minute*30 { + machineNames = append(machineNames, machine.GetName()) + } + } + + if len(machineNames) == 0 { + return "" + } + + message := "Machine" + if len(machineNames) > 1 { + message += "s" + } + + sort.Strings(machineNames) + message += " " + clog.ListToString(machineNames, func(s string) string { return s }, 3) + + if len(machineNames) == 1 { + message += " is " + } else { + message += " are " + } + message += "in deletion since more than 30m" + + return message +} + +func aggregateUnhealthyMachines(machines collections.Machines) string { + machineNames := []string{} + for _, machine := range machines { + machineNames = append(machineNames, machine.GetName()) + } + + if len(machineNames) == 0 { + return "" + } + + message := "Machine" + if len(machineNames) > 1 { + message += "s" + } + + sort.Strings(machineNames) + message += " " + clog.ListToString(machineNames, func(s string) string { return s }, 3) + + if len(machineNames) == 1 { + message += " is " + } else { + message += " are " + } + message += "not healthy (not to be remediated by KCP)" + + return message +} diff --git a/internal/controllers/machinedeployment/machinedeployment_status_test.go b/internal/controllers/machinedeployment/machinedeployment_status_test.go new file mode 100644 index 000000000000..1b9f921ea7b7 --- /dev/null +++ b/internal/controllers/machinedeployment/machinedeployment_status_test.go @@ -0,0 +1,1158 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package machinedeployment + +import ( + "testing" + "time" + + . 
"github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/utils/ptr" + + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util/collections" + v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2" +) + +func Test_setReplicas(t *testing.T) { + tests := []struct { + name string + machineSets []*clusterv1.MachineSet + expectReadyReplicas *int32 + expectAvailableReplicas *int32 + expectUpToDateReplicas *int32 + }{ + { + name: "No MachineSets", + machineSets: nil, + expectReadyReplicas: nil, + expectAvailableReplicas: nil, + expectUpToDateReplicas: nil, + }, + { + name: "MachineSets without replicas set", + machineSets: []*clusterv1.MachineSet{ + fakeMachineSet("ms1"), + }, + expectReadyReplicas: nil, + expectAvailableReplicas: nil, + expectUpToDateReplicas: nil, + }, + { + name: "MachineSets with replicas set", + machineSets: []*clusterv1.MachineSet{ + fakeMachineSet("ms1", withStatusV1beta2ReadyReplicas(5), withStatusV1beta2AvailableReplicas(3), withStatusV1beta2UpToDateReplicas(3)), + fakeMachineSet("ms2", withStatusV1beta2ReadyReplicas(2), withStatusV1beta2AvailableReplicas(2), withStatusV1beta2UpToDateReplicas(1)), + fakeMachineSet("ms3"), + }, + expectReadyReplicas: ptr.To(int32(7)), + expectAvailableReplicas: ptr.To(int32(5)), + expectUpToDateReplicas: ptr.To(int32(4)), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + md := &clusterv1.MachineDeployment{} + setReplicas(md, tt.machineSets) + + g.Expect(md.Status.V1Beta2).ToNot(BeNil()) + g.Expect(md.Status.V1Beta2.ReadyReplicas).To(Equal(tt.expectReadyReplicas)) + g.Expect(md.Status.V1Beta2.AvailableReplicas).To(Equal(tt.expectAvailableReplicas)) + g.Expect(md.Status.V1Beta2.UpToDateReplicas).To(Equal(tt.expectUpToDateReplicas)) + }) + } +} + +func Test_setAvailableCondition(t *testing.T) { + tests := []struct { + name string + machineDeployment *clusterv1.MachineDeployment + getAndAdoptMachineSetsForDeploymentSucceeded bool + expectCondition metav1.Condition + }{ + { + name: "getAndAdoptMachineSetsForDeploymentSucceeded failed", + machineDeployment: &clusterv1.MachineDeployment{}, + getAndAdoptMachineSetsForDeploymentSucceeded: false, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentAvailableV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentAvailableInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }, + }, + { + name: "spec.replicas not set", + machineDeployment: &clusterv1.MachineDeployment{}, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentAvailableV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentAvailableWaitingForReplicasSetV1Beta2Reason, + }, + }, + { + name: "status.v1beta2 not set", + machineDeployment: &clusterv1.MachineDeployment{ + Spec: clusterv1.MachineDeploymentSpec{Replicas: ptr.To(int32(5))}, + }, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentAvailableV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentAvailableWaitingForAvailableReplicasSetV1Beta2Reason, + }, + }, + { + name: "all the expected replicase are available", + machineDeployment: &clusterv1.MachineDeployment{ + Spec: 
clusterv1.MachineDeploymentSpec{
+					Replicas: ptr.To(int32(5)),
+					Strategy: &clusterv1.MachineDeploymentStrategy{
+						Type: clusterv1.RollingUpdateMachineDeploymentStrategyType,
+						RollingUpdate: &clusterv1.MachineRollingUpdateDeployment{
+							MaxSurge:       ptr.To(intstr.FromInt32(1)),
+							MaxUnavailable: ptr.To(intstr.FromInt32(0)),
+						},
+					},
+				},
+				Status: clusterv1.MachineDeploymentStatus{V1Beta2: &clusterv1.MachineDeploymentV1Beta2Status{AvailableReplicas: ptr.To(int32(5))}},
+			},
+			getAndAdoptMachineSetsForDeploymentSucceeded: true,
+			expectCondition: metav1.Condition{
+				Type:   clusterv1.MachineDeploymentAvailableV1Beta2Condition,
+				Status: metav1.ConditionTrue,
+				Reason: clusterv1.MachineDeploymentAvailableV1Beta2Reason,
+			},
+		},
+		{
+			name: "some replicas are not available, but within MaxUnavailable range",
+			machineDeployment: &clusterv1.MachineDeployment{
+				Spec: clusterv1.MachineDeploymentSpec{
+					Replicas: ptr.To(int32(5)),
+					Strategy: &clusterv1.MachineDeploymentStrategy{
+						Type: clusterv1.RollingUpdateMachineDeploymentStrategyType,
+						RollingUpdate: &clusterv1.MachineRollingUpdateDeployment{
+							MaxSurge:       ptr.To(intstr.FromInt32(1)),
+							MaxUnavailable: ptr.To(intstr.FromInt32(1)),
+						},
+					},
+				},
+				Status: clusterv1.MachineDeploymentStatus{V1Beta2: &clusterv1.MachineDeploymentV1Beta2Status{AvailableReplicas: ptr.To(int32(4))}},
+			},
+			getAndAdoptMachineSetsForDeploymentSucceeded: true,
+			expectCondition: metav1.Condition{
+				Type:   clusterv1.MachineDeploymentAvailableV1Beta2Condition,
+				Status: metav1.ConditionTrue,
+				Reason: clusterv1.MachineDeploymentAvailableV1Beta2Reason,
+			},
+		},
+		{
+			name: "some replicas are not available, less than MaxUnavailable",
+			machineDeployment: &clusterv1.MachineDeployment{
+				Spec: clusterv1.MachineDeploymentSpec{
+					Replicas: ptr.To(int32(5)),
+					Strategy: &clusterv1.MachineDeploymentStrategy{
+						Type: clusterv1.RollingUpdateMachineDeploymentStrategyType,
+						RollingUpdate: &clusterv1.MachineRollingUpdateDeployment{
+							MaxSurge:       ptr.To(intstr.FromInt32(1)),
+							MaxUnavailable: ptr.To(intstr.FromInt32(1)),
+						},
+					},
+				},
+				Status: clusterv1.MachineDeploymentStatus{V1Beta2: &clusterv1.MachineDeploymentV1Beta2Status{AvailableReplicas: ptr.To(int32(4))}},
+			},
+			getAndAdoptMachineSetsForDeploymentSucceeded: true,
+			expectCondition: metav1.Condition{
+				Type:    clusterv1.MachineDeploymentAvailableV1Beta2Condition,
+				Status:  metav1.ConditionTrue,
+				Reason:  clusterv1.MachineDeploymentAvailableV1Beta2Reason,
+				Message: "",
+			},
+		},
+		{
+			name: "some replicas are not available, more than MaxUnavailable",
+			machineDeployment: &clusterv1.MachineDeployment{
+				Spec: clusterv1.MachineDeploymentSpec{
+					Replicas: ptr.To(int32(5)),
+					Strategy: &clusterv1.MachineDeploymentStrategy{
+						Type: clusterv1.RollingUpdateMachineDeploymentStrategyType,
+						RollingUpdate: &clusterv1.MachineRollingUpdateDeployment{
+							MaxSurge:       ptr.To(intstr.FromInt32(1)),
+							MaxUnavailable: ptr.To(intstr.FromInt32(1)),
+						},
+					},
+				},
+				Status: clusterv1.MachineDeploymentStatus{V1Beta2: &clusterv1.MachineDeploymentV1Beta2Status{AvailableReplicas: ptr.To(int32(3))}},
+			},
+			getAndAdoptMachineSetsForDeploymentSucceeded: true,
+			expectCondition: metav1.Condition{
+				Type:    clusterv1.MachineDeploymentAvailableV1Beta2Condition,
+				Status:  metav1.ConditionFalse,
+				Reason:  clusterv1.MachineDeploymentNotAvailableV1Beta2Reason,
+				Message: "3 available replicas, at least 4 required (spec.strategy.rollout.maxUnavailable is 1)",
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			g := NewWithT(t)
+
+			
setAvailableCondition(ctx, tt.machineDeployment, tt.getAndAdoptMachineSetsForDeploymentSucceeded) + + condition := v1beta2conditions.Get(tt.machineDeployment, clusterv1.MachineDeploymentAvailableV1Beta2Condition) + g.Expect(condition).ToNot(BeNil()) + g.Expect(*condition).To(v1beta2conditions.MatchCondition(tt.expectCondition, v1beta2conditions.IgnoreLastTransitionTime(true))) + }) + } +} + +func Test_setScalingUpCondition(t *testing.T) { + defaultMachineDeployment := &clusterv1.MachineDeployment{ + Spec: clusterv1.MachineDeploymentSpec{ + Replicas: ptr.To[int32](0), + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + Bootstrap: clusterv1.Bootstrap{ + ConfigRef: &corev1.ObjectReference{ + Kind: "KubeadmBootstrapTemplate", + Namespace: "some-namespace", + Name: "some-name", + }, + }, + InfrastructureRef: corev1.ObjectReference{ + Kind: "DockerMachineTemplate", + Namespace: "some-namespace", + Name: "some-name", + }, + }, + }, + }, + } + + scalingUpMachineDeploymentWith3Replicas := defaultMachineDeployment.DeepCopy() + scalingUpMachineDeploymentWith3Replicas.Spec.Replicas = ptr.To[int32](3) + + deletingMachineDeploymentWith3Replicas := defaultMachineDeployment.DeepCopy() + deletingMachineDeploymentWith3Replicas.DeletionTimestamp = ptr.To(metav1.Now()) + deletingMachineDeploymentWith3Replicas.Spec.Replicas = ptr.To[int32](3) + + tests := []struct { + name string + machineDeployment *clusterv1.MachineDeployment + machineSets []*clusterv1.MachineSet + bootstrapTemplateNotFound bool + infrastructureTemplateNotFound bool + getAndAdoptMachineSetsForDeploymentSucceeded bool + expectCondition metav1.Condition + }{ + { + name: "getAndAdoptMachineSetsForDeploymentSucceeded failed", + machineDeployment: defaultMachineDeployment, + bootstrapTemplateNotFound: false, + infrastructureTemplateNotFound: false, + getAndAdoptMachineSetsForDeploymentSucceeded: false, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingUpV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentScalingUpInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }, + }, + { + name: "not scaling up and no machines", + machineDeployment: defaultMachineDeployment, + bootstrapTemplateNotFound: false, + infrastructureTemplateNotFound: false, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingUpV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineDeploymentNotScalingUpV1Beta2Reason, + }, + }, + { + name: "not scaling up with machines", + machineDeployment: deletingMachineDeploymentWith3Replicas, + machineSets: []*clusterv1.MachineSet{ + fakeMachineSet("ms1", withSpecReplicas(1)), + fakeMachineSet("ms2", withSpecReplicas(2)), + }, + bootstrapTemplateNotFound: false, + infrastructureTemplateNotFound: false, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingUpV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineDeploymentNotScalingUpV1Beta2Reason, + }, + }, + { + name: "not scaling up and no machines and bootstrapConfig object not found", + machineDeployment: defaultMachineDeployment, + bootstrapTemplateNotFound: true, + infrastructureTemplateNotFound: false, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingUpV1Beta2Condition, + Status: 
metav1.ConditionFalse, + Reason: clusterv1.MachineDeploymentNotScalingUpV1Beta2Reason, + Message: "Scaling up would be blocked because KubeadmBootstrapTemplate does not exist", + }, + }, + { + name: "not scaling up and no machines and infrastructure object not found", + machineDeployment: defaultMachineDeployment, + bootstrapTemplateNotFound: false, + infrastructureTemplateNotFound: true, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingUpV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineDeploymentNotScalingUpV1Beta2Reason, + Message: "Scaling up would be blocked because DockerMachineTemplate does not exist", + }, + }, + { + name: "not scaling up and no machines and bootstrapConfig and infrastructure object not found", + machineDeployment: defaultMachineDeployment, + bootstrapTemplateNotFound: true, + infrastructureTemplateNotFound: true, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingUpV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineDeploymentNotScalingUpV1Beta2Reason, + Message: "Scaling up would be blocked because KubeadmBootstrapTemplate and DockerMachineTemplate do not exist", + }, + }, + { + name: "scaling up", + machineDeployment: scalingUpMachineDeploymentWith3Replicas, + bootstrapTemplateNotFound: false, + infrastructureTemplateNotFound: false, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingUpV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineDeploymentScalingUpV1Beta2Reason, + Message: "Scaling up from 0 to 3 replicas", + }, + }, + { + name: "scaling up with machines", + machineDeployment: scalingUpMachineDeploymentWith3Replicas, + machineSets: []*clusterv1.MachineSet{ + fakeMachineSet("ms1", withSpecReplicas(1)), + fakeMachineSet("ms2", withSpecReplicas(1)), + }, + bootstrapTemplateNotFound: false, + infrastructureTemplateNotFound: false, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingUpV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineDeploymentScalingUpV1Beta2Reason, + Message: "Scaling up from 2 to 3 replicas", + }, + }, + { + name: "scaling up and blocked by bootstrap object", + machineDeployment: scalingUpMachineDeploymentWith3Replicas, + bootstrapTemplateNotFound: true, + infrastructureTemplateNotFound: false, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingUpV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineDeploymentScalingUpV1Beta2Reason, + Message: "Scaling up from 0 to 3 replicas is blocked because KubeadmBootstrapTemplate does not exist", + }, + }, + { + name: "scaling up and blocked by infrastructure object", + machineDeployment: scalingUpMachineDeploymentWith3Replicas, + bootstrapTemplateNotFound: false, + infrastructureTemplateNotFound: true, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingUpV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineDeploymentScalingUpV1Beta2Reason, + Message: "Scaling up from 0 to 3 replicas is blocked because DockerMachineTemplate does not exist", + }, + }, + { + name: 
"deleting", + machineDeployment: deletingMachineDeploymentWith3Replicas, + machineSets: []*clusterv1.MachineSet{{}, {}, {}}, + bootstrapTemplateNotFound: false, + infrastructureTemplateNotFound: false, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingUpV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineDeploymentNotScalingUpV1Beta2Reason, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + setScalingUpCondition(ctx, tt.machineDeployment, tt.machineSets, tt.bootstrapTemplateNotFound, tt.infrastructureTemplateNotFound, tt.getAndAdoptMachineSetsForDeploymentSucceeded) + + condition := v1beta2conditions.Get(tt.machineDeployment, clusterv1.MachineDeploymentScalingUpV1Beta2Condition) + g.Expect(condition).ToNot(BeNil()) + g.Expect(*condition).To(v1beta2conditions.MatchCondition(tt.expectCondition, v1beta2conditions.IgnoreLastTransitionTime(true))) + }) + } +} + +func Test_setScalingDownCondition(t *testing.T) { + defaultMachineDeployment := &clusterv1.MachineDeployment{ + Spec: clusterv1.MachineDeploymentSpec{ + Replicas: ptr.To[int32](0), + }, + } + + machineDeploymentWith1Replica := defaultMachineDeployment.DeepCopy() + machineDeploymentWith1Replica.Spec.Replicas = ptr.To[int32](1) + + deletingMachineDeployment := defaultMachineDeployment.DeepCopy() + deletingMachineDeployment.Spec.Replicas = ptr.To[int32](1) + deletingMachineDeployment.DeletionTimestamp = ptr.To(metav1.Now()) + + tests := []struct { + name string + machineDeployment *clusterv1.MachineDeployment + machineSets []*clusterv1.MachineSet + machines []*clusterv1.Machine + getAndAdoptMachineSetsForDeploymentSucceeded bool + expectCondition metav1.Condition + }{ + { + name: "getAndAdoptMachineSetsForDeploymentSucceeded failed", + machineDeployment: defaultMachineDeployment, + machineSets: nil, + getAndAdoptMachineSetsForDeploymentSucceeded: false, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingDownV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentScalingDownInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }, + }, + { + name: "not scaling down and no machines", + machineDeployment: defaultMachineDeployment, + machineSets: []*clusterv1.MachineSet{}, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingDownV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineDeploymentNotScalingDownV1Beta2Reason, + }, + }, + { + name: "not scaling down because scaling up", + machineDeployment: machineDeploymentWith1Replica, + machineSets: []*clusterv1.MachineSet{}, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingDownV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineDeploymentNotScalingDownV1Beta2Reason, + }, + }, + { + name: "scaling down to zero", + machineDeployment: defaultMachineDeployment, + machineSets: []*clusterv1.MachineSet{ + fakeMachineSet("ms1", withSpecReplicas(1)), + }, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingDownV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineDeploymentScalingDownV1Beta2Reason, + Message: "Scaling down from 1 to 0 replicas", 
+ }, + }, + { + name: "scaling down with 1 stale machine", + machineDeployment: machineDeploymentWith1Replica, + machineSets: []*clusterv1.MachineSet{ + fakeMachineSet("ms1", withSpecReplicas(1)), + fakeMachineSet("ms2", withSpecReplicas(1)), + }, + machines: []*clusterv1.Machine{ + fakeMachine("m1"), + fakeMachine("stale-machine-1", withStaleDeletion()), + }, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingDownV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineDeploymentScalingDownV1Beta2Reason, + Message: "Scaling down from 2 to 1 replicas and Machine stale-machine-1 is in deletion since more than 30m", + }, + }, + { + name: "scaling down with 3 stale machines", + machineDeployment: machineDeploymentWith1Replica, + machineSets: []*clusterv1.MachineSet{ + fakeMachineSet("ms1", withSpecReplicas(3)), + fakeMachineSet("ms2", withSpecReplicas(1)), + }, + machines: []*clusterv1.Machine{ + fakeMachine("m1"), + fakeMachine("stale-machine-1", withStaleDeletion()), + fakeMachine("stale-machine-2", withStaleDeletion()), + fakeMachine("stale-machine-3", withStaleDeletion()), + }, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingDownV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineDeploymentScalingDownV1Beta2Reason, + Message: "Scaling down from 4 to 1 replicas and Machines stale-machine-1, stale-machine-2, stale-machine-3 are in deletion since more than 30m", + }, + }, + { + name: "scaling down with 5 stale machines", + machineDeployment: machineDeploymentWith1Replica, + machineSets: []*clusterv1.MachineSet{ + fakeMachineSet("ms1", withSpecReplicas(5)), + fakeMachineSet("ms2", withSpecReplicas(1)), + }, + machines: []*clusterv1.Machine{ + fakeMachine("m1"), + fakeMachine("stale-machine-1", withStaleDeletion()), + fakeMachine("stale-machine-2", withStaleDeletion()), + fakeMachine("stale-machine-3", withStaleDeletion()), + fakeMachine("stale-machine-4", withStaleDeletion()), + fakeMachine("stale-machine-5", withStaleDeletion()), + }, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingDownV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineDeploymentScalingDownV1Beta2Reason, + Message: "Scaling down from 6 to 1 replicas and Machines stale-machine-1, stale-machine-2, stale-machine-3, ... 
(2 more) are in deletion since more than 30m", + }, + }, + { + name: "deleting machine deployment without replicas", + machineDeployment: deletingMachineDeployment, + machineSets: []*clusterv1.MachineSet{}, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingDownV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineDeploymentNotScalingDownV1Beta2Reason, + }, + }, + { + name: "deleting machine deployment having 1 replica", + machineDeployment: deletingMachineDeployment, + machineSets: []*clusterv1.MachineSet{ + fakeMachineSet("ms1", withSpecReplicas(1)), + }, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentScalingDownV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineDeploymentScalingDownV1Beta2Reason, + Message: "Scaling down from 1 to 0 replicas", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + setScalingDownCondition(ctx, tt.machineDeployment, tt.machineSets, collections.FromMachines(tt.machines...), tt.getAndAdoptMachineSetsForDeploymentSucceeded) + + condition := v1beta2conditions.Get(tt.machineDeployment, clusterv1.MachineDeploymentScalingDownV1Beta2Condition) + g.Expect(condition).ToNot(BeNil()) + g.Expect(*condition).To(v1beta2conditions.MatchCondition(tt.expectCondition, v1beta2conditions.IgnoreLastTransitionTime(true))) + }) + } +} + +func Test_setMachinesReadyCondition(t *testing.T) { + readyCondition := metav1.Condition{ + Type: clusterv1.MachineReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: v1beta2conditions.MultipleInfoReportedReason, + } + + tests := []struct { + name string + machineDeployment *clusterv1.MachineDeployment + machines []*clusterv1.Machine + expectCondition metav1.Condition + }{ + { + name: "get machines failed", + machineDeployment: &clusterv1.MachineDeployment{}, + machines: nil, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentMachinesReadyInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }, + }, + { + name: "no machines", + machineDeployment: &clusterv1.MachineDeployment{}, + machines: []*clusterv1.Machine{}, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineDeploymentMachinesReadyNoReplicasV1Beta2Reason, + }, + }, + { + name: "all machines are ready", + machineDeployment: &clusterv1.MachineDeployment{}, + machines: []*clusterv1.Machine{ + fakeMachine("machine-1", withV1Beta2Condition(readyCondition)), + fakeMachine("machine-2", withV1Beta2Condition(readyCondition)), + }, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: v1beta2conditions.MultipleInfoReportedReason, + }, + }, + { + name: "one ready, one has nothing reported", + machineDeployment: &clusterv1.MachineDeployment{}, + machines: []*clusterv1.Machine{ + fakeMachine("machine-1", withV1Beta2Condition(readyCondition)), + fakeMachine("machine-2"), + }, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: v1beta2conditions.NotYetReportedReason, + Message: "Condition Ready 
not yet reported from Machine machine-2", + }, + }, + { + name: "one ready, one reporting not ready, one reporting unknown, one reporting deleting", + machineDeployment: &clusterv1.MachineDeployment{}, + machines: []*clusterv1.Machine{ + fakeMachine("machine-1", withV1Beta2Condition(readyCondition)), + fakeMachine("machine-2", withV1Beta2Condition(metav1.Condition{ + Type: clusterv1.MachineReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: "SomeReason", + Message: "HealthCheckSucceeded: Some message", + })), + fakeMachine("machine-3", withV1Beta2Condition(metav1.Condition{ + Type: clusterv1.MachineReadyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: "SomeUnknownReason", + Message: "Some unknown message", + })), + fakeMachine("machine-4", withV1Beta2Condition(metav1.Condition{ + Type: clusterv1.MachineReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineDeletingV1Beta2Reason, + Message: "Deleting: Machine deletion in progress, stage: DrainingNode", + })), + }, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: v1beta2conditions.MultipleIssuesReportedReason, + Message: "Deleting: Machine deletion in progress, stage: DrainingNode from Machine machine-4; HealthCheckSucceeded: Some message from Machine machine-2; Some unknown message from Machine machine-3", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + var machines collections.Machines + if tt.machines != nil { + machines = collections.FromMachines(tt.machines...) + } + setMachinesReadyCondition(ctx, tt.machineDeployment, machines) + + condition := v1beta2conditions.Get(tt.machineDeployment, clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition) + g.Expect(condition).ToNot(BeNil()) + g.Expect(*condition).To(v1beta2conditions.MatchCondition(tt.expectCondition, v1beta2conditions.IgnoreLastTransitionTime(true))) + }) + } +} + +func Test_setMachinesUpToDateCondition(t *testing.T) { + tests := []struct { + name string + machineDeployment *clusterv1.MachineDeployment + machines []*clusterv1.Machine + expectCondition metav1.Condition + }{ + { + name: "get machines failed", + machineDeployment: &clusterv1.MachineDeployment{}, + machines: nil, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentMachinesUpToDateInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }, + }, + { + name: "no machines", + machineDeployment: &clusterv1.MachineDeployment{}, + machines: []*clusterv1.Machine{}, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineDeploymentMachinesUpToDateNoReplicasV1Beta2Reason, + Message: "", + }, + }, + { + name: "One machine up-to-date", + machineDeployment: &clusterv1.MachineDeployment{}, + machines: []*clusterv1.Machine{ + fakeMachine("up-to-date-1", withV1Beta2Condition(metav1.Condition{ + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "some-reason-1", + })), + }, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "some-reason-1", + Message: "", + }, + }, + { + name: "One machine unknown", + machineDeployment: 
&clusterv1.MachineDeployment{}, + machines: []*clusterv1.Machine{ + fakeMachine("unknown-1", withV1Beta2Condition(metav1.Condition{ + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: "some-unknown-reason-1", + Message: "some unknown message", + })), + }, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: "some-unknown-reason-1", + Message: "some unknown message from Machine unknown-1", + }, + }, + { + name: "One machine not up-to-date", + machineDeployment: &clusterv1.MachineDeployment{}, + machines: []*clusterv1.Machine{ + fakeMachine("not-up-to-date-machine-1", withV1Beta2Condition(metav1.Condition{ + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: "some-not-up-to-date-reason", + Message: "some not up-to-date message", + })), + }, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: "some-not-up-to-date-reason", + Message: "some not up-to-date message from Machine not-up-to-date-machine-1", + }, + }, + { + name: "One machine without up-to-date condition", + machineDeployment: &clusterv1.MachineDeployment{}, + machines: []*clusterv1.Machine{ + fakeMachine("no-condition-machine-1"), + }, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: v1beta2conditions.NotYetReportedReason, + Message: "Condition UpToDate not yet reported from Machine no-condition-machine-1", + }, + }, + { + name: "Two machines not up-to-date, two up-to-date, two not reported", + machineDeployment: &clusterv1.MachineDeployment{}, + machines: []*clusterv1.Machine{ + fakeMachine("up-to-date-1", withV1Beta2Condition(metav1.Condition{ + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "TestUpToDate", + })), + fakeMachine("up-to-date-2", withV1Beta2Condition(metav1.Condition{ + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: "TestUpToDate", + })), + fakeMachine("not-up-to-date-machine-1", withV1Beta2Condition(metav1.Condition{ + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: "TestNotUpToDate", + Message: "This is not up-to-date message", + })), + fakeMachine("not-up-to-date-machine-2", withV1Beta2Condition(metav1.Condition{ + Type: clusterv1.MachineUpToDateV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: "TestNotUpToDate", + Message: "This is not up-to-date message", + })), + fakeMachine("no-condition-machine-1"), + fakeMachine("no-condition-machine-2"), + }, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: v1beta2conditions.MultipleIssuesReportedReason, + Message: "This is not up-to-date message from Machines not-up-to-date-machine-1, not-up-to-date-machine-2; Condition UpToDate not yet reported from Machines no-condition-machine-1, no-condition-machine-2", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + var machines collections.Machines + if tt.machines != nil { + machines = collections.FromMachines(tt.machines...) 
+ } + setMachinesUpToDateCondition(ctx, tt.machineDeployment, machines) + + condition := v1beta2conditions.Get(tt.machineDeployment, clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition) + g.Expect(condition).ToNot(BeNil()) + g.Expect(*condition).To(v1beta2conditions.MatchCondition(tt.expectCondition, v1beta2conditions.IgnoreLastTransitionTime(true))) + }) + } +} + +func Test_setRemediatingCondition(t *testing.T) { + healthCheckSucceeded := clusterv1.Condition{Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, Status: corev1.ConditionTrue} + healthCheckNotSucceeded := clusterv1.Condition{Type: clusterv1.MachineHealthCheckSucceededV1Beta2Condition, Status: corev1.ConditionFalse} + ownerRemediated := clusterv1.Condition{Type: clusterv1.MachineOwnerRemediatedCondition, Status: corev1.ConditionFalse} + ownerRemediatedV1Beta2 := metav1.Condition{Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, Status: metav1.ConditionFalse, Message: "Remediation in progress"} + + tests := []struct { + name string + machineDeployment *clusterv1.MachineDeployment + machines []*clusterv1.Machine + expectCondition metav1.Condition + }{ + { + name: "get machines failed", + machineDeployment: &clusterv1.MachineDeployment{}, + machines: nil, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentRemediatingV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentRemediatingInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }, + }, + { + name: "Without unhealthy machines", + machineDeployment: &clusterv1.MachineDeployment{}, + machines: []*clusterv1.Machine{ + fakeMachine("m1"), + fakeMachine("m2"), + }, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentRemediatingV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineDeploymentNotRemediatingV1Beta2Reason, + }, + }, + { + name: "With machines to be remediated by KCP", + machineDeployment: &clusterv1.MachineDeployment{}, + machines: []*clusterv1.Machine{ + fakeMachine("m1", withConditions(healthCheckSucceeded)), // Healthy machine + fakeMachine("m2", withConditions(healthCheckNotSucceeded)), // Unhealthy machine, not yet marked for remediation + fakeMachine("m3", withConditions(healthCheckNotSucceeded, ownerRemediated), withV1Beta2Condition(ownerRemediatedV1Beta2)), + }, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentRemediatingV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineDeploymentRemediatingV1Beta2Reason, + Message: "Remediation in progress from Machine m3", + }, + }, + { + name: "With one unhealthy machine not to be remediated by KCP", + machineDeployment: &clusterv1.MachineDeployment{}, + machines: []*clusterv1.Machine{ + fakeMachine("m1", withConditions(healthCheckSucceeded)), // Healthy machine + fakeMachine("m2", withConditions(healthCheckNotSucceeded)), // Unhealthy machine, not yet marked for remediation + fakeMachine("m3", withConditions(healthCheckSucceeded)), // Healthy machine + }, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentRemediatingV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineDeploymentNotRemediatingV1Beta2Reason, + Message: "Machine m2 is not healthy (not to be remediated by KCP)", + }, + }, + { + name: "With two unhealthy machines not to be remediated by KCP", + machineDeployment: &clusterv1.MachineDeployment{}, + machines: []*clusterv1.Machine{ + fakeMachine("m1",
withConditions(healthCheckNotSucceeded)), // Unhealthy machine, not yet marked for remediation + fakeMachine("m2", withConditions(healthCheckNotSucceeded)), // Unhealthy machine, not yet marked for remediation + fakeMachine("m3", withConditions(healthCheckSucceeded)), // Healthy machine + }, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentRemediatingV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineDeploymentNotRemediatingV1Beta2Reason, + Message: "Machines m1, m2 are not healthy (not to be remediated by KCP)", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + var machinesToBeRemediated, unHealthyMachines collections.Machines + if tt.machines != nil { + machines := collections.FromMachines(tt.machines...) + machinesToBeRemediated = machines.Filter(collections.IsUnhealthyAndOwnerRemediated) + unHealthyMachines = machines.Filter(collections.IsUnhealthy) + } + setRemediatingCondition(ctx, tt.machineDeployment, machinesToBeRemediated, unHealthyMachines) + + condition := v1beta2conditions.Get(tt.machineDeployment, clusterv1.MachineDeploymentRemediatingV1Beta2Condition) + g.Expect(condition).ToNot(BeNil()) + g.Expect(*condition).To(v1beta2conditions.MatchCondition(tt.expectCondition, v1beta2conditions.IgnoreLastTransitionTime(true))) + }) + } +} + +func Test_setDeletingCondition(t *testing.T) { + tests := []struct { + name string + machineDeployment *clusterv1.MachineDeployment + machineSets []*clusterv1.MachineSet + machines []*clusterv1.Machine + getAndAdoptMachineSetsForDeploymentSucceeded bool + expectCondition metav1.Condition + }{ + { + name: "get machine sets failed", + machineDeployment: &clusterv1.MachineDeployment{}, + machineSets: nil, + machines: []*clusterv1.Machine{}, + getAndAdoptMachineSetsForDeploymentSucceeded: false, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentDeletingV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentDeletingInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }, + }, + { + name: "get machines failed", + machineDeployment: &clusterv1.MachineDeployment{}, + machineSets: []*clusterv1.MachineSet{}, + machines: nil, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentDeletingV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: clusterv1.MachineDeploymentDeletingInternalErrorV1Beta2Reason, + Message: "Please check controller logs for errors", + }, + }, + { + name: "not deleting", + machineDeployment: &clusterv1.MachineDeployment{}, + machineSets: []*clusterv1.MachineSet{}, + machines: []*clusterv1.Machine{}, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentDeletingV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineDeploymentDeletingDeletionTimestampNotSetV1Beta2Reason, + }, + }, + { + name: "Deleting with a machine still around", + machineDeployment: &clusterv1.MachineDeployment{ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: &metav1.Time{Time: time.Now()}}}, + machineSets: []*clusterv1.MachineSet{ + fakeMachineSet("ms1", withSpecReplicas(1)), + }, + machines: []*clusterv1.Machine{ + fakeMachine("m1"), + }, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentDeletingV1Beta2Condition, + Status:
metav1.ConditionFalse, + Reason: clusterv1.MachineDeploymentDeletingDeletionTimestampSetV1Beta2Reason, + Message: "Deleting 1 replicas", + }, + }, + { + name: "Deleting with a stale machine still around", + machineDeployment: &clusterv1.MachineDeployment{ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: &metav1.Time{Time: time.Now()}}}, + machineSets: []*clusterv1.MachineSet{ + fakeMachineSet("ms1", withSpecReplicas(1)), + }, + machines: []*clusterv1.Machine{ + fakeMachine("m1", withStaleDeletion()), + }, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentDeletingV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineDeploymentDeletingDeletionTimestampSetV1Beta2Reason, + Message: "Deleting 1 replicas and Machine m1 is in deletion since more than 30m", + }, + }, + { + name: "Deleting with no machines and a machine set still around", + machineDeployment: &clusterv1.MachineDeployment{ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: &metav1.Time{Time: time.Now()}}}, + machineSets: []*clusterv1.MachineSet{ + fakeMachineSet("ms1", withSpecReplicas(1)), + }, + machines: []*clusterv1.Machine{}, + getAndAdoptMachineSetsForDeploymentSucceeded: true, + expectCondition: metav1.Condition{ + Type: clusterv1.MachineDeploymentDeletingV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineDeploymentDeletingDeletionTimestampSetV1Beta2Reason, + Message: "Deleting 1 MachineSets", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + var machines collections.Machines + if tt.machines != nil { + machines = collections.FromMachines(tt.machines...) + } + setDeletingCondition(ctx, tt.machineDeployment, tt.machineSets, machines, tt.getAndAdoptMachineSetsForDeploymentSucceeded) + + condition := v1beta2conditions.Get(tt.machineDeployment, clusterv1.MachineDeploymentDeletingV1Beta2Condition) + g.Expect(condition).ToNot(BeNil()) + g.Expect(*condition).To(v1beta2conditions.MatchCondition(tt.expectCondition, v1beta2conditions.IgnoreLastTransitionTime(true))) + }) + } +} + +type fakeMachineSetOption func(ms *clusterv1.MachineSet) + +func fakeMachineSet(name string, options ...fakeMachineSetOption) *clusterv1.MachineSet { + p := &clusterv1.MachineSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + } + for _, opt := range options { + opt(p) + } + return p +} + +func withSpecReplicas(n int32) fakeMachineSetOption { + return func(ms *clusterv1.MachineSet) { + ms.Spec.Replicas = &n + } +} + +func withStatusV1beta2ReadyReplicas(n int32) fakeMachineSetOption { + return func(ms *clusterv1.MachineSet) { + if ms.Status.V1Beta2 == nil { + ms.Status.V1Beta2 = &clusterv1.MachineSetV1Beta2Status{} + } + ms.Status.V1Beta2.ReadyReplicas = ptr.To(n) + } +} + +func withStatusV1beta2AvailableReplicas(n int32) fakeMachineSetOption { + return func(ms *clusterv1.MachineSet) { + if ms.Status.V1Beta2 == nil { + ms.Status.V1Beta2 = &clusterv1.MachineSetV1Beta2Status{} + } + ms.Status.V1Beta2.AvailableReplicas = ptr.To(n) + } +} + +func withStatusV1beta2UpToDateReplicas(n int32) fakeMachineSetOption { + return func(ms *clusterv1.MachineSet) { + if ms.Status.V1Beta2 == nil { + ms.Status.V1Beta2 = &clusterv1.MachineSetV1Beta2Status{} + } + ms.Status.V1Beta2.UpToDateReplicas = ptr.To(n) + } +} + +type fakeMachinesOption func(m *clusterv1.Machine) + +func fakeMachine(name string, options ...fakeMachinesOption) *clusterv1.Machine { + p := &clusterv1.Machine{ + ObjectMeta:
metav1.ObjectMeta{ + Name: name, + }, + } + for _, opt := range options { + opt(p) + } + return p +} + +// withStaleDeletion marks the Machine as deleted one hour ago, well past the 30m threshold after which a deletion is reported as stale. +func withStaleDeletion() fakeMachinesOption { + return func(m *clusterv1.Machine) { + m.DeletionTimestamp = ptr.To(metav1.Time{Time: time.Now().Add(-1 * time.Hour)}) + } +} + +func withV1Beta2Condition(c metav1.Condition) fakeMachinesOption { + return func(m *clusterv1.Machine) { + if m.Status.V1Beta2 == nil { + m.Status.V1Beta2 = &clusterv1.MachineV1Beta2Status{} + } + meta.SetStatusCondition(&m.Status.V1Beta2.Conditions, c) + } +} + +func withConditions(c ...clusterv1.Condition) fakeMachinesOption { + return func(m *clusterv1.Machine) { + m.Status.Conditions = append(m.Status.Conditions, c...) + } +} diff --git a/internal/controllers/machinedeployment/machinedeployment_sync.go b/internal/controllers/machinedeployment/machinedeployment_sync.go index d953112cbc42..db3ad69da27b 100644 --- a/internal/controllers/machinedeployment/machinedeployment_sync.go +++ b/internal/controllers/machinedeployment/machinedeployment_sync.go @@ -496,6 +496,9 @@ func (r *Reconciler) syncDeploymentStatus(allMSs []*clusterv1.MachineSet, newMS conditions.MarkFalse(md, clusterv1.MachineSetReadyCondition, clusterv1.WaitingForMachineSetFallbackReason, clusterv1.ConditionSeverityInfo, "MachineSet not found") } + // Set v1beta2 replica counters on MD status. + setReplicas(md, allMSs) + return nil } diff --git a/internal/controllers/machinedeployment/mdutil/util.go b/internal/controllers/machinedeployment/mdutil/util.go index 6080fd87cfb0..7e60ffa016a7 100644 --- a/internal/controllers/machinedeployment/mdutil/util.go +++ b/internal/controllers/machinedeployment/mdutil/util.go @@ -34,6 +34,7 @@ import ( intstrutil "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/klog/v2" "k8s.io/utils/integer" + "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" @@ -489,7 +490,7 @@ func FindOldMachineSets(deployment *clusterv1.MachineDeployment, msList []*clust func GetReplicaCountForMachineSets(machineSets []*clusterv1.MachineSet) int32 { totalReplicas := int32(0) for _, ms := range machineSets { - if ms != nil { + if ms != nil && ms.Spec.Replicas != nil { totalReplicas += *(ms.Spec.Replicas) } } @@ -545,6 +546,39 @@ func GetAvailableReplicaCountForMachineSets(machineSets []*clusterv1.MachineSet) return totalAvailableReplicas } +// GetV1Beta2ReadyReplicaCountForMachineSets returns the number of ready machines corresponding to the given machine sets. +func GetV1Beta2ReadyReplicaCountForMachineSets(machineSets []*clusterv1.MachineSet) *int32 { + var totalReadyReplicas *int32 + for _, ms := range machineSets { + if ms != nil && ms.Status.V1Beta2 != nil && ms.Status.V1Beta2.ReadyReplicas != nil { + totalReadyReplicas = ptr.To(ptr.Deref(totalReadyReplicas, 0) + *ms.Status.V1Beta2.ReadyReplicas) + } + } + return totalReadyReplicas +} + +// GetV1Beta2AvailableReplicaCountForMachineSets returns the number of available machines corresponding to the given machine sets.
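+// The returned pointer is nil when no MachineSet reports v1beta2 availableReplicas yet, which lets callers distinguish "not yet reported" from zero.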
+func GetV1Beta2AvailableReplicaCountForMachineSets(machineSets []*clusterv1.MachineSet) *int32 { + var totalAvailableReplicas *int32 + for _, ms := range machineSets { + if ms != nil && ms.Status.V1Beta2 != nil && ms.Status.V1Beta2.AvailableReplicas != nil { + totalAvailableReplicas = ptr.To(ptr.Deref(totalAvailableReplicas, 0) + *ms.Status.V1Beta2.AvailableReplicas) + } + } + return totalAvailableReplicas +} + +// GetV1Beta2UptoDateReplicaCountForMachineSets returns the number of up to date machines corresponding to the given machine sets. +func GetV1Beta2UptoDateReplicaCountForMachineSets(machineSets []*clusterv1.MachineSet) *int32 { + var totalUpToDateReplicas *int32 + for _, ms := range machineSets { + if ms != nil && ms.Status.V1Beta2 != nil && ms.Status.V1Beta2.UpToDateReplicas != nil { + totalUpToDateReplicas = ptr.To(ptr.Deref(totalUpToDateReplicas, 0) + *ms.Status.V1Beta2.UpToDateReplicas) + } + } + return totalUpToDateReplicas +} + // IsRollingUpdate returns true if the strategy type is a rolling update. func IsRollingUpdate(deployment *clusterv1.MachineDeployment) bool { return deployment.Spec.Strategy.Type == clusterv1.RollingUpdateMachineDeploymentStrategyType diff --git a/internal/controllers/machineset/machineset_controller.go b/internal/controllers/machineset/machineset_controller.go index ba79f4926e71..4dc2a5dd6371 100644 --- a/internal/controllers/machineset/machineset_controller.go +++ b/internal/controllers/machineset/machineset_controller.go @@ -196,7 +196,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (retres ct reterr = kerrors.NewAggregate([]error{reterr, errors.Wrapf(err, "failed to update status")}) } - r.reconcileV1Beta2Status(ctx, s) + r.updateStatus(ctx, s) // Always attempt to patch the object and status after each reconciliation. if err := patchMachineSet(ctx, patchHelper, s.machineSet); err != nil { diff --git a/internal/controllers/machineset/machineset_controller_status.go b/internal/controllers/machineset/machineset_controller_status.go index 3c94d8a03767..a47aee17777d 100644 --- a/internal/controllers/machineset/machineset_controller_status.go +++ b/internal/controllers/machineset/machineset_controller_status.go @@ -32,11 +32,11 @@ import ( clog "sigs.k8s.io/cluster-api/util/log" ) -// reconcileV1Beta2Status reconciles MachineSet's status during the entire lifecycle of the MachineSet. +// updateStatus updates MachineSet's status. // Additionally, this func should ensure that the conditions managed by this controller are always set in order to // comply with the recommendation in the Kubernetes API guidelines. // Note: v1beta1 conditions are not managed by this func. -func (r *Reconciler) reconcileV1Beta2Status(ctx context.Context, s *scope) { +func (r *Reconciler) updateStatus(ctx context.Context, s *scope) { // Update the following fields in status from the machines list. // - v1beta2.readyReplicas // - v1beta2.availableReplicas @@ -173,7 +173,7 @@ func setScalingDownCondition(_ context.Context, ms *clusterv1.MachineSet, machin // Scaling down. 
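+ // Use currentReplicas rather than the raw number of Machine objects so the message matches the replica counters driving the scaling decision below.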
if currentReplicas > desiredReplicas { - message := fmt.Sprintf("Scaling down from %d to %d replicas", len(machines), desiredReplicas) + message := fmt.Sprintf("Scaling down from %d to %d replicas", currentReplicas, desiredReplicas) staleMessage := aggregateStaleMachines(machines) if staleMessage != "" { message += fmt.Sprintf(" and %s", staleMessage) From 24b2644579062ff1286546a395be76e3a6b9ce26 Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Fri, 25 Oct 2024 16:14:55 +0200 Subject: [PATCH 2/2] Add the WithOwnedV1Beta2Conditions option to the patch helper --- api/v1beta1/machinedeployment_types.go | 8 +- api/v1beta1/v1beta2_condition_consts.go | 2 +- .../kubeadm/internal/controllers/status.go | 8 + .../machinedeployment_controller.go | 32 ++- .../machinedeployment_rolling.go | 4 +- .../machinedeployment_rollout_ondelete.go | 4 +- .../machinedeployment_status.go | 71 +++--- .../machinedeployment_status_test.go | 225 ++++++++++-------- .../machinedeployment_sync.go | 14 +- .../machinedeployment/mdutil/util.go | 3 + .../machineset_controller_status.go | 4 + 11 files changed, 230 insertions(+), 145 deletions(-) diff --git a/api/v1beta1/machinedeployment_types.go b/api/v1beta1/machinedeployment_types.go index 5594f829fcf3..b0111748755a 100644 --- a/api/v1beta1/machinedeployment_types.go +++ b/api/v1beta1/machinedeployment_types.go @@ -88,7 +88,7 @@ const ( // field of the MachineDeployment is not set. MachineDeploymentAvailableWaitingForReplicasSetV1Beta2Reason = WaitingForReplicasSetV1Beta2Reason - // MachineDeploymentAvailableWaitingForAvailableReplicasSetV1Beta2Reason surfaces when the .status.v1beta2.availableReplicas + // MachineDeploymentAvailableWaitingForAvailableReplicasSetV1Beta2Reason surfaces when the .status.v1beta2.availableReplicas // field of the MachineDeployment is not set. MachineDeploymentAvailableWaitingForAvailableReplicasSetV1Beta2Reason = "WaitingForAvailableReplicasSet" @@ -130,7 +130,7 @@ const ( // MachineDeployment's ScalingUp condition and corresponding reasons that will be used in v1Beta2 API version. const ( - // MachineDeploymentScalingUpV1Beta2Condition is true if available replicas < desired replicas. + // MachineDeploymentScalingUpV1Beta2Condition is true if actual replicas < desired replicas. MachineDeploymentScalingUpV1Beta2Condition = ScalingUpV1Beta2Condition // MachineDeploymentScalingUpV1Beta2Reason surfaces when actual replicas < desired replicas. @@ -149,7 +149,7 @@ const ( // MachineDeployment's ScalingDown condition and corresponding reasons that will be used in v1Beta2 API version. const ( - // MachineDeploymentScalingDownV1Beta2Condition is true if replicas > desired replicas. + // MachineDeploymentScalingDownV1Beta2Condition is true if actual replicas > desired replicas. MachineDeploymentScalingDownV1Beta2Condition = ScalingDownV1Beta2Condition // MachineDeploymentScalingDownV1Beta2Reason surfaces when actual replicas > desired replicas. @@ -168,7 +168,7 @@ const ( // MachineDeployment's Remediating condition and corresponding reasons that will be used in v1Beta2 API version. const ( - // MachineDeploymentRemediatingV1Beta2Condition details about ongoing remediation of the controlled machines, if any. + // MachineDeploymentRemediatingV1Beta2Condition surfaces details about ongoing remediation of the controlled machines, if any. 
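+ // Remediation is requested by a MachineHealthCheck marking a Machine as unhealthy and is carried out by the owning MachineSet, which deletes the Machine and creates a replacement.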
MachineDeploymentRemediatingV1Beta2Condition = RemediatingV1Beta2Condition // MachineDeploymentRemediatingV1Beta2Reason surfaces when the MachineDeployment has at least one machine with HealthCheckSucceeded set to false diff --git a/api/v1beta1/v1beta2_condition_consts.go b/api/v1beta1/v1beta2_condition_consts.go index df7146608bad..409c2559eba2 100644 --- a/api/v1beta1/v1beta2_condition_consts.go +++ b/api/v1beta1/v1beta2_condition_consts.go @@ -90,7 +90,7 @@ const ( // AvailableV1Beta2Reason applies to a condition surfacing object availability. AvailableV1Beta2Reason = "Available" - // NotAvailableV1Beta2Reason applies to a condition surfacing object not satisfying availability per-requisites. + // NotAvailableV1Beta2Reason applies to a condition surfacing object not satisfying availability criteria. NotAvailableV1Beta2Reason = "NotAvailable" // ScalingUpV1Beta2Reason surfaces when an object is scaling up. diff --git a/controlplane/kubeadm/internal/controllers/status.go b/controlplane/kubeadm/internal/controllers/status.go index 0ab1d370f911..09cef5bdba45 100644 --- a/controlplane/kubeadm/internal/controllers/status.go +++ b/controlplane/kubeadm/internal/controllers/status.go @@ -406,6 +406,10 @@ func setRemediatingCondition(ctx context.Context, kcp *controlplanev1.KubeadmCon } func aggregateStaleMachines(machines collections.Machines) string { + if len(machines) == 0 { + return "" + } + machineNames := []string{} for _, machine := range machines { if !machine.GetDeletionTimestamp().IsZero() && time.Since(machine.GetDeletionTimestamp().Time) > time.Minute*30 { @@ -436,6 +440,10 @@ func aggregateStaleMachines(machines collections.Machines) string { } func aggregateUnhealthyMachines(machines collections.Machines) string { + if len(machines) == 0 { + return "" + } + machineNames := []string{} for _, machine := range machines { machineNames = append(machineNames, machine.GetName()) diff --git a/internal/controllers/machinedeployment/machinedeployment_controller.go b/internal/controllers/machinedeployment/machinedeployment_controller.go index f3ec111f1a1f..0c8272508916 100644 --- a/internal/controllers/machinedeployment/machinedeployment_controller.go +++ b/internal/controllers/machinedeployment/machinedeployment_controller.go @@ -116,7 +116,7 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt return nil } -func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) { +func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (retres ctrl.Result, reterr error) { log := ctrl.LoggerFrom(ctx) // Fetch the MachineDeployment instance. @@ -173,6 +173,10 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re if err := patchMachineDeployment(ctx, patchHelper, deployment, patchOpts...); err != nil { reterr = kerrors.NewAggregate([]error{reterr, err}) } + + if reterr != nil { + retres = ctrl.Result{} + } }() // Handle deletion reconciliation loop. 
@@ -188,7 +192,9 @@ type scope struct { cluster *clusterv1.Cluster machineSets []*clusterv1.MachineSet bootstrapTemplateNotFound bool + bootstrapTemplateExists bool infrastructureTemplateNotFound bool + infrastructureTemplateExists bool getAndAdoptMachineSetsForDeploymentSucceeded bool } @@ -206,6 +212,15 @@ func patchMachineDeployment(ctx context.Context, patchHelper *patch.Helper, md * clusterv1.ReadyCondition, clusterv1.MachineDeploymentAvailableCondition, }}, + patch.WithOwnedV1Beta2Conditions{Conditions: []string{ + clusterv1.MachineDeploymentAvailableV1Beta2Condition, + clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition, + clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition, + clusterv1.MachineDeploymentScalingDownV1Beta2Condition, + clusterv1.MachineDeploymentScalingUpV1Beta2Condition, + clusterv1.MachineDeploymentRemediatingV1Beta2Condition, + clusterv1.MachineDeploymentDeletingV1Beta2Condition, + }}, ) return patchHelper.Patch(ctx, md, options...) } @@ -280,8 +295,10 @@ func (r *Reconciler) reconcile(ctx context.Context, s *scope) error { } } + templateExists := s.infrastructureTemplateExists && (md.Spec.Template.Spec.Bootstrap.ConfigRef == nil || s.bootstrapTemplateExists) + if md.Spec.Paused { - return r.sync(ctx, md, s.machineSets) + return r.sync(ctx, md, s.machineSets, templateExists) } if md.Spec.Strategy == nil { @@ -292,11 +309,11 @@ func (r *Reconciler) reconcile(ctx context.Context, s *scope) error { if md.Spec.Strategy.RollingUpdate == nil { return errors.Errorf("missing MachineDeployment settings for strategy type: %s", md.Spec.Strategy.Type) } - return r.rolloutRolling(ctx, md, s.machineSets) + return r.rolloutRolling(ctx, md, s.machineSets, templateExists) } if md.Spec.Strategy.Type == clusterv1.OnDeleteMachineDeploymentStrategyType { - return r.rolloutOnDelete(ctx, md, s.machineSets) + return r.rolloutOnDelete(ctx, md, s.machineSets, templateExists) } return errors.Errorf("unexpected deployment strategy type: %s", md.Spec.Strategy.Type) @@ -314,8 +331,6 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, s *scope) error { return nil } - log.Info("Waiting for MachineSets to be deleted", "MachineSets", clog.ObjNamesString(s.machineSets)) - // else delete owned machinesets. for _, ms := range s.machineSets { if ms.DeletionTimestamp.IsZero() { @@ -326,6 +341,7 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, s *scope) error { } } + log.Info("Waiting for MachineSets to be deleted", "MachineSets", clog.ObjNamesString(s.machineSets)) return nil } @@ -470,6 +486,8 @@ func (r *Reconciler) getTemplatesAndSetOwner(ctx context.Context, s *scope) erro return err } s.infrastructureTemplateNotFound = true + } else { + s.infrastructureTemplateExists = true } // Make sure to reconcile the external bootstrap reference, if any. if md.Spec.Template.Spec.Bootstrap.ConfigRef != nil { @@ -478,6 +496,8 @@ func (r *Reconciler) getTemplatesAndSetOwner(ctx context.Context, s *scope) erro return err } s.bootstrapTemplateNotFound = true + } else { + s.bootstrapTemplateExists = true } } return nil diff --git a/internal/controllers/machinedeployment/machinedeployment_rolling.go b/internal/controllers/machinedeployment/machinedeployment_rolling.go index 0c79bf365a83..31bd697d8b24 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rolling.go +++ b/internal/controllers/machinedeployment/machinedeployment_rolling.go @@ -29,8 +29,8 @@ import ( ) // rolloutRolling implements the logic for rolling a new MachineSet. 
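+// templateExists reports whether the templates referenced by the MachineDeployment still exist; it is computed in reconcile and forwarded to getAllMachineSetsAndSyncRevision.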
-func (r *Reconciler) rolloutRolling(ctx context.Context, md *clusterv1.MachineDeployment, msList []*clusterv1.MachineSet) error { - newMS, oldMSs, err := r.getAllMachineSetsAndSyncRevision(ctx, md, msList, true) +func (r *Reconciler) rolloutRolling(ctx context.Context, md *clusterv1.MachineDeployment, msList []*clusterv1.MachineSet, templateExists bool) error { + newMS, oldMSs, err := r.getAllMachineSetsAndSyncRevision(ctx, md, msList, true, templateExists) if err != nil { return err } diff --git a/internal/controllers/machinedeployment/machinedeployment_rollout_ondelete.go b/internal/controllers/machinedeployment/machinedeployment_rollout_ondelete.go index 64d15d1e3361..b8e7fcd75297 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rollout_ondelete.go +++ b/internal/controllers/machinedeployment/machinedeployment_rollout_ondelete.go @@ -32,8 +32,8 @@ import ( ) // rolloutOnDelete implements the logic for the OnDelete MachineDeploymentStrategyType. -func (r *Reconciler) rolloutOnDelete(ctx context.Context, md *clusterv1.MachineDeployment, msList []*clusterv1.MachineSet) error { - newMS, oldMSs, err := r.getAllMachineSetsAndSyncRevision(ctx, md, msList, true) +func (r *Reconciler) rolloutOnDelete(ctx context.Context, md *clusterv1.MachineDeployment, msList []*clusterv1.MachineSet, templateExists bool) error { + newMS, oldMSs, err := r.getAllMachineSetsAndSyncRevision(ctx, md, msList, true, templateExists) if err != nil { return err } diff --git a/internal/controllers/machinedeployment/machinedeployment_status.go b/internal/controllers/machinedeployment/machinedeployment_status.go index ef078e73a1cd..24cee4926993 100644 --- a/internal/controllers/machinedeployment/machinedeployment_status.go +++ b/internal/controllers/machinedeployment/machinedeployment_status.go @@ -37,35 +37,38 @@ import ( func (r *Reconciler) updateStatus(ctx context.Context, s *scope) (retErr error) { // Get all Machines controlled by this MachineDeployment. - var machines, machinesToBeRemediated, unHealthyMachines collections.Machines + var machines, machinesToBeRemediated, unhealthyMachines collections.Machines + var getMachinesSucceeded bool if selectorMap, err := metav1.LabelSelectorAsMap(&s.machineDeployment.Spec.Selector); err == nil { machineList := &clusterv1.MachineList{} if err := r.Client.List(ctx, machineList, client.InNamespace(s.machineDeployment.Namespace), client.MatchingLabels(selectorMap)); err != nil { retErr = errors.Wrap(err, "failed to list machines") + } else { + getMachinesSucceeded = true + machines = collections.FromMachineList(machineList) + machinesToBeRemediated = machines.Filter(collections.IsUnhealthyAndOwnerRemediated) + unhealthyMachines = machines.Filter(collections.IsUnhealthy) } - machines = collections.FromMachineList(machineList) - machinesToBeRemediated = machines.Filter(collections.IsUnhealthyAndOwnerRemediated) - unHealthyMachines = machines.Filter(collections.IsUnhealthy) } else { retErr = errors.Wrap(err, "failed to convert label selector to a map") } - // If the controller failed to read MachineSets, do not update replica counters. - if !s.getAndAdoptMachineSetsForDeploymentSucceeded { + // If the controller could read MachineSets, update replica counters. 
+ if s.getAndAdoptMachineSetsForDeploymentSucceeded { setReplicas(s.machineDeployment, s.machineSets) } setAvailableCondition(ctx, s.machineDeployment, s.getAndAdoptMachineSetsForDeploymentSucceeded) setScalingUpCondition(ctx, s.machineDeployment, s.machineSets, s.bootstrapTemplateNotFound, s.infrastructureTemplateNotFound, s.getAndAdoptMachineSetsForDeploymentSucceeded) - setScalingDownCondition(ctx, s.machineDeployment, s.machineSets, machines, s.getAndAdoptMachineSetsForDeploymentSucceeded) + setScalingDownCondition(ctx, s.machineDeployment, s.machineSets, machines, s.getAndAdoptMachineSetsForDeploymentSucceeded, getMachinesSucceeded) - setMachinesReadyCondition(ctx, s.machineDeployment, machines) - setMachinesUpToDateCondition(ctx, s.machineDeployment, machines) + setMachinesReadyCondition(ctx, s.machineDeployment, machines, getMachinesSucceeded) + setMachinesUpToDateCondition(ctx, s.machineDeployment, machines, getMachinesSucceeded) - setRemediatingCondition(ctx, s.machineDeployment, machinesToBeRemediated, unHealthyMachines) + setRemediatingCondition(ctx, s.machineDeployment, machinesToBeRemediated, unhealthyMachines, getMachinesSucceeded) - setDeletingCondition(ctx, s.machineDeployment, s.machineSets, machines, s.getAndAdoptMachineSetsForDeploymentSucceeded) + setDeletingCondition(ctx, s.machineDeployment, s.machineSets, machines, s.getAndAdoptMachineSetsForDeploymentSucceeded, getMachinesSucceeded) return retErr } @@ -132,7 +135,7 @@ func setAvailableCondition(_ context.Context, machineDeployment *clusterv1.Machi message := fmt.Sprintf("%d available replicas, at least %d required", *machineDeployment.Status.V1Beta2.AvailableReplicas, minReplicasNeeded) if machineDeployment.Spec.Strategy != nil && mdutil.IsRollingUpdate(machineDeployment) && machineDeployment.Spec.Strategy.RollingUpdate != nil { - message += fmt.Sprintf(" (spec.strategy.rollout.maxUnavailable is %s)", machineDeployment.Spec.Strategy.RollingUpdate.MaxUnavailable) + message += fmt.Sprintf(" (spec.strategy.rollout.maxUnavailable is %s, spec.replicas is %d)", machineDeployment.Spec.Strategy.RollingUpdate.MaxUnavailable, *machineDeployment.Spec.Replicas) } v1beta2conditions.Set(machineDeployment, metav1.Condition{ Type: clusterv1.MachineDeploymentAvailableV1Beta2Condition, @@ -168,7 +171,7 @@ func setScalingUpCondition(_ context.Context, machineDeployment *clusterv1.Machi if !machineDeployment.DeletionTimestamp.IsZero() { desiredReplicas = 0 } - currentReplicas := mdutil.GetReplicaCountForMachineSets(machineSets) + currentReplicas := mdutil.GetActualReplicaCountForMachineSets(machineSets) missingReferencesMessage := calculateMissingReferencesMessage(machineDeployment, bootstrapObjectNotFound, infrastructureObjectNotFound) @@ -199,7 +202,7 @@ func setScalingUpCondition(_ context.Context, machineDeployment *clusterv1.Machi }) } -func setScalingDownCondition(_ context.Context, machineDeployment *clusterv1.MachineDeployment, machineSets []*clusterv1.MachineSet, machines collections.Machines, getAndAdoptMachineSetsForDeploymentSucceeded bool) { +func setScalingDownCondition(_ context.Context, machineDeployment *clusterv1.MachineDeployment, machineSets []*clusterv1.MachineSet, machines collections.Machines, getAndAdoptMachineSetsForDeploymentSucceeded, getMachinesSucceeded bool) { // If we got unexpected errors in listing the machines sets (this should never happen), surface them. 
if !getAndAdoptMachineSetsForDeploymentSucceeded { v1beta2conditions.Set(machineDeployment, metav1.Condition{ @@ -225,14 +228,16 @@ func setScalingDownCondition(_ context.Context, machineDeployment *clusterv1.Mac if !machineDeployment.DeletionTimestamp.IsZero() { desiredReplicas = 0 } - currentReplicas := mdutil.GetReplicaCountForMachineSets(machineSets) + currentReplicas := mdutil.GetActualReplicaCountForMachineSets(machineSets) // Scaling down. if currentReplicas > desiredReplicas { message := fmt.Sprintf("Scaling down from %d to %d replicas", currentReplicas, desiredReplicas) - staleMessage := aggregateStaleMachines(machines) - if staleMessage != "" { - message += fmt.Sprintf(" and %s", staleMessage) + if getMachinesSucceeded { + staleMessage := aggregateStaleMachines(machines) + if staleMessage != "" { + message += fmt.Sprintf(" and %s", staleMessage) + } } v1beta2conditions.Set(machineDeployment, metav1.Condition{ Type: clusterv1.MachineDeploymentScalingDownV1Beta2Condition, @@ -251,10 +256,10 @@ func setScalingDownCondition(_ context.Context, machineDeployment *clusterv1.Mac }) } -func setMachinesReadyCondition(ctx context.Context, machineDeployment *clusterv1.MachineDeployment, machines collections.Machines) { +func setMachinesReadyCondition(ctx context.Context, machineDeployment *clusterv1.MachineDeployment, machines collections.Machines, getMachinesSucceeded bool) { log := ctrl.LoggerFrom(ctx) // If we got unexpected errors in listing the machines (this should never happen), surface them. - if machines == nil { + if !getMachinesSucceeded { v1beta2conditions.Set(machineDeployment, metav1.Condition{ Type: clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition, Status: metav1.ConditionUnknown, @@ -291,10 +296,10 @@ func setMachinesReadyCondition(ctx context.Context, machineDeployment *clusterv1 v1beta2conditions.Set(machineDeployment, *readyCondition) } -func setMachinesUpToDateCondition(ctx context.Context, machineDeployment *clusterv1.MachineDeployment, machines collections.Machines) { +func setMachinesUpToDateCondition(ctx context.Context, machineDeployment *clusterv1.MachineDeployment, machines collections.Machines, getMachinesSucceeded bool) { log := ctrl.LoggerFrom(ctx) // If we got unexpected errors in listing the machines (this should never happen), surface them. 
- if machines == nil { + if !getMachinesSucceeded { v1beta2conditions.Set(machineDeployment, metav1.Condition{ Type: clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition, Status: metav1.ConditionUnknown, @@ -331,8 +336,8 @@ func setMachinesUpToDateCondition(ctx context.Context, machineDeployment *cluste v1beta2conditions.Set(machineDeployment, *upToDateCondition) } -func setRemediatingCondition(ctx context.Context, machineDeployment *clusterv1.MachineDeployment, machinesToBeRemediated, unhealthyMachines collections.Machines) { - if machinesToBeRemediated == nil || unhealthyMachines == nil { +func setRemediatingCondition(ctx context.Context, machineDeployment *clusterv1.MachineDeployment, machinesToBeRemediated, unhealthyMachines collections.Machines, getMachinesSucceeded bool) { + if !getMachinesSucceeded { v1beta2conditions.Set(machineDeployment, metav1.Condition{ Type: clusterv1.MachineDeploymentRemediatingV1Beta2Condition, Status: metav1.ConditionUnknown, @@ -378,9 +383,9 @@ func setRemediatingCondition(ctx context.Context, machineDeployment *clusterv1.M }) } -func setDeletingCondition(_ context.Context, machineDeployment *clusterv1.MachineDeployment, machineSets []*clusterv1.MachineSet, machines collections.Machines, getAndAdoptMachineSetsForDeploymentSucceeded bool) { +func setDeletingCondition(_ context.Context, machineDeployment *clusterv1.MachineDeployment, machineSets []*clusterv1.MachineSet, machines collections.Machines, getAndAdoptMachineSetsForDeploymentSucceeded, getMachinesSucceeded bool) { // If we got unexpected errors in listing the machines sets or machines (this should never happen), surface them. - if !getAndAdoptMachineSetsForDeploymentSucceeded || machines == nil { + if !getAndAdoptMachineSetsForDeploymentSucceeded || !getMachinesSucceeded { v1beta2conditions.Set(machineDeployment, metav1.Condition{ Type: clusterv1.MachineDeploymentDeletingV1Beta2Condition, Status: metav1.ConditionUnknown, @@ -401,7 +406,11 @@ func setDeletingCondition(_ context.Context, machineDeployment *clusterv1.Machin message := "" if len(machines) > 0 { - message = fmt.Sprintf("Deleting %d replicas", len(machines)) + if len(machines) == 1 { + message = fmt.Sprintf("Deleting %d Machine", len(machines)) + } else { + message = fmt.Sprintf("Deleting %d Machines", len(machines)) + } staleMessage := aggregateStaleMachines(machines) if staleMessage != "" { message += fmt.Sprintf(" and %s", staleMessage) @@ -413,7 +422,7 @@ func setDeletingCondition(_ context.Context, machineDeployment *clusterv1.Machin } v1beta2conditions.Set(machineDeployment, metav1.Condition{ Type: clusterv1.MachineDeploymentDeletingV1Beta2Condition, - Status: metav1.ConditionFalse, + Status: metav1.ConditionTrue, Reason: clusterv1.MachineDeploymentDeletingDeletionTimestampSetV1Beta2Reason, Message: message, }) @@ -474,6 +483,10 @@ func aggregateStaleMachines(machines collections.Machines) string { } func aggregateUnhealthyMachines(machines collections.Machines) string { + if len(machines) == 0 { + return "" + } + machineNames := []string{} for _, machine := range machines { machineNames = append(machineNames, machine.GetName()) @@ -496,7 +509,7 @@ func aggregateUnhealthyMachines(machines collections.Machines) string { } else { message += " are " } - message += "not healthy (not to be remediated by KCP)" + message += "not healthy (not to be remediated by MachineDeployment/MachineSet)" return message } diff --git a/internal/controllers/machinedeployment/machinedeployment_status_test.go 
diff --git a/internal/controllers/machinedeployment/machinedeployment_status_test.go b/internal/controllers/machinedeployment/machinedeployment_status_test.go
index 1b9f921ea7b7..ce10e30cff06 100644
--- a/internal/controllers/machinedeployment/machinedeployment_status_test.go
+++ b/internal/controllers/machinedeployment/machinedeployment_status_test.go
@@ -22,7 +22,6 @@ import (
 	. "github.com/onsi/gomega"
 	corev1 "k8s.io/api/core/v1"
-	"k8s.io/apimachinery/pkg/api/meta"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/intstr"
 	"k8s.io/utils/ptr"
@@ -124,7 +123,7 @@ func Test_setAvailableCondition(t *testing.T) {
 			},
 		},
 		{
-			name: "all the expected replicase are available",
+			name: "all the expected replicas are available",
 			machineDeployment: &clusterv1.MachineDeployment{
 				Spec: clusterv1.MachineDeploymentSpec{
 					Replicas: ptr.To(int32(5)),
@@ -146,7 +145,7 @@ func Test_setAvailableCondition(t *testing.T) {
 			},
 		},
 		{
-			name: "some replicase are not available, but within MaxUnavailable range",
+			name: "some replicas are not available, but within MaxUnavailable range",
 			machineDeployment: &clusterv1.MachineDeployment{
 				Spec: clusterv1.MachineDeploymentSpec{
 					Replicas: ptr.To(int32(5)),
@@ -168,30 +167,7 @@ func Test_setAvailableCondition(t *testing.T) {
 			},
 		},
 		{
-			name: "some replicase are not available, less than MaxUnavailable",
-			machineDeployment: &clusterv1.MachineDeployment{
-				Spec: clusterv1.MachineDeploymentSpec{
-					Replicas: ptr.To(int32(5)),
-					Strategy: &clusterv1.MachineDeploymentStrategy{
-						Type: clusterv1.RollingUpdateMachineDeploymentStrategyType,
-						RollingUpdate: &clusterv1.MachineRollingUpdateDeployment{
-							MaxSurge:       ptr.To(intstr.FromInt32(1)),
-							MaxUnavailable: ptr.To(intstr.FromInt32(1)),
-						},
-					},
-				},
-				Status: clusterv1.MachineDeploymentStatus{V1Beta2: &clusterv1.MachineDeploymentV1Beta2Status{AvailableReplicas: ptr.To(int32(4))}},
-			},
-			getAndAdoptMachineSetsForDeploymentSucceeded: true,
-			expectCondition: metav1.Condition{
-				Type:    clusterv1.MachineDeploymentAvailableV1Beta2Condition,
-				Status:  metav1.ConditionTrue,
-				Reason:  clusterv1.MachineDeploymentAvailableV1Beta2Reason,
-				Message: "",
-			},
-		},
-		{
-			name: "some replicase are not available, more than MaxUnavailable",
+			name: "some replicas are not available, more than MaxUnavailable",
 			machineDeployment: &clusterv1.MachineDeployment{
 				Spec: clusterv1.MachineDeploymentSpec{
 					Replicas: ptr.To(int32(5)),
@@ -210,7 +186,7 @@ func Test_setAvailableCondition(t *testing.T) {
 				Type:   clusterv1.MachineDeploymentAvailableV1Beta2Condition,
 				Status: metav1.ConditionFalse,
 				Reason: clusterv1.MachineDeploymentNotAvailableV1Beta2Reason,
-				Message: "3 available replicas, at least 4 required (spec.strategy.rollout.maxUnavailable is 1)",
+				Message: "3 available replicas, at least 4 required (spec.strategy.rollout.maxUnavailable is 1, spec.replicas is 5)",
 			},
 		},
 	}
@@ -279,6 +255,22 @@ func Test_setScalingUpCondition(t *testing.T) {
 				Message: "Please check controller logs for errors",
 			},
 		},
+		{
+			name: "replicas not set",
+			machineDeployment: func() *clusterv1.MachineDeployment {
+				md := defaultMachineDeployment.DeepCopy()
+				md.Spec.Replicas = nil
+				return md
+			}(),
+			bootstrapTemplateNotFound:      false,
+			infrastructureTemplateNotFound: false,
+			getAndAdoptMachineSetsForDeploymentSucceeded: true,
+			expectCondition: metav1.Condition{
+				Type:   clusterv1.MachineDeploymentScalingUpV1Beta2Condition,
+				Status: metav1.ConditionUnknown,
+				Reason: clusterv1.MachineDeploymentScalingUpWaitingForReplicasSetV1Beta2Reason,
+			},
+		},
 		{
 			name:              "not scaling up and no machines",
 			machineDeployment: defaultMachineDeployment,
@@ -295,8 +287,8 @@ func Test_setScalingUpCondition(t *testing.T) {
 			name:              "not scaling up with machines",
 			machineDeployment: deletingMachineDeploymentWith3Replicas,
 			machineSets: []*clusterv1.MachineSet{
-				fakeMachineSet("ms1", withSpecReplicas(1)),
-				fakeMachineSet("ms2", withSpecReplicas(2)),
+				fakeMachineSet("ms1", withStatusReplicas(1)),
+				fakeMachineSet("ms2", withStatusReplicas(2)),
 			},
 			bootstrapTemplateNotFound:      false,
 			infrastructureTemplateNotFound: false,
@@ -363,8 +355,8 @@ func Test_setScalingUpCondition(t *testing.T) {
 			name:              "scaling up with machines",
 			machineDeployment: scalingUpMachineDeploymentWith3Replicas,
 			machineSets: []*clusterv1.MachineSet{
-				fakeMachineSet("ms1", withSpecReplicas(1)),
-				fakeMachineSet("ms2", withSpecReplicas(1)),
+				fakeMachineSet("ms1", withStatusReplicas(1)),
+				fakeMachineSet("ms2", withStatusReplicas(1)),
 			},
 			bootstrapTemplateNotFound:      false,
 			infrastructureTemplateNotFound: false,
@@ -463,6 +455,20 @@ func Test_setScalingDownCondition(t *testing.T) {
 				Message: "Please check controller logs for errors",
 			},
 		},
+		{
+			name: "replicas not set",
+			machineDeployment: func() *clusterv1.MachineDeployment {
+				md := defaultMachineDeployment.DeepCopy()
+				md.Spec.Replicas = nil
+				return md
+			}(),
+			getAndAdoptMachineSetsForDeploymentSucceeded: true,
+			expectCondition: metav1.Condition{
+				Type:   clusterv1.MachineDeploymentScalingDownV1Beta2Condition,
+				Status: metav1.ConditionUnknown,
+				Reason: clusterv1.MachineDeploymentScalingDownWaitingForReplicasSetV1Beta2Reason,
+			},
+		},
 		{
 			name:              "not scaling down and no machines",
 			machineDeployment: defaultMachineDeployment,
@@ -489,7 +495,7 @@ func Test_setScalingDownCondition(t *testing.T) {
 			name:              "scaling down to zero",
 			machineDeployment: defaultMachineDeployment,
 			machineSets: []*clusterv1.MachineSet{
-				fakeMachineSet("ms1", withSpecReplicas(1)),
+				fakeMachineSet("ms1", withStatusReplicas(1)),
 			},
 			getAndAdoptMachineSetsForDeploymentSucceeded: true,
 			expectCondition: metav1.Condition{
@@ -503,8 +509,8 @@ func Test_setScalingDownCondition(t *testing.T) {
 			name:              "scaling down with 1 stale machine",
 			machineDeployment: machineDeploymentWith1Replica,
 			machineSets: []*clusterv1.MachineSet{
-				fakeMachineSet("ms1", withSpecReplicas(1)),
-				fakeMachineSet("ms2", withSpecReplicas(1)),
+				fakeMachineSet("ms1", withStatusReplicas(1)),
+				fakeMachineSet("ms2", withStatusReplicas(1)),
 			},
 			machines: []*clusterv1.Machine{
 				fakeMachine("m1"),
@@ -522,8 +528,8 @@ func Test_setScalingDownCondition(t *testing.T) {
 			name:              "scaling down with 3 stale machines",
 			machineDeployment: machineDeploymentWith1Replica,
 			machineSets: []*clusterv1.MachineSet{
-				fakeMachineSet("ms1", withSpecReplicas(3)),
-				fakeMachineSet("ms2", withSpecReplicas(1)),
+				fakeMachineSet("ms1", withStatusReplicas(3)),
+				fakeMachineSet("ms2", withStatusReplicas(1)),
 			},
 			machines: []*clusterv1.Machine{
 				fakeMachine("m1"),
@@ -543,8 +549,8 @@ func Test_setScalingDownCondition(t *testing.T) {
 			name:              "scaling down with 5 stale machines",
 			machineDeployment: machineDeploymentWith1Replica,
 			machineSets: []*clusterv1.MachineSet{
-				fakeMachineSet("ms1", withSpecReplicas(5)),
-				fakeMachineSet("ms2", withSpecReplicas(1)),
+				fakeMachineSet("ms1", withStatusReplicas(5)),
+				fakeMachineSet("ms2", withStatusReplicas(1)),
 			},
 			machines: []*clusterv1.Machine{
 				fakeMachine("m1"),
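Note the systematic swap from withSpecReplicas to withStatusReplicas in these fixtures: with setScalingDownCondition now calling mdutil.GetActualReplicaCountForMachineSets, the condition compares observed replicas (status) against desired replicas (spec). A small illustration of the intended semantics (a sketch, not a case from the table):

// Sketch: ScalingDown is driven by actual (status) replicas vs desired (spec) replicas.
md.Spec.Replicas = ptr.To(int32(1))                // desired
ms := fakeMachineSet("ms1", withStatusReplicas(3)) // observed
// currentReplicas = 3 > desiredReplicas = 1, so ScalingDown is true:
// "Scaling down from 3 to 1 replicas".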
+ fakeMachineSet("ms1", withStatusReplicas(1)), }, getAndAdoptMachineSetsForDeploymentSucceeded: true, expectCondition: metav1.Condition{ @@ -592,7 +598,7 @@ func Test_setScalingDownCondition(t *testing.T) { t.Run(tt.name, func(t *testing.T) { g := NewWithT(t) - setScalingDownCondition(ctx, tt.machineDeployment, tt.machineSets, collections.FromMachines(tt.machines...), tt.getAndAdoptMachineSetsForDeploymentSucceeded) + setScalingDownCondition(ctx, tt.machineDeployment, tt.machineSets, collections.FromMachines(tt.machines...), tt.getAndAdoptMachineSetsForDeploymentSucceeded, true) condition := v1beta2conditions.Get(tt.machineDeployment, clusterv1.MachineDeploymentScalingDownV1Beta2Condition) g.Expect(condition).ToNot(BeNil()) @@ -609,15 +615,17 @@ func Test_setMachinesReadyCondition(t *testing.T) { } tests := []struct { - name string - machineDeployment *clusterv1.MachineDeployment - machines []*clusterv1.Machine - expectCondition metav1.Condition + name string + machineDeployment *clusterv1.MachineDeployment + machines []*clusterv1.Machine + getMachinesSucceeded bool + expectCondition metav1.Condition }{ { - name: "get machines failed", - machineDeployment: &clusterv1.MachineDeployment{}, - machines: nil, + name: "get machines failed", + machineDeployment: &clusterv1.MachineDeployment{}, + machines: nil, + getMachinesSucceeded: false, expectCondition: metav1.Condition{ Type: clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition, Status: metav1.ConditionUnknown, @@ -626,9 +634,10 @@ func Test_setMachinesReadyCondition(t *testing.T) { }, }, { - name: "no machines", - machineDeployment: &clusterv1.MachineDeployment{}, - machines: []*clusterv1.Machine{}, + name: "no machines", + machineDeployment: &clusterv1.MachineDeployment{}, + machines: []*clusterv1.Machine{}, + getMachinesSucceeded: true, expectCondition: metav1.Condition{ Type: clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition, Status: metav1.ConditionTrue, @@ -642,6 +651,7 @@ func Test_setMachinesReadyCondition(t *testing.T) { fakeMachine("machine-1", withV1Beta2Condition(readyCondition)), fakeMachine("machine-2", withV1Beta2Condition(readyCondition)), }, + getMachinesSucceeded: true, expectCondition: metav1.Condition{ Type: clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition, Status: metav1.ConditionTrue, @@ -655,6 +665,7 @@ func Test_setMachinesReadyCondition(t *testing.T) { fakeMachine("machine-1", withV1Beta2Condition(readyCondition)), fakeMachine("machine-2"), }, + getMachinesSucceeded: true, expectCondition: metav1.Condition{ Type: clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition, Status: metav1.ConditionUnknown, @@ -686,6 +697,7 @@ func Test_setMachinesReadyCondition(t *testing.T) { Message: "Deleting: Machine deletion in progress, stage: DrainingNode", })), }, + getMachinesSucceeded: true, expectCondition: metav1.Condition{ Type: clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition, Status: metav1.ConditionFalse, @@ -702,7 +714,7 @@ func Test_setMachinesReadyCondition(t *testing.T) { if tt.machines != nil { machines = collections.FromMachines(tt.machines...) 
 			}
 
-			setMachinesReadyCondition(ctx, tt.machineDeployment, machines)
+			setMachinesReadyCondition(ctx, tt.machineDeployment, machines, tt.getMachinesSucceeded)
 
 			condition := v1beta2conditions.Get(tt.machineDeployment, clusterv1.MachineDeploymentMachinesReadyV1Beta2Condition)
 			g.Expect(condition).ToNot(BeNil())
@@ -713,15 +725,17 @@ func Test_setMachinesReadyCondition(t *testing.T) {
 
 func Test_setMachinesUpToDateCondition(t *testing.T) {
 	tests := []struct {
-		name              string
-		machineDeployment *clusterv1.MachineDeployment
-		machines          []*clusterv1.Machine
-		expectCondition   metav1.Condition
+		name                 string
+		machineDeployment    *clusterv1.MachineDeployment
+		machines             []*clusterv1.Machine
+		getMachinesSucceeded bool
+		expectCondition      metav1.Condition
 	}{
 		{
-			name:              "get machines failed",
-			machineDeployment: &clusterv1.MachineDeployment{},
-			machines:          nil,
+			name:                 "get machines failed",
+			machineDeployment:    &clusterv1.MachineDeployment{},
+			machines:             nil,
+			getMachinesSucceeded: false,
 			expectCondition: metav1.Condition{
 				Type:   clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition,
 				Status: metav1.ConditionUnknown,
@@ -730,9 +744,10 @@ func Test_setMachinesUpToDateCondition(t *testing.T) {
 			},
 		},
 		{
-			name:              "no machines",
-			machineDeployment: &clusterv1.MachineDeployment{},
-			machines:          []*clusterv1.Machine{},
+			name:                 "no machines",
+			machineDeployment:    &clusterv1.MachineDeployment{},
+			machines:             []*clusterv1.Machine{},
+			getMachinesSucceeded: true,
 			expectCondition: metav1.Condition{
 				Type:   clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition,
 				Status: metav1.ConditionTrue,
@@ -750,6 +765,7 @@ func Test_setMachinesUpToDateCondition(t *testing.T) {
 					Reason: "some-reason-1",
 				})),
 			},
+			getMachinesSucceeded: true,
 			expectCondition: metav1.Condition{
 				Type:   clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition,
 				Status: metav1.ConditionTrue,
@@ -768,6 +784,7 @@ func Test_setMachinesUpToDateCondition(t *testing.T) {
 					Message: "some unknown message",
 				})),
 			},
+			getMachinesSucceeded: true,
 			expectCondition: metav1.Condition{
 				Type:   clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition,
 				Status: metav1.ConditionUnknown,
@@ -786,6 +803,7 @@ func Test_setMachinesUpToDateCondition(t *testing.T) {
 					Message: "some not up-to-date message",
 				})),
 			},
+			getMachinesSucceeded: true,
 			expectCondition: metav1.Condition{
 				Type:   clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition,
 				Status: metav1.ConditionFalse,
@@ -799,6 +817,7 @@ func Test_setMachinesUpToDateCondition(t *testing.T) {
 			machines: []*clusterv1.Machine{
 				fakeMachine("no-condition-machine-1"),
 			},
+			getMachinesSucceeded: true,
 			expectCondition: metav1.Condition{
 				Type:   clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition,
 				Status: metav1.ConditionUnknown,
@@ -835,6 +854,7 @@ func Test_setMachinesUpToDateCondition(t *testing.T) {
 				fakeMachine("no-condition-machine-1"),
 				fakeMachine("no-condition-machine-2"),
 			},
+			getMachinesSucceeded: true,
 			expectCondition: metav1.Condition{
 				Type:   clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition,
 				Status: metav1.ConditionFalse,
@@ -851,7 +871,7 @@ func Test_setMachinesUpToDateCondition(t *testing.T) {
 			if tt.machines != nil {
 				machines = collections.FromMachines(tt.machines...)
 			}
 
-			setMachinesUpToDateCondition(ctx, tt.machineDeployment, machines)
+			setMachinesUpToDateCondition(ctx, tt.machineDeployment, machines, tt.getMachinesSucceeded)
 
 			condition := v1beta2conditions.Get(tt.machineDeployment, clusterv1.MachineDeploymentMachinesUpToDateV1Beta2Condition)
 			g.Expect(condition).ToNot(BeNil())
@@ -867,15 +887,17 @@ func Test_setRemediatingCondition(t *testing.T) {
 	ownerRemediatedV1Beta2 := metav1.Condition{Type: clusterv1.MachineOwnerRemediatedV1Beta2Condition, Status: metav1.ConditionFalse, Message: "Remediation in progress"}
 
 	tests := []struct {
-		name              string
-		machineDeployment *clusterv1.MachineDeployment
-		machines          []*clusterv1.Machine
-		expectCondition   metav1.Condition
+		name                 string
+		machineDeployment    *clusterv1.MachineDeployment
+		machines             []*clusterv1.Machine
+		getMachinesSucceeded bool
+		expectCondition      metav1.Condition
 	}{
 		{
-			name:              "get machines failed",
-			machineDeployment: &clusterv1.MachineDeployment{},
-			machines:          nil,
+			name:                 "get machines failed",
+			machineDeployment:    &clusterv1.MachineDeployment{},
+			machines:             nil,
+			getMachinesSucceeded: false,
 			expectCondition: metav1.Condition{
 				Type:   clusterv1.MachineDeploymentRemediatingV1Beta2Condition,
 				Status: metav1.ConditionUnknown,
@@ -890,6 +912,7 @@ func Test_setRemediatingCondition(t *testing.T) {
 				fakeMachine("m1"),
 				fakeMachine("m2"),
 			},
+			getMachinesSucceeded: true,
 			expectCondition: metav1.Condition{
 				Type:   clusterv1.MachineDeploymentRemediatingV1Beta2Condition,
 				Status: metav1.ConditionFalse,
@@ -897,13 +920,14 @@ func Test_setRemediatingCondition(t *testing.T) {
 			},
 		},
 		{
-			name:              "With machines to be remediated by KCP",
+			name:              "With machines to be remediated by MD/MS",
 			machineDeployment: &clusterv1.MachineDeployment{},
 			machines: []*clusterv1.Machine{
 				fakeMachine("m1", withConditions(healthCheckSucceeded)),    // Healthy machine
 				fakeMachine("m2", withConditions(healthCheckNotSucceeded)), // Unhealthy machine, not yet marked for remediation
 				fakeMachine("m3", withConditions(healthCheckNotSucceeded, ownerRemediated), withV1Beta2Condition(ownerRemediatedV1Beta2)),
 			},
+			getMachinesSucceeded: true,
 			expectCondition: metav1.Condition{
 				Type:   clusterv1.MachineDeploymentRemediatingV1Beta2Condition,
 				Status: metav1.ConditionTrue,
@@ -912,33 +936,35 @@ func Test_setRemediatingCondition(t *testing.T) {
 			},
 		},
 		{
-			name:              "With one unhealthy machine not to be remediated by KCP",
+			name:              "With one unhealthy machine not to be remediated by MD/MS",
 			machineDeployment: &clusterv1.MachineDeployment{},
 			machines: []*clusterv1.Machine{
 				fakeMachine("m1", withConditions(healthCheckSucceeded)),    // Healthy machine
 				fakeMachine("m2", withConditions(healthCheckNotSucceeded)), // Unhealthy machine, not yet marked for remediation
 				fakeMachine("m3", withConditions(healthCheckSucceeded)),    // Healthy machine
 			},
+			getMachinesSucceeded: true,
 			expectCondition: metav1.Condition{
 				Type:   clusterv1.MachineDeploymentRemediatingV1Beta2Condition,
 				Status: metav1.ConditionFalse,
 				Reason: clusterv1.MachineDeploymentNotRemediatingV1Beta2Reason,
-				Message: "Machine m2 is not healthy (not to be remediated by KCP)",
+				Message: "Machine m2 is not healthy (not to be remediated by MachineDeployment/MachineSet)",
 			},
 		},
 		{
-			name:              "With two unhealthy machine not to be remediated by KCP",
+			name:              "With two unhealthy machines not to be remediated by MD/MS",
 			machineDeployment: &clusterv1.MachineDeployment{},
 			machines: []*clusterv1.Machine{
 				fakeMachine("m1", withConditions(healthCheckNotSucceeded)), // Unhealthy machine, not yet marked for remediation
 				fakeMachine("m2", withConditions(healthCheckNotSucceeded)), // Unhealthy machine, not yet marked for remediation
 				fakeMachine("m3", withConditions(healthCheckSucceeded)),    // Healthy machine
 			},
+			getMachinesSucceeded: true,
 			expectCondition: metav1.Condition{
 				Type:   clusterv1.MachineDeploymentRemediatingV1Beta2Condition,
 				Status: metav1.ConditionFalse,
 				Reason: clusterv1.MachineDeploymentNotRemediatingV1Beta2Reason,
-				Message: "Machines m1, m2 are not healthy (not to be remediated by KCP)",
+				Message: "Machines m1, m2 are not healthy (not to be remediated by MachineDeployment/MachineSet)",
 			},
 		},
 	}
@@ -947,12 +973,12 @@ func Test_setRemediatingCondition(t *testing.T) {
 			g := NewWithT(t)
 
 			var machinesToBeRemediated, unHealthyMachines collections.Machines
-			if tt.machines != nil {
+			if tt.getMachinesSucceeded {
 				machines := collections.FromMachines(tt.machines...)
 				machinesToBeRemediated = machines.Filter(collections.IsUnhealthyAndOwnerRemediated)
 				unHealthyMachines = machines.Filter(collections.IsUnhealthy)
 			}
-			setRemediatingCondition(ctx, tt.machineDeployment, machinesToBeRemediated, unHealthyMachines)
+			setRemediatingCondition(ctx, tt.machineDeployment, machinesToBeRemediated, unHealthyMachines, tt.getMachinesSucceeded)
 
 			condition := v1beta2conditions.Get(tt.machineDeployment, clusterv1.MachineDeploymentRemediatingV1Beta2Condition)
 			g.Expect(condition).ToNot(BeNil())
@@ -966,16 +992,18 @@ func Test_setDeletingCondition(t *testing.T) {
 		name              string
 		machineDeployment *clusterv1.MachineDeployment
 		machineSets       []*clusterv1.MachineSet
-		machines          []*clusterv1.Machine
 		getAndAdoptMachineSetsForDeploymentSucceeded bool
+		machines             []*clusterv1.Machine
+		getMachinesSucceeded bool
 		expectCondition metav1.Condition
 	}{
 		{
 			name:              "get machine sets failed",
 			machineDeployment: &clusterv1.MachineDeployment{},
 			machineSets:       nil,
-			machines:          []*clusterv1.Machine{},
 			getAndAdoptMachineSetsForDeploymentSucceeded: false,
+			machines:             []*clusterv1.Machine{},
+			getMachinesSucceeded: true,
 			expectCondition: metav1.Condition{
 				Type:   clusterv1.MachineDeploymentDeletingV1Beta2Condition,
 				Status: metav1.ConditionUnknown,
@@ -987,8 +1015,9 @@ func Test_setDeletingCondition(t *testing.T) {
 			name:              "get machines failed",
 			machineDeployment: &clusterv1.MachineDeployment{},
 			machineSets:       []*clusterv1.MachineSet{},
-			machines:          nil,
 			getAndAdoptMachineSetsForDeploymentSucceeded: true,
+			machines:             nil,
+			getMachinesSucceeded: false,
 			expectCondition: metav1.Condition{
 				Type:   clusterv1.MachineDeploymentDeletingV1Beta2Condition,
 				Status: metav1.ConditionUnknown,
@@ -1000,8 +1029,9 @@ func Test_setDeletingCondition(t *testing.T) {
 			name:              "not deleting",
 			machineDeployment: &clusterv1.MachineDeployment{},
 			machineSets:       []*clusterv1.MachineSet{},
-			machines:          []*clusterv1.Machine{},
 			getAndAdoptMachineSetsForDeploymentSucceeded: true,
+			machines:             []*clusterv1.Machine{},
+			getMachinesSucceeded: true,
 			expectCondition: metav1.Condition{
 				Type:   clusterv1.MachineDeploymentDeletingV1Beta2Condition,
 				Status: metav1.ConditionFalse,
@@ -1012,47 +1042,50 @@ func Test_setDeletingCondition(t *testing.T) {
 			name:              "Deleting with still some machine",
 			machineDeployment: &clusterv1.MachineDeployment{ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: &metav1.Time{Time: time.Now()}}},
 			machineSets: []*clusterv1.MachineSet{
-				fakeMachineSet("ms1", withSpecReplicas(1)),
+				fakeMachineSet("ms1", withStatusReplicas(1)),
 			},
+			getAndAdoptMachineSetsForDeploymentSucceeded: true,
 			machines: []*clusterv1.Machine{
 				fakeMachine("m1"),
 			},
-			getAndAdoptMachineSetsForDeploymentSucceeded: true,
+			getMachinesSucceeded: true,
 			expectCondition: metav1.Condition{
 				Type:   clusterv1.MachineDeploymentDeletingV1Beta2Condition,
-				Status: metav1.ConditionFalse,
+				Status: metav1.ConditionTrue,
 				Reason: clusterv1.MachineDeploymentDeletingDeletionTimestampSetV1Beta2Reason,
-				Message: "Deleting 1 replicas",
+				Message: "Deleting 1 Machine",
 			},
 		},
 		{
 			name:              "Deleting with still some stale machine",
 			machineDeployment: &clusterv1.MachineDeployment{ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: &metav1.Time{Time: time.Now()}}},
 			machineSets: []*clusterv1.MachineSet{
-				fakeMachineSet("ms1", withSpecReplicas(1)),
+				fakeMachineSet("ms1", withStatusReplicas(1)),
 			},
+			getAndAdoptMachineSetsForDeploymentSucceeded: true,
 			machines: []*clusterv1.Machine{
 				fakeMachine("m1", withStaleDeletion()),
 			},
-			getAndAdoptMachineSetsForDeploymentSucceeded: true,
+			getMachinesSucceeded: true,
 			expectCondition: metav1.Condition{
 				Type:   clusterv1.MachineDeploymentDeletingV1Beta2Condition,
-				Status: metav1.ConditionFalse,
+				Status: metav1.ConditionTrue,
 				Reason: clusterv1.MachineDeploymentDeletingDeletionTimestampSetV1Beta2Reason,
-				Message: "Deleting 1 replicas and Machine m1 is in deletion since more than 30m",
+				Message: "Deleting 1 Machine and Machine m1 is in deletion since more than 30m",
 			},
 		},
 		{
 			name:              "Deleting with no machines and a machine set still around",
 			machineDeployment: &clusterv1.MachineDeployment{ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: &metav1.Time{Time: time.Now()}}},
 			machineSets: []*clusterv1.MachineSet{
-				fakeMachineSet("ms1", withSpecReplicas(1)),
+				fakeMachineSet("ms1", withStatusReplicas(1)),
 			},
-			machines:          []*clusterv1.Machine{},
 			getAndAdoptMachineSetsForDeploymentSucceeded: true,
+			machines:             []*clusterv1.Machine{},
+			getMachinesSucceeded: true,
 			expectCondition: metav1.Condition{
 				Type:   clusterv1.MachineDeploymentDeletingV1Beta2Condition,
-				Status: metav1.ConditionFalse,
+				Status: metav1.ConditionTrue,
 				Reason: clusterv1.MachineDeploymentDeletingDeletionTimestampSetV1Beta2Reason,
 				Message: "Deleting 1 MachineSets",
 			},
@@ -1066,7 +1099,7 @@ func Test_setDeletingCondition(t *testing.T) {
 			if tt.machines != nil {
 				machines = collections.FromMachines(tt.machines...)
 			}
 
-			setDeletingCondition(ctx, tt.machineDeployment, tt.machineSets, machines, tt.getAndAdoptMachineSetsForDeploymentSucceeded)
+			setDeletingCondition(ctx, tt.machineDeployment, tt.machineSets, machines, tt.getAndAdoptMachineSetsForDeploymentSucceeded, tt.getMachinesSucceeded)
 
 			condition := v1beta2conditions.Get(tt.machineDeployment, clusterv1.MachineDeploymentDeletingV1Beta2Condition)
 			g.Expect(condition).ToNot(BeNil())
@@ -1089,9 +1122,9 @@ func fakeMachineSet(name string, options ...fakeMachineSetOption) *clusterv1.Mac
 	return p
 }
 
-func withSpecReplicas(n int32) fakeMachineSetOption {
+func withStatusReplicas(n int32) fakeMachineSetOption {
 	return func(ms *clusterv1.MachineSet) {
-		ms.Spec.Replicas = &n
+		ms.Status.Replicas = n
 	}
 }
 
@@ -1147,7 +1180,7 @@ func withV1Beta2Condition(c metav1.Condition) fakeMachinesOption {
 		if m.Status.V1Beta2 == nil {
 			m.Status.V1Beta2 = &clusterv1.MachineV1Beta2Status{}
 		}
-		meta.SetStatusCondition(&m.Status.V1Beta2.Conditions, c)
+		v1beta2conditions.Set(m, c)
 	}
}
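One detail the fixtures above encode: a nil machines slice and an empty one now drive different outcomes, since the explicit getMachinesSucceeded flag took over from the old machines == nil check. Roughly (a sketch mirroring the table, not a case from it):

// Empty-but-valid list vs. failed list (assumed semantics, per the tests above):
setMachinesReadyCondition(ctx, md, collections.FromMachines(), true) // listed fine, zero machines => NoReplicas, ConditionTrue
setMachinesReadyCondition(ctx, md, nil, false)                       // the list itself failed => InternalError, ConditionUnknown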
diff --git a/internal/controllers/machinedeployment/machinedeployment_sync.go b/internal/controllers/machinedeployment/machinedeployment_sync.go
index db3ad69da27b..a2da7824b233 100644
--- a/internal/controllers/machinedeployment/machinedeployment_sync.go
+++ b/internal/controllers/machinedeployment/machinedeployment_sync.go
@@ -45,8 +45,8 @@ import (
 
 // sync is responsible for reconciling deployments on scaling events or when they
 // are paused.
-func (r *Reconciler) sync(ctx context.Context, md *clusterv1.MachineDeployment, msList []*clusterv1.MachineSet) error {
-	newMS, oldMSs, err := r.getAllMachineSetsAndSyncRevision(ctx, md, msList, false)
+func (r *Reconciler) sync(ctx context.Context, md *clusterv1.MachineDeployment, msList []*clusterv1.MachineSet, templateExists bool) error {
+	newMS, oldMSs, err := r.getAllMachineSetsAndSyncRevision(ctx, md, msList, false, templateExists)
 	if err != nil {
 		return err
 	}
@@ -76,7 +76,7 @@ func (r *Reconciler) sync(ctx context.Context, md *clusterv1.MachineDeployment,
 //
 // Note that currently the deployment controller is using caches to avoid querying the server for reads.
 // This may lead to stale reads of machine sets, thus incorrect deployment status.
-func (r *Reconciler) getAllMachineSetsAndSyncRevision(ctx context.Context, md *clusterv1.MachineDeployment, msList []*clusterv1.MachineSet, createIfNotExisted bool) (*clusterv1.MachineSet, []*clusterv1.MachineSet, error) {
+func (r *Reconciler) getAllMachineSetsAndSyncRevision(ctx context.Context, md *clusterv1.MachineDeployment, msList []*clusterv1.MachineSet, createIfNotExisted, templateExists bool) (*clusterv1.MachineSet, []*clusterv1.MachineSet, error) {
 	reconciliationTime := metav1.Now()
 	allOldMSs, err := mdutil.FindOldMachineSets(md, msList, &reconciliationTime)
 	if err != nil {
@@ -84,7 +84,7 @@ func (r *Reconciler) getAllMachineSetsAndSyncRevision(ctx context.Context, md *c
 	}
 
 	// Get new machine set with the updated revision number
-	newMS, err := r.getNewMachineSet(ctx, md, msList, allOldMSs, createIfNotExisted, &reconciliationTime)
+	newMS, err := r.getNewMachineSet(ctx, md, msList, allOldMSs, createIfNotExisted, templateExists, &reconciliationTime)
 	if err != nil {
 		return nil, nil, err
 	}
@@ -95,7 +95,7 @@ func (r *Reconciler) getAllMachineSetsAndSyncRevision(ctx context.Context, md *c
 // Returns a MachineSet that matches the intent of the given MachineDeployment.
 // If there does not exist such a MachineSet and createIfNotExisted is true, create a new MachineSet.
 // If there is already such a MachineSet, update it to propagate in-place mutable fields from the MachineDeployment.
-func (r *Reconciler) getNewMachineSet(ctx context.Context, md *clusterv1.MachineDeployment, msList, oldMSs []*clusterv1.MachineSet, createIfNotExists bool, reconciliationTime *metav1.Time) (*clusterv1.MachineSet, error) {
+func (r *Reconciler) getNewMachineSet(ctx context.Context, md *clusterv1.MachineDeployment, msList, oldMSs []*clusterv1.MachineSet, createIfNotExists, templateExists bool, reconciliationTime *metav1.Time) (*clusterv1.MachineSet, error) {
 	// Try to find a MachineSet which matches the MachineDeployments intent, while ignore diffs between
 	// the in-place mutable fields.
 	// If we find a matching MachineSet we just update it to propagate any changes to the in-place mutable
@@ -125,6 +125,10 @@ func (r *Reconciler) getNewMachineSet(ctx context.Context, md *clusterv1.Machine
 		return nil, nil
 	}
 
+	if !templateExists {
+		return nil, errors.New("cannot create a new MachineSet when templates do not exist")
+	}
+
 	// Create a new MachineSet and wait until the new MachineSet exists in the cache.
 	newMS, err := r.createMachineSetAndWait(ctx, md, oldMSs, createReason)
 	if err != nil {
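The new templateExists guard keeps getNewMachineSet from creating a MachineSet whose Machines could never come up because the referenced templates are gone. A hedged sketch of the caller-side wiring implied by the test fields bootstrapTemplateNotFound / infrastructureTemplateNotFound (the exact derivation is not part of this hunk):

// Assumed derivation, names mirroring the test fields above:
templateExists := !bootstrapTemplateNotFound && !infrastructureTemplateNotFound
if err := r.sync(ctx, md, msList, templateExists); err != nil {
	return ctrl.Result{}, err
}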
diff --git a/internal/controllers/machinedeployment/mdutil/util.go b/internal/controllers/machinedeployment/mdutil/util.go
index 7e60ffa016a7..21b119a1861e 100644
--- a/internal/controllers/machinedeployment/mdutil/util.go
+++ b/internal/controllers/machinedeployment/mdutil/util.go
@@ -547,6 +547,7 @@ func GetAvailableReplicaCountForMachineSets(machineSets []*clusterv1.MachineSet)
 }
 
 // GetV1Beta2ReadyReplicaCountForMachineSets returns the number of ready machines corresponding to the given machine sets.
+// Note: When none of the ms.Status.V1Beta2.ReadyReplicas are set, the func returns nil.
 func GetV1Beta2ReadyReplicaCountForMachineSets(machineSets []*clusterv1.MachineSet) *int32 {
 	var totalReadyReplicas *int32
 	for _, ms := range machineSets {
@@ -558,6 +559,7 @@ func GetV1Beta2ReadyReplicaCountForMachineSets(machineSets []*clusterv1.MachineS
 }
 
 // GetV1Beta2AvailableReplicaCountForMachineSets returns the number of available machines corresponding to the given machine sets.
+// Note: When none of the ms.Status.V1Beta2.AvailableReplicas are set, the func returns nil.
 func GetV1Beta2AvailableReplicaCountForMachineSets(machineSets []*clusterv1.MachineSet) *int32 {
 	var totalAvailableReplicas *int32
 	for _, ms := range machineSets {
@@ -569,6 +571,7 @@ func GetV1Beta2AvailableReplicaCountForMachineSets(machineSets []*clusterv1.Mach
 }
 
 // GetV1Beta2UptoDateReplicaCountForMachineSets returns the number of up to date machines corresponding to the given machine sets.
+// Note: When none of the ms.Status.V1Beta2.UpToDateReplicas are set, the func returns nil.
 func GetV1Beta2UptoDateReplicaCountForMachineSets(machineSets []*clusterv1.MachineSet) *int32 {
 	var totalUpToDateReplicas *int32
 	for _, ms := range machineSets {
diff --git a/internal/controllers/machineset/machineset_controller_status.go b/internal/controllers/machineset/machineset_controller_status.go
index a47aee17777d..72309c4b98be 100644
--- a/internal/controllers/machineset/machineset_controller_status.go
+++ b/internal/controllers/machineset/machineset_controller_status.go
@@ -296,6 +296,10 @@ func calculateMissingReferencesMessage(ms *clusterv1.MachineSet, bootstrapTempla
 }
 
 func aggregateStaleMachines(machines []*clusterv1.Machine) string {
+	if len(machines) == 0 {
+		return ""
+	}
+
 	machineNames := []string{}
 	for _, machine := range machines {
 		if !machine.GetDeletionTimestamp().IsZero() && time.Since(machine.GetDeletionTimestamp().Time) > time.Minute*30 {