From a237f2fa2fbd3983203b3f5d6b941d3d2ff898dc Mon Sep 17 00:00:00 2001 From: Todd Short Date: Thu, 22 Feb 2024 09:53:06 -0500 Subject: [PATCH] Fix finalizer processing There are two reconcilers for the CSV, one is controller-runtime based, the other is based on queueinformer. A finalizer was added to the CSV and it's handled by the controller-runtime reconciler. However, the queueinformer-based reconciler may take a while to do its processing such that the CSV may be deleted and the finalizer run via the controller-runtime reconciler. (This still takes <1s.) This causes problems when CSV being synced is deleted. The queueinformer attempts to sync RBAC to the stale CSV. The proper (BIG/risky) fix is to consolidate the two reconcilers. A less risky fix is to move the finalizer to the queueinformer reconciler and query the lister cache to see if the CSV has been deleted, and not do the RBAC updates if that's the case. Signed-off-by: Todd Short --- pkg/controller/operators/olm/operator.go | 112 ++++++++++++++++- pkg/controller/operators/olm/operator_test.go | 5 + .../operatorconditiongenerator_controller.go | 118 ------------------ test/e2e/installplan_e2e_test.go | 2 +- 4 files changed, 117 insertions(+), 120 deletions(-) diff --git a/pkg/controller/operators/olm/operator.go b/pkg/controller/operators/olm/operator.go index ee75f84c0f..b7f9c26ffa 100644 --- a/pkg/controller/operators/olm/operator.go +++ b/pkg/controller/operators/olm/operator.go @@ -36,6 +36,8 @@ import ( apiregistrationv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1" kagg "k8s.io/kube-aggregator/pkg/client/informers/externalversions" utilclock "k8s.io/utils/clock" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" operatorsv1 "github.com/operator-framework/api/pkg/operators/v1" "github.com/operator-framework/api/pkg/operators/v1alpha1" @@ -1093,6 +1095,9 @@ func (a *Operator) handleClusterServiceVersionDeletion(obj interface{}) { "phase": clusterServiceVersion.Status.Phase, }) + logger.Debug("start deleting CSV") + defer logger.Debug("end deleting CSV") + metrics.DeleteCSVMetric(clusterServiceVersion) if clusterServiceVersion.IsCopied() { @@ -1277,6 +1282,96 @@ func (a *Operator) deleteChild(csv *metav1.PartialObjectMetadata, logger *logrus return a.client.OperatorsV1alpha1().ClusterServiceVersions(csv.GetNamespace()).Delete(context.TODO(), csv.GetName(), metav1.DeleteOptions{}) } +// Return values, err, ok; ok == true: continue Reconcile, ok == false: exit Reconcile +func (a *Operator) processFinalizer(csv *v1alpha1.ClusterServiceVersion, log *logrus.Entry) (error, bool) { + myFinalizerName := "operators.coreos.com/csv-cleanup" + + if csv.ObjectMeta.DeletionTimestamp.IsZero() { + // CSV is not being deleted, add finalizer if not present + if !controllerutil.ContainsFinalizer(csv, myFinalizerName) { + controllerutil.AddFinalizer(csv, myFinalizerName) + _, err := a.client.OperatorsV1alpha1().ClusterServiceVersions(csv.GetNamespace()).Update(a.ctx, csv, metav1.UpdateOptions{}) + if err != nil { + log.WithError(err).Error("Adding finalizer") + return err, false + } + } + return nil, true + } + + if !controllerutil.ContainsFinalizer(csv, myFinalizerName) { + // Finalizer has been removed; stop reconciliation as the CSV is being deleted + return nil, false + } + + log.Info("started finalizer") + defer log.Info("completed finalizer") + + // CSV is being deleted and the finalizer still present; do any clean up + ownerSelector := ownerutil.CSVOwnerSelector(csv) + listOptions := metav1.ListOptions{ + LabelSelector: ownerSelector.String(), + } + deleteOptions := metav1.DeleteOptions{} + // Look for resources owned by this CSV, and delete them. + log.WithFields(logrus.Fields{"selector": ownerSelector}).Info("Cleaning up resources after CSV deletion") + var errs []error + + err := a.opClient.KubernetesInterface().RbacV1().ClusterRoleBindings().DeleteCollection(a.ctx, deleteOptions, listOptions) + if client.IgnoreNotFound(err) != nil { + log.WithError(err).Error("Deleting ClusterRoleBindings on CSV delete") + errs = append(errs, err) + } + + err = a.opClient.KubernetesInterface().RbacV1().ClusterRoles().DeleteCollection(a.ctx, deleteOptions, listOptions) + if client.IgnoreNotFound(err) != nil { + log.WithError(err).Error("Deleting ClusterRoles on CSV delete") + errs = append(errs, err) + } + err = a.opClient.KubernetesInterface().AdmissionregistrationV1().MutatingWebhookConfigurations().DeleteCollection(a.ctx, deleteOptions, listOptions) + if client.IgnoreNotFound(err) != nil { + log.WithError(err).Error("Deleting MutatingWebhookConfigurations on CSV delete") + errs = append(errs, err) + } + + err = a.opClient.KubernetesInterface().AdmissionregistrationV1().ValidatingWebhookConfigurations().DeleteCollection(a.ctx, deleteOptions, listOptions) + if client.IgnoreNotFound(err) != nil { + log.WithError(err).Error("Deleting ValidatingWebhookConfigurations on CSV delete") + errs = append(errs, err) + } + + // Make sure things are deleted + crbList, err := a.lister.RbacV1().ClusterRoleBindingLister().List(ownerSelector) + if err != nil { + errs = append(errs, err) + } else if len(crbList) != 0 { + errs = append(errs, fmt.Errorf("waiting for ClusterRoleBindings to delete")) + } + + crList, err := a.lister.RbacV1().ClusterRoleLister().List(ownerSelector) + if err != nil { + errs = append(errs, err) + } else if len(crList) != 0 { + errs = append(errs, fmt.Errorf("waiting for ClusterRoles to delete")) + } + + // Return any errors + if err := utilerrors.NewAggregate(errs); err != nil { + return err, false + } + + // If no errors, remove our finalizer from the CSV and update + controllerutil.RemoveFinalizer(csv, myFinalizerName) + _, err = a.client.OperatorsV1alpha1().ClusterServiceVersions(csv.GetNamespace()).Update(a.ctx, csv, metav1.UpdateOptions{}) + if err != nil { + log.WithError(err).Error("Removing finalizer") + return err, false + } + + // Stop reconciliation as the csv is being deleted + return nil, false +} + // syncClusterServiceVersion is the method that gets called when we see a CSV event in the cluster func (a *Operator) syncClusterServiceVersion(obj interface{}) (syncError error) { clusterServiceVersion, ok := obj.(*v1alpha1.ClusterServiceVersion) @@ -1291,7 +1386,22 @@ func (a *Operator) syncClusterServiceVersion(obj interface{}) (syncError error) "namespace": clusterServiceVersion.GetNamespace(), "phase": clusterServiceVersion.Status.Phase, }) - logger.Debug("syncing CSV") + logger.Debug("start syncing CSV") + defer logger.Debug("end syncing CSV") + + // get an up-to-date clusterServiceVersion from the cache + clusterServiceVersion, err := a.lister.OperatorsV1alpha1().ClusterServiceVersionLister().ClusterServiceVersions(clusterServiceVersion.GetNamespace()).Get(clusterServiceVersion.GetName()) + if apierrors.IsNotFound(err) { + logger.Info("CSV has beeen deleted") + return nil + } else if err != nil { + logger.Info("Error getting latest version of CSV") + return err + } + + if err, ok := a.processFinalizer(clusterServiceVersion, logger); !ok { + return err + } if a.csvNotification != nil { a.csvNotification.OnAddOrUpdate(clusterServiceVersion) diff --git a/pkg/controller/operators/olm/operator_test.go b/pkg/controller/operators/olm/operator_test.go index 538408db34..2efe59a413 100644 --- a/pkg/controller/operators/olm/operator_test.go +++ b/pkg/controller/operators/olm/operator_test.go @@ -5832,6 +5832,9 @@ func RequireObjectsInCache(t *testing.T, lister operatorlister.OperatorLister, n fetched, err = lister.RbacV1().RoleBindingLister().RoleBindings(namespace).Get(o.GetName()) case *v1alpha1.ClusterServiceVersion: fetched, err = lister.OperatorsV1alpha1().ClusterServiceVersionLister().ClusterServiceVersions(namespace).Get(o.GetName()) + // We don't care about finalizers + object.(*v1alpha1.ClusterServiceVersion).Finalizers = nil + fetched.(*v1alpha1.ClusterServiceVersion).Finalizers = nil case *operatorsv1.OperatorGroup: fetched, err = lister.OperatorsV1().OperatorGroupLister().OperatorGroups(namespace).Get(o.GetName()) default: @@ -5885,6 +5888,8 @@ func CheckObjectsInNamespace(t *testing.T, opClient operatorclient.ClientInterfa // and this will still check that the final state is correct object.(*v1alpha1.ClusterServiceVersion).Status.Conditions = nil fetched.(*v1alpha1.ClusterServiceVersion).Status.Conditions = nil + object.(*v1alpha1.ClusterServiceVersion).Finalizers = nil + fetched.(*v1alpha1.ClusterServiceVersion).Finalizers = nil case *operatorsv1.OperatorGroup: name = o.GetName() fetched, err = client.OperatorsV1().OperatorGroups(namespace).Get(context.TODO(), o.GetName(), metav1.GetOptions{}) diff --git a/pkg/controller/operators/operatorconditiongenerator_controller.go b/pkg/controller/operators/operatorconditiongenerator_controller.go index 1d1963c888..75bd847b88 100644 --- a/pkg/controller/operators/operatorconditiongenerator_controller.go +++ b/pkg/controller/operators/operatorconditiongenerator_controller.go @@ -2,20 +2,15 @@ package operators import ( "context" - "fmt" "reflect" "github.com/go-logr/logr" - admissionregistrationv1 "k8s.io/api/admissionregistration/v1" - rbacv1 "k8s.io/api/rbac/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" - utilerrors "k8s.io/apimachinery/pkg/util/errors" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/predicate" @@ -100,10 +95,6 @@ func (r *OperatorConditionGeneratorReconciler) Reconcile(ctx context.Context, re return ctrl.Result{}, client.IgnoreNotFound(err) } - if err, ok := r.processFinalizer(ctx, in); !ok { - return ctrl.Result{}, err - } - operatorCondition := &operatorsv2.OperatorCondition{ ObjectMeta: metav1.ObjectMeta{ // For now, only generate an OperatorCondition with the same name as the csv. @@ -179,112 +170,3 @@ func (r *OperatorConditionGeneratorReconciler) ensureOperatorCondition(operatorC existingOperatorCondition.Spec.ServiceAccounts = operatorCondition.Spec.ServiceAccounts return r.Client.Update(context.TODO(), existingOperatorCondition) } - -// Return values, err, ok; ok == true: continue Reconcile, ok == false: exit Reconcile -func (r *OperatorConditionGeneratorReconciler) processFinalizer(ctx context.Context, csv *operatorsv1alpha1.ClusterServiceVersion) (error, bool) { - myFinalizerName := "operators.coreos.com/csv-cleanup" - log := r.log.WithValues("name", csv.GetName()).WithValues("namespace", csv.GetNamespace()) - - if csv.ObjectMeta.DeletionTimestamp.IsZero() { - // CSV is not being deleted, add finalizer if not present - if !controllerutil.ContainsFinalizer(csv, myFinalizerName) { - patch := csv.DeepCopy() - controllerutil.AddFinalizer(patch, myFinalizerName) - if err := r.Client.Patch(ctx, patch, client.MergeFrom(csv)); err != nil { - log.Error(err, "Adding finalizer") - return err, false - } - } - return nil, true - } - - if !controllerutil.ContainsFinalizer(csv, myFinalizerName) { - // Finalizer has been removed; stop reconciliation as the CSV is being deleted - return nil, false - } - - // CSV is being deleted and the finalizer still present; do any clean up - ownerSelector := ownerutil.CSVOwnerSelector(csv) - listOptions := client.ListOptions{ - LabelSelector: ownerSelector, - } - deleteOptions := client.DeleteAllOfOptions{ - ListOptions: listOptions, - } - // Look for resources owned by this CSV, and delete them. - log.WithValues("selector", ownerSelector).Info("Cleaning up resources after CSV deletion") - var errs []error - - err := r.Client.DeleteAllOf(ctx, &rbacv1.ClusterRoleBinding{}, &deleteOptions) - if client.IgnoreNotFound(err) != nil { - log.Error(err, "Deleting ClusterRoleBindings on CSV delete") - errs = append(errs, err) - } - - err = r.Client.DeleteAllOf(ctx, &rbacv1.ClusterRole{}, &deleteOptions) - if client.IgnoreNotFound(err) != nil { - log.Error(err, "Deleting ClusterRoles on CSV delete") - errs = append(errs, err) - } - - err = r.Client.DeleteAllOf(ctx, &admissionregistrationv1.MutatingWebhookConfiguration{}, &deleteOptions) - if client.IgnoreNotFound(err) != nil { - log.Error(err, "Deleting MutatingWebhookConfigurations on CSV delete") - errs = append(errs, err) - } - - err = r.Client.DeleteAllOf(ctx, &admissionregistrationv1.ValidatingWebhookConfiguration{}, &deleteOptions) - if client.IgnoreNotFound(err) != nil { - log.Error(err, "Deleting ValidatingWebhookConfigurations on CSV delete") - errs = append(errs, err) - } - - // Make sure things are deleted - crbList := &rbacv1.ClusterRoleBindingList{} - err = r.Client.List(ctx, crbList, &listOptions) - if err != nil { - errs = append(errs, err) - } else if len(crbList.Items) != 0 { - errs = append(errs, fmt.Errorf("waiting for ClusterRoleBindings to delete")) - } - - crList := &rbacv1.ClusterRoleList{} - err = r.Client.List(ctx, crList, &listOptions) - if err != nil { - errs = append(errs, err) - } else if len(crList.Items) != 0 { - errs = append(errs, fmt.Errorf("waiting for ClusterRoles to delete")) - } - - mwcList := &admissionregistrationv1.MutatingWebhookConfigurationList{} - err = r.Client.List(ctx, mwcList, &listOptions) - if err != nil { - errs = append(errs, err) - } else if len(mwcList.Items) != 0 { - errs = append(errs, fmt.Errorf("waiting for MutatingWebhookConfigurations to delete")) - } - - vwcList := &admissionregistrationv1.ValidatingWebhookConfigurationList{} - err = r.Client.List(ctx, vwcList, &listOptions) - if err != nil { - errs = append(errs, err) - } else if len(vwcList.Items) != 0 { - errs = append(errs, fmt.Errorf("waiting for ValidatingWebhookConfigurations to delete")) - } - - // Return any errors - if err := utilerrors.NewAggregate(errs); err != nil { - return err, false - } - - // If no errors, remove our finalizer from the CSV and update - patch := csv.DeepCopy() - controllerutil.RemoveFinalizer(patch, myFinalizerName) - if err := r.Client.Patch(ctx, patch, client.MergeFrom(csv)); err != nil { - log.Error(err, "Removing finalizer") - return err, false - } - - // Stop reconciliation as the csv is being deleted - return nil, false -} diff --git a/test/e2e/installplan_e2e_test.go b/test/e2e/installplan_e2e_test.go index f3c8732f9f..da683845e3 100644 --- a/test/e2e/installplan_e2e_test.go +++ b/test/e2e/installplan_e2e_test.go @@ -2925,7 +2925,7 @@ var _ = Describe("Install Plan", func() { } return true - }, pollDuration*2, pollInterval).Should(BeTrue()) + }, pollDuration, pollInterval).Should(BeTrue()) By("Cleaning up the test") })