Skip to content

Commit 9437498

Browse files
authored
Cleanup conversion webhooks when an operator is uninstalled (#2832)
Problem: When uninstalling a CSV, OLM has always avoided deleting the associated CRD as all CRs on cluster are subsequently deleted, possibly resulting in user dataloss. OLM supports defining conversion webhooks within the CSV. On cluster, conversion webhooks are defined with a CRD and point to a service that handles conversion. If the service is unable to fulfill the request, all requests against the CRs associated with the CRD will fail. When uninstalling a CSV, OLM does not remove the conversion webhook from the CRD, meaning that all requests against the CRs associated with the CRD will fail, resulting in at least two concerns: 1. OLM is unable to subsequently reinstall the operator. When installing a CSV, if the CRD already exists and instances of CRs exist as well, OLM performs a series of checks which ensure that none of the CRs are invalidated against the new schema. The existing CRD's conversion webhooks points to a non-existant service, causing the check to fail and preventing installs. 2. Broken conversion webhooks causes kubernete's garbage collection to fail. Solution: When a CSV is deleted, if no CSV exists that is replacing it, set the CRD's conversion strategy to None. Signed-off-by: Alexander Greene <[email protected]>
1 parent f982e2f commit 9437498

File tree

3 files changed

+65
-1
lines changed

3 files changed

+65
-1
lines changed

pkg/controller/operators/olm/operator.go

+41
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
1212
corev1 "k8s.io/api/core/v1"
1313
rbacv1 "k8s.io/api/rbac/v1"
14+
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
1415
extinf "k8s.io/apiextensions-apiserver/pkg/client/informers/externalversions"
1516
apierrors "k8s.io/apimachinery/pkg/api/errors"
1617
"k8s.io/apimachinery/pkg/api/meta"
@@ -1121,6 +1122,46 @@ func (a *Operator) handleClusterServiceVersionDeletion(obj interface{}) {
11211122
logger.WithError(err).Warnf("failed to requeue gc event: %v", webhook)
11221123
}
11231124
}
1125+
1126+
// Conversion webhooks are defined within a CRD.
1127+
// In an effort to prevent customer dataloss, OLM does not delete CRDs associated with a CSV when it is deleted.
1128+
// Deleting a CSV that introduced a conversion webhook removes the deployment that serviced the conversion webhook calls.
1129+
// If a conversion webhook is defined and the service isn't available, all requests against the CR associated with the CRD will fail.
1130+
// This ultimately breaks kubernetes garbage collection and prevents OLM from reinstalling the CSV as CR validation against the new CRD's
1131+
// openapiv3 schema fails.
1132+
// As such, when a CSV is deleted OLM will check if it is being replaced. If the CSV is not being replaced, OLM will remove the conversion
1133+
// webhook from the CRD definition.
1134+
csvs, err := a.lister.OperatorsV1alpha1().ClusterServiceVersionLister().ClusterServiceVersions(clusterServiceVersion.GetNamespace()).List(labels.Everything())
1135+
if err != nil {
1136+
logger.Errorf("error listing csvs: %v\n", err)
1137+
}
1138+
for _, csv := range csvs {
1139+
if csv.Spec.Replaces == clusterServiceVersion.GetName() {
1140+
return
1141+
}
1142+
}
1143+
1144+
for _, desc := range clusterServiceVersion.Spec.WebhookDefinitions {
1145+
if desc.Type != v1alpha1.ConversionWebhook || len(desc.ConversionCRDs) == 0 {
1146+
continue
1147+
}
1148+
1149+
for i, crdName := range desc.ConversionCRDs {
1150+
crd, err := a.lister.APIExtensionsV1().CustomResourceDefinitionLister().Get(crdName)
1151+
if err != nil {
1152+
logger.Errorf("error getting CRD %v which was defined in CSVs spec.WebhookDefinition[%d]: %v\n", crdName, i, err)
1153+
continue
1154+
}
1155+
1156+
copy := crd.DeepCopy()
1157+
copy.Spec.Conversion.Strategy = apiextensionsv1.NoneConverter
1158+
copy.Spec.Conversion.Webhook = nil
1159+
1160+
if _, err = a.opClient.ApiextensionsInterface().ApiextensionsV1().CustomResourceDefinitions().Update(context.TODO(), copy, metav1.UpdateOptions{}); err != nil {
1161+
logger.Errorf("error updating conversion strategy for CRD %v: %v\n", crdName, err)
1162+
}
1163+
}
1164+
}
11241165
}
11251166

11261167
func (a *Operator) removeDanglingChildCSVs(csv *v1alpha1.ClusterServiceVersion) error {

test/e2e/csv_e2e_test.go

+3
Original file line numberDiff line numberDiff line change
@@ -4206,6 +4206,9 @@ func findLastEvent(events *corev1.EventList) (event corev1.Event) {
42064206
func buildCSVCleanupFunc(c operatorclient.ClientInterface, crc versioned.Interface, csv operatorsv1alpha1.ClusterServiceVersion, namespace string, deleteCRDs, deleteAPIServices bool) cleanupFunc {
42074207
return func() {
42084208
err := crc.OperatorsV1alpha1().ClusterServiceVersions(namespace).Delete(context.TODO(), csv.GetName(), metav1.DeleteOptions{})
4209+
if err != nil && apierrors.IsNotFound(err) {
4210+
err = nil
4211+
}
42094212
Expect(err).ShouldNot(HaveOccurred())
42104213

42114214
if deleteCRDs {

test/e2e/webhook_e2e_test.go

+21-1
Original file line numberDiff line numberDiff line change
@@ -639,6 +639,7 @@ var _ = Describe("CSVs with a Webhook", func() {
639639
}).Should(Equal(2))
640640
})
641641
When("Installed from a catalog Source", func() {
642+
const csvName = "webhook-operator.v0.0.1"
642643
var cleanupCSV cleanupFunc
643644
var cleanupCatSrc cleanupFunc
644645
var cleanupSubscription cleanupFunc
@@ -687,7 +688,7 @@ var _ = Describe("CSVs with a Webhook", func() {
687688
defer cleanupSubscription()
688689

689690
// Wait for webhook-operator v2 csv to succeed
690-
csv, err := awaitCSV(crc, source.GetNamespace(), "webhook-operator.v0.0.1", csvSucceededChecker)
691+
csv, err := awaitCSV(crc, source.GetNamespace(), csvName, csvSucceededChecker)
691692
require.NoError(GinkgoT(), err)
692693

693694
cleanupCSV = buildCSVCleanupFunc(c, crc, *csv, source.GetNamespace(), true, true)
@@ -775,6 +776,25 @@ var _ = Describe("CSVs with a Webhook", func() {
775776
require.True(GinkgoT(), ok, "Unable to get spec.conversion.valid of v2 object")
776777
require.True(GinkgoT(), v2SpecConversionMutate)
777778
require.True(GinkgoT(), v2SpecConversionValid)
779+
780+
// Check that conversion strategies are disabled after uninstalling the operator.
781+
err = crc.OperatorsV1alpha1().ClusterServiceVersions(generatedNamespace.GetName()).Delete(context.TODO(), csvName, metav1.DeleteOptions{})
782+
require.NoError(GinkgoT(), err)
783+
784+
Eventually(func() error {
785+
crd, err := c.ApiextensionsInterface().ApiextensionsV1().CustomResourceDefinitions().Get(context.TODO(), "webhooktests.webhook.operators.coreos.io", metav1.GetOptions{})
786+
if err != nil {
787+
return err
788+
}
789+
790+
if crd.Spec.Conversion.Strategy != apiextensionsv1.NoneConverter {
791+
return fmt.Errorf("conversion strategy is not NoneConverter")
792+
}
793+
if crd.Spec.Conversion.Webhook != nil {
794+
return fmt.Errorf("webhook is not nil")
795+
}
796+
return nil
797+
}).Should(Succeed())
778798
})
779799
})
780800
When("WebhookDescription has conversionCRDs field", func() {

0 commit comments

Comments
 (0)