Skip to content

OCPBUGS-31522: Warn and allow CRD upgrade if validation fails but new CRD specifies a conversion strategy #3209

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions pkg/controller/operators/catalog/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -1585,9 +1585,7 @@ func (o *Operator) ensureSubscriptionCSVState(logger *logrus.Entry, sub *v1alpha
}

func (o *Operator) setIPReference(subs []*v1alpha1.Subscription, gen int, installPlanRef *corev1.ObjectReference) []*v1alpha1.Subscription {
var (
lastUpdated = o.now()
)
lastUpdated := o.now()
for _, sub := range subs {
sub.Status.LastUpdated = lastUpdated
if installPlanRef != nil {
Expand Down Expand Up @@ -2204,6 +2202,10 @@ func validateV1Beta1CRDCompatibility(dynamicClient dynamic.Interface, oldCRD *ap
return validateExistingCRs(dynamicClient, gr, validationsMap)
}

type validationError struct {
error
}

// validateExistingCRs lists all CRs for each version entry in validationsMap, then validates each using the paired validation.
func validateExistingCRs(dynamicClient dynamic.Interface, gr schema.GroupResource, validationsMap map[string]*apiextensions.CustomResourceValidation) error {
for version, schemaValidation := range validationsMap {
Expand All @@ -2212,7 +2214,6 @@ func validateExistingCRs(dynamicClient dynamic.Interface, gr schema.GroupResourc
if err != nil {
return fmt.Errorf("error creating validator for schema version %s: %s", version, err)
}

gvr := schema.GroupVersionResource{Group: gr.Group, Version: version, Resource: gr.Resource}
crList, err := dynamicClient.Resource(gvr).List(context.TODO(), metav1.ListOptions{})
if err != nil {
Expand All @@ -2229,7 +2230,7 @@ func validateExistingCRs(dynamicClient dynamic.Interface, gr schema.GroupResourc
} else {
namespacedName = fmt.Sprintf("%s/%s", cr.GetNamespace(), cr.GetName())
}
return fmt.Errorf("error validating %s %q: updated validation is too restrictive: %v", cr.GroupVersionKind(), namespacedName, err)
return validationError{fmt.Errorf("error validating %s %q: updated validation is too restrictive: %v", cr.GroupVersionKind(), namespacedName, err)}
}
}
}
Expand Down Expand Up @@ -2321,7 +2322,7 @@ func (o *Operator) ExecutePlan(plan *v1alpha1.InstallPlan) error {
o.logger.Errorf("failed to get a client for plan execution- %v", err)
return err
}
b := newBuilder(plan, o.lister.OperatorsV1alpha1().ClusterServiceVersionLister(), builderKubeClient, builderDynamicClient, r, o.logger)
b := newBuilder(plan, o.lister.OperatorsV1alpha1().ClusterServiceVersionLister(), builderKubeClient, builderDynamicClient, r, o.logger, o.recorder)

for i, step := range plan.Status.Plan {
if err := func(i int, step *v1alpha1.Step) error {
Expand Down Expand Up @@ -2782,7 +2783,6 @@ func (o *Operator) ExecutePlan(plan *v1alpha1.InstallPlan) error {
func (o *Operator) getExistingAPIOwners(namespace string) (map[string][]string, error) {
// Get a list of CSVs in the namespace
csvList, err := o.client.OperatorsV1alpha1().ClusterServiceVersions(namespace).List(context.TODO(), metav1.ListOptions{})

if err != nil {
return nil, err
}
Expand Down
28 changes: 17 additions & 11 deletions pkg/controller/operators/catalog/operator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,10 @@ func TestSyncInstallPlanUnhappy(t *testing.T) {
testName: "HasSteps/NoOperatorGroup",
err: fmt.Errorf("attenuated service account query failed - no operator group found that is managing this namespace"),
expectedPhase: v1alpha1.InstallPlanPhaseInstalling,
expectedCondition: &v1alpha1.InstallPlanCondition{Type: v1alpha1.InstallPlanInstalled, Status: corev1.ConditionFalse, Reason: v1alpha1.InstallPlanReasonInstallCheckFailed,
Message: "no operator group found that is managing this namespace"},
expectedCondition: &v1alpha1.InstallPlanCondition{
Type: v1alpha1.InstallPlanInstalled, Status: corev1.ConditionFalse, Reason: v1alpha1.InstallPlanReasonInstallCheckFailed,
Message: "no operator group found that is managing this namespace",
},
in: ipWithSteps,
},
{
Expand All @@ -261,8 +263,10 @@ func TestSyncInstallPlanUnhappy(t *testing.T) {
err: fmt.Errorf("attenuated service account query failed - more than one operator group(s) are managing this namespace count=2"),
expectedPhase: v1alpha1.InstallPlanPhaseInstalling,
in: ipWithSteps,
expectedCondition: &v1alpha1.InstallPlanCondition{Type: v1alpha1.InstallPlanInstalled, Status: corev1.ConditionFalse, Reason: v1alpha1.InstallPlanReasonInstallCheckFailed,
Message: "more than one operator group(s) are managing this namespace count=2"},
expectedCondition: &v1alpha1.InstallPlanCondition{
Type: v1alpha1.InstallPlanInstalled, Status: corev1.ConditionFalse, Reason: v1alpha1.InstallPlanReasonInstallCheckFailed,
Message: "more than one operator group(s) are managing this namespace count=2",
},
clientObjs: []runtime.Object{
operatorGroup("og1", "sa", namespace,
&corev1.ObjectReference{
Expand All @@ -283,8 +287,10 @@ func TestSyncInstallPlanUnhappy(t *testing.T) {
testName: "HasSteps/NonExistentServiceAccount",
err: fmt.Errorf("attenuated service account query failed - please make sure the service account exists. sa=sa1 operatorgroup=ns/og"),
expectedPhase: v1alpha1.InstallPlanPhaseInstalling,
expectedCondition: &v1alpha1.InstallPlanCondition{Type: v1alpha1.InstallPlanInstalled, Status: corev1.ConditionFalse, Reason: v1alpha1.InstallPlanReasonInstallCheckFailed,
Message: "please make sure the service account exists. sa=sa1 operatorgroup=ns/og"},
expectedCondition: &v1alpha1.InstallPlanCondition{
Type: v1alpha1.InstallPlanInstalled, Status: corev1.ConditionFalse, Reason: v1alpha1.InstallPlanReasonInstallCheckFailed,
Message: "please make sure the service account exists. sa=sa1 operatorgroup=ns/og",
},
in: ipWithSteps,
clientObjs: []runtime.Object{
operatorGroup("og", "sa1", namespace, nil),
Expand Down Expand Up @@ -1623,7 +1629,7 @@ func TestValidateV1Beta1CRDCompatibility(t *testing.T) {
},
oldCRD: unversionedCRDForV1beta1File("testdata/hivebug/crd.yaml"),
newCRD: unversionedCRDForV1beta1File("testdata/hivebug/crd.yaml"),
want: fmt.Errorf("error validating hive.openshift.io/v1, Kind=MachinePool \"test\": updated validation is too restrictive: [[].spec.clusterDeploymentRef: Invalid value: \"null\": spec.clusterDeploymentRef in body must be of type object: \"null\", [].spec.name: Required value, [].spec.platform: Required value]"),
want: validationError{fmt.Errorf("error validating hive.openshift.io/v1, Kind=MachinePool \"test\": updated validation is too restrictive: [[].spec.clusterDeploymentRef: Invalid value: \"null\": spec.clusterDeploymentRef in body must be of type object: \"null\", [].spec.name: Required value, [].spec.platform: Required value]")},
},
{
name: "backwards incompatible change",
Expand All @@ -1637,7 +1643,7 @@ func TestValidateV1Beta1CRDCompatibility(t *testing.T) {
},
oldCRD: unversionedCRDForV1beta1File("testdata/apiextensionsv1beta1/crd.old.yaml"),
newCRD: unversionedCRDForV1beta1File("testdata/apiextensionsv1beta1/crd.yaml"),
want: fmt.Errorf("error validating cluster.com/v1alpha1, Kind=testcrd \"my-cr-1\": updated validation is too restrictive: [].spec.scalar: Invalid value: 2: spec.scalar in body should be greater than or equal to 3"),
want: validationError{fmt.Errorf("error validating cluster.com/v1alpha1, Kind=testcrd \"my-cr-1\": updated validation is too restrictive: [].spec.scalar: Invalid value: 2: spec.scalar in body should be greater than or equal to 3")},
},
{
name: "unserved version",
Expand Down Expand Up @@ -1670,7 +1676,7 @@ func TestValidateV1Beta1CRDCompatibility(t *testing.T) {
},
oldCRD: unversionedCRDForV1beta1File("testdata/apiextensionsv1beta1/crd.no-versions-list.old.yaml"),
newCRD: unversionedCRDForV1beta1File("testdata/apiextensionsv1beta1/crd.no-versions-list.yaml"),
want: fmt.Errorf("error validating cluster.com/v1alpha1, Kind=testcrd \"my-cr-1\": updated validation is too restrictive: [].spec.scalar: Invalid value: 2: spec.scalar in body should be greater than or equal to 3"),
want: validationError{fmt.Errorf("error validating cluster.com/v1alpha1, Kind=testcrd \"my-cr-1\": updated validation is too restrictive: [].spec.scalar: Invalid value: 2: spec.scalar in body should be greater than or equal to 3")},
},
}
for _, tt := range tests {
Expand Down Expand Up @@ -1725,7 +1731,7 @@ func TestValidateV1CRDCompatibility(t *testing.T) {
},
oldCRD: unversionedCRDForV1File("testdata/apiextensionsv1/crontabs.crd.old.yaml"),
newCRD: unversionedCRDForV1File("testdata/apiextensionsv1/crontabs.crd.yaml"),
want: fmt.Errorf("error validating stable.example.com/v2, Kind=CronTab \"my-crontab\": updated validation is too restrictive: [].spec.replicas: Invalid value: 10: spec.replicas in body should be less than or equal to 9"),
want: validationError{fmt.Errorf("error validating stable.example.com/v2, Kind=CronTab \"my-crontab\": updated validation is too restrictive: [].spec.replicas: Invalid value: 10: spec.replicas in body should be less than or equal to 9")},
},
{
name: "cr not invalidated by unserved version",
Expand All @@ -1752,7 +1758,7 @@ func TestValidateV1CRDCompatibility(t *testing.T) {
},
oldCRD: unversionedCRDForV1File("testdata/apiextensionsv1/single-version-crd.old.yaml"),
newCRD: unversionedCRDForV1File("testdata/apiextensionsv1/single-version-crd.yaml"),
want: fmt.Errorf("error validating cluster.com/v1alpha1, Kind=testcrd \"my-cr-1\": updated validation is too restrictive: [].spec.scalar: Invalid value: 100: spec.scalar in body should be less than or equal to 50"),
want: validationError{fmt.Errorf("error validating cluster.com/v1alpha1, Kind=testcrd \"my-cr-1\": updated validation is too restrictive: [].spec.scalar: Invalid value: 100: spec.scalar in body should be less than or equal to 50")},
},
}
for _, tt := range tests {
Expand Down
31 changes: 26 additions & 5 deletions pkg/controller/operators/catalog/step.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@ import (
"github.com/operator-framework/operator-lifecycle-manager/pkg/controller/install"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
apiextensionsv1beta1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1"
apiextensionsv1client "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/typed/apiextensions/v1"
apiextensionsv1beta1client "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/typed/apiextensions/v1beta1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/dynamic"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/retry"

"github.com/operator-framework/api/pkg/operators/v1alpha1"
Expand Down Expand Up @@ -44,18 +46,20 @@ type builder struct {
dynamicClient dynamic.Interface
manifestResolver ManifestResolver
logger logrus.FieldLogger
eventRecorder record.EventRecorder

annotator alongside.Annotator
}

func newBuilder(plan *v1alpha1.InstallPlan, csvLister listersv1alpha1.ClusterServiceVersionLister, opclient operatorclient.ClientInterface, dynamicClient dynamic.Interface, manifestResolver ManifestResolver, logger logrus.FieldLogger) *builder {
func newBuilder(plan *v1alpha1.InstallPlan, csvLister listersv1alpha1.ClusterServiceVersionLister, opclient operatorclient.ClientInterface, dynamicClient dynamic.Interface, manifestResolver ManifestResolver, logger logrus.FieldLogger, er record.EventRecorder) *builder {
return &builder{
plan: plan,
csvLister: csvLister,
opclient: opclient,
dynamicClient: dynamicClient,
manifestResolver: manifestResolver,
logger: logger,
eventRecorder: er,
}
}

Expand Down Expand Up @@ -140,7 +144,26 @@ func (b *builder) NewCRDV1Step(client apiextensionsv1client.ApiextensionsV1Inter
currentCRD, _ := client.CustomResourceDefinitions().Get(context.TODO(), crd.GetName(), metav1.GetOptions{})
crd.SetResourceVersion(currentCRD.GetResourceVersion())
if err = validateV1CRDCompatibility(b.dynamicClient, currentCRD, crd); err != nil {
return fmt.Errorf("error validating existing CRs against new CRD's schema for %q: %w", step.Resource.Name, err)
vErr := &validationError{}
// if the conversion strategy in the new CRD is not "Webhook" OR the error is not a ValidationError
// return an error. This will catch and return any errors that occur unrelated to actual validation.
// For example, the API server returning an error when performing a list operation
if crd.Spec.Conversion == nil || crd.Spec.Conversion.Strategy != apiextensionsv1.WebhookConverter || !errors.As(err, vErr) {
return fmt.Errorf("error validating existing CRs against new CRD's schema for %q: %w", step.Resource.Name, err)
}
// If the conversion strategy in the new CRD is "Webhook" and the error that occurred
// is an error related to validation, warn that validation failed but that we are trusting
// that the conversion strategy specified by the author will successfully convert to a format
// that passes validation and allow the upgrade to continue
warnTempl := `Validation of existing CRs against the new CRD's schema failed, but a webhook conversion strategy was specified in the new CRD.
The new webhook will only start after the bundle is upgraded, so we must assume that it will successfully convert existing CRs to a format that would have passed validation.

CRD: %q
Validation Error: %s
`
warnString := fmt.Sprintf(warnTempl, step.Resource.Name, err.Error())
b.logger.Warn(warnString)
b.eventRecorder.Event(b.plan, corev1.EventTypeWarning, "CRDValidation", warnString)
}

// check to see if stored versions changed and whether the upgrade could cause potential data loss
Expand Down Expand Up @@ -267,9 +290,7 @@ func (b *builder) NewCRDV1Beta1Step(client apiextensionsv1beta1client.Apiextensi
}

func setInstalledAlongsideAnnotation(a alongside.Annotator, dst metav1.Object, namespace string, name string, lister listersv1alpha1.ClusterServiceVersionLister, srcs ...metav1.Object) {
var (
nns []alongside.NamespacedName
)
var nns []alongside.NamespacedName

// Only keep references to existing and non-copied CSVs to
// avoid unbounded growth.
Expand Down
Loading