Skip to content

[OCPBUGS-25341]: perform operator apiService certificate validity checks directly #3217

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 73 additions & 8 deletions pkg/controller/install/certresources.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/sets"
corev1ac "k8s.io/client-go/applyconfigurations/core/v1"
rbacv1ac "k8s.io/client-go/applyconfigurations/rbac/v1"

Expand Down Expand Up @@ -157,6 +158,14 @@ func ServiceName(deploymentName string) string {
return deploymentName + "-service"
}

func HostnamesForService(serviceName, namespace string) []string {
return []string{
fmt.Sprintf("%s.%s", serviceName, namespace),
fmt.Sprintf("%s.%s.svc", serviceName, namespace),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not adding .cluster nor .cluster.local?

Copy link
Contributor Author

@ankitathomas ankitathomas Jun 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These should resolve even without the cluster domain specified, though there shouldn't be any harm in adding the fqdns for the default cluster domains. We could possibly also lookup the actual domain similar to pkg/package-server/client/util.go, though that may not be necessary

fmt.Sprintf("%s.%s.svc.cluster.local", serviceName, namespace),
}
}

func (i *StrategyDeploymentInstaller) getCertResources() []certResource {
return append(i.apiServiceDescriptions, i.webhookDescriptions...)
}
Expand Down Expand Up @@ -223,15 +232,74 @@ func (i *StrategyDeploymentInstaller) CertsRotated() bool {
return i.certificatesRotated
}

func ShouldRotateCerts(csv *v1alpha1.ClusterServiceVersion) bool {
// shouldRotateCerts indicates whether an apiService cert should be rotated due to being
// malformed, invalid, expired, inactive or within a specific freshness interval (DefaultCertMinFresh) before expiry.
func shouldRotateCerts(certSecret *corev1.Secret, hosts []string) bool {
now := metav1.Now()
if !csv.Status.CertsRotateAt.IsZero() && csv.Status.CertsRotateAt.Before(&now) {
caPEM, ok := certSecret.Data[OLMCAPEMKey]
if !ok {
// missing CA cert in secret
return true
}
certPEM, ok := certSecret.Data["tls.crt"]
if !ok {
// missing cert in secret
return true
}

ca, err := certs.PEMToCert(caPEM)
if err != nil {
// malformed CA cert
return true
}
cert, err := certs.PEMToCert(certPEM)
if err != nil {
// malformed cert
return true
}

// check for cert freshness
if !certs.Active(ca) || now.Time.After(CalculateCertRotatesAt(ca.NotAfter)) ||
!certs.Active(cert) || now.Time.After(CalculateCertRotatesAt(cert.NotAfter)) {
return true
}

// Check validity of serving cert and if serving cert is trusted by the CA
for _, host := range hosts {
if err := certs.VerifyCert(ca, cert, host); err != nil {
return true
}
}
return false
}

func (i *StrategyDeploymentInstaller) ShouldRotateCerts(s Strategy) (bool, error) {
strategy, ok := s.(*v1alpha1.StrategyDetailsDeployment)
if !ok {
return false, fmt.Errorf("failed to install %s strategy with deployment installer: unsupported deployment install strategy", strategy.GetStrategyName())
}

hasCerts := sets.New[string]()
for _, c := range i.getCertResources() {
hasCerts.Insert(c.getDeploymentName())
}
for _, sddSpec := range strategy.DeploymentSpecs {
if hasCerts.Has(sddSpec.Name) {
certSecret, err := i.strategyClient.GetOpLister().CoreV1().SecretLister().Secrets(i.owner.GetNamespace()).Get(SecretName(ServiceName(sddSpec.Name)))
if err == nil {
if shouldRotateCerts(certSecret, HostnamesForService(ServiceName(sddSpec.Name), i.owner.GetNamespace())) {
return true, nil
}
} else if apierrors.IsNotFound(err) {
return true, nil
} else {
return false, err
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there's some other error, what happens? This does not rotate the certificate.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume it means we enter exponential backoff (at least until we hit the 5m install plan timeout)?

Any other error is likely to be either temporary (in which case it's hopefully resolved next time around), or not (in which case we would likely fail in a similar way trying to rotate the cert).

The only scenario that is coming to mind where Get secret fails, but a subsequent Create or Update would succeed would be if we lack read permission but have write permission.

}
}
}
return false, nil
}

func CalculateCertExpiration(startingFrom time.Time) time.Time {
return startingFrom.Add(DefaultCertValidFor)
}
Expand Down Expand Up @@ -267,10 +335,7 @@ func (i *StrategyDeploymentInstaller) installCertRequirementsForDeployment(deplo
}

// Create signed serving cert
hosts := []string{
fmt.Sprintf("%s.%s", serviceName, i.owner.GetNamespace()),
fmt.Sprintf("%s.%s.svc", serviceName, i.owner.GetNamespace()),
}
hosts := HostnamesForService(serviceName, i.owner.GetNamespace())
servingPair, err := certGenerator.Generate(expiration, Organization, ca, hosts)
if err != nil {
logger.Warnf("could not generate signed certs for hosts %v", hosts)
Expand Down Expand Up @@ -314,10 +379,10 @@ func (i *StrategyDeploymentInstaller) installCertRequirementsForDeployment(deplo

// Attempt an update
// TODO: Check that the secret was not modified
if existingCAPEM, ok := existingSecret.Data[OLMCAPEMKey]; ok && !ShouldRotateCerts(i.owner.(*v1alpha1.ClusterServiceVersion)) {
if !shouldRotateCerts(existingSecret, HostnamesForService(serviceName, i.owner.GetNamespace())) {
logger.Warnf("reusing existing cert %s", secret.GetName())
secret = existingSecret
caPEM = existingCAPEM
caPEM = existingSecret.Data[OLMCAPEMKey]
caHash = certs.PEMSHA256(caPEM)
} else {
if _, err := i.strategyClient.GetOpClient().UpdateSecret(secret); err != nil {
Expand Down
5 changes: 5 additions & 0 deletions pkg/controller/install/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ type Strategy interface {
type StrategyInstaller interface {
Install(strategy Strategy) error
CheckInstalled(strategy Strategy) (bool, error)
ShouldRotateCerts(strategy Strategy) (bool, error)
CertsRotateAt() time.Time
CertsRotated() bool
}
Expand Down Expand Up @@ -79,3 +80,7 @@ func (i *NullStrategyInstaller) CertsRotateAt() time.Time {
func (i *NullStrategyInstaller) CertsRotated() bool {
return false
}

func (i *NullStrategyInstaller) ShouldRotateCerts(s Strategy) (bool, error) {
return false, nil
}
26 changes: 2 additions & 24 deletions pkg/controller/operators/olm/apiservices.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,17 +93,12 @@ func (a *Operator) checkAPIServiceResources(csv *v1alpha1.ClusterServiceVersion,

// Check if CA is Active
caBundle := apiService.Spec.CABundle
ca, err := certs.PEMToCert(caBundle)
_, err = certs.PEMToCert(caBundle)
if err != nil {
logger.Warnf("could not convert APIService CA bundle to x509 cert")
errs = append(errs, err)
continue
}
if !certs.Active(ca) {
logger.Warnf("CA cert not active")
errs = append(errs, fmt.Errorf("found the CA cert is not active"))
continue
}
Comment on lines -102 to -106
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're no longer checking the certs (here and below)?

Copy link
Contributor Author

@ankitathomas ankitathomas May 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've combined the cert checks with freshness immediately after the APIService checks for successful/failed CSVs. We don't necessarily need to repeat the checks here again.


// Check if serving cert is active
secretName := install.SecretName(serviceName)
Expand All @@ -113,17 +108,12 @@ func (a *Operator) checkAPIServiceResources(csv *v1alpha1.ClusterServiceVersion,
errs = append(errs, err)
continue
}
cert, err := certs.PEMToCert(secret.Data["tls.crt"])
_, err = certs.PEMToCert(secret.Data["tls.crt"])
if err != nil {
logger.Warnf("could not convert serving cert to x509 cert")
errs = append(errs, err)
continue
}
if !certs.Active(cert) {
logger.Warnf("serving cert not active")
errs = append(errs, fmt.Errorf("found the serving cert not active"))
continue
}

// Check if CA hash matches expected
caHash := hashFunc(caBundle)
Expand All @@ -133,18 +123,6 @@ func (a *Operator) checkAPIServiceResources(csv *v1alpha1.ClusterServiceVersion,
continue
}

// Check if serving cert is trusted by the CA
hosts := []string{
fmt.Sprintf("%s.%s", service.GetName(), csv.GetNamespace()),
fmt.Sprintf("%s.%s.svc", service.GetName(), csv.GetNamespace()),
}
for _, host := range hosts {
if err := certs.VerifyCert(ca, cert, host); err != nil {
errs = append(errs, fmt.Errorf("could not verify cert: %s", err.Error()))
continue
}
}

// Ensure the existing Deployment has a matching CA hash annotation
deployment, err := a.lister.AppsV1().DeploymentLister().Deployments(csv.GetNamespace()).Get(desc.DeploymentName)
if apierrors.IsNotFound(err) || err != nil {
Expand Down
10 changes: 8 additions & 2 deletions pkg/controller/operators/olm/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -2242,7 +2242,10 @@ func (a *Operator) transitionCSVState(in v1alpha1.ClusterServiceVersion) (out *v
}

// Check if it's time to refresh owned APIService certs
if install.ShouldRotateCerts(out) {
if shouldRotate, err := installer.ShouldRotateCerts(strategy); err != nil {
logger.WithError(err).Info("cert validity check")
return
} else if shouldRotate {
logger.Debug("CSV owns resources that require a cert refresh")
out.SetPhaseWithEvent(v1alpha1.CSVPhasePending, v1alpha1.CSVReasonNeedsCertRotation, "CSV owns resources that require a cert refresh", now, a.recorder)
return
Expand Down Expand Up @@ -2352,7 +2355,10 @@ func (a *Operator) transitionCSVState(in v1alpha1.ClusterServiceVersion) (out *v
}

// Check if it's time to refresh owned APIService certs
if install.ShouldRotateCerts(out) {
if shouldRotate, err := installer.ShouldRotateCerts(strategy); err != nil {
logger.WithError(err).Info("cert validity check")
return
} else if shouldRotate {
logger.Debug("CSV owns resources that require a cert refresh")
out.SetPhaseWithEvent(v1alpha1.CSVPhasePending, v1alpha1.CSVReasonNeedsCertRotation, "owned APIServices need cert refresh", now, a.recorder)
return
Expand Down
40 changes: 22 additions & 18 deletions pkg/controller/operators/olm/operator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ func (i *TestInstaller) CheckInstalled(s install.Strategy) (bool, error) {
return true, nil
}

func (i *TestInstaller) ShouldRotateCerts(s install.Strategy) (bool, error) {
return false, nil
}

func (i *TestInstaller) CertsRotateAt() time.Time {
return time.Time{}
}
Expand Down Expand Up @@ -1956,26 +1960,26 @@ func TestTransitionCSV(t *testing.T) {
},
clientObjs: []runtime.Object{defaultOperatorGroup},
apis: []runtime.Object{
apiService("a1", "v1", "v1-a1", namespace, "a1", expiredCAPEM, apiregistrationv1.ConditionTrue, ownerLabelFromCSV("csv1", namespace)),
apiService("a1", "v1", install.ServiceName("a1"), namespace, "a1", expiredCAPEM, apiregistrationv1.ConditionTrue, ownerLabelFromCSV("csv1", namespace)),
},
objs: []runtime.Object{
deployment("a1", namespace, "sa", addAnnotations(defaultTemplateAnnotations, map[string]string{
install.OLMCAHashAnnotationKey: expiredCAHash,
})),
withAnnotations(keyPairToTLSSecret("v1.a1-cert", namespace, signedServingPair(time.Now().Add(24*time.Hour), expiredCA, []string{"v1-a1.ns", "v1-a1.ns.svc"})), map[string]string{
withAnnotations(keyPairToTLSSecret(install.SecretName(install.ServiceName("a1")), namespace, signedServingPair(time.Now().Add(24*time.Hour), expiredCA, install.HostnamesForService(install.ServiceName("a1"), "ns"))), map[string]string{
install.OLMCAHashAnnotationKey: expiredCAHash,
}),
service("v1-a1", namespace, "a1", 80),
service(install.ServiceName("a1"), namespace, "a1", 80),
serviceAccount("sa", namespace),
role("v1.a1-cert", namespace, []rbacv1.PolicyRule{
role(install.SecretName(install.ServiceName("a1")), namespace, []rbacv1.PolicyRule{
{
Verbs: []string{"get"},
APIGroups: []string{""},
Resources: []string{"secrets"},
ResourceNames: []string{"v1.a1-cert"},
ResourceNames: []string{install.SecretName(install.ServiceName("a1"))},
},
}),
roleBinding("v1.a1-cert", namespace, "v1.a1-cert", "sa", namespace),
roleBinding(install.SecretName(install.ServiceName("a1")), namespace, install.SecretName(install.ServiceName("a1")), "sa", namespace),
role("extension-apiserver-authentication-reader", "kube-system", []rbacv1.PolicyRule{
{
Verbs: []string{"get"},
Expand All @@ -1984,7 +1988,7 @@ func TestTransitionCSV(t *testing.T) {
ResourceNames: []string{"extension-apiserver-authentication"},
},
}),
roleBinding("v1.a1-auth-reader", "kube-system", "extension-apiserver-authentication-reader", "sa", namespace),
roleBinding(install.AuthReaderRoleBindingName(install.ServiceName("a1")), "kube-system", "extension-apiserver-authentication-reader", "sa", namespace),
clusterRole("system:auth-delegator", []rbacv1.PolicyRule{
{
Verbs: []string{"create"},
Expand All @@ -1997,15 +2001,15 @@ func TestTransitionCSV(t *testing.T) {
Resources: []string{"subjectaccessreviews"},
},
}),
clusterRoleBinding("v1.a1-system:auth-delegator", "system:auth-delegator", "sa", namespace),
clusterRoleBinding(install.AuthDelegatorClusterRoleBindingName(install.ServiceName("a1")), "system:auth-delegator", "sa", namespace),
},
crds: []runtime.Object{
crd("c1", "v1", "g1"),
},
},
expected: expected{
csvStates: map[string]csvState{
"csv1": {exists: true, phase: v1alpha1.CSVPhaseFailed, reason: v1alpha1.CSVReasonAPIServiceResourceIssue},
"csv1": {exists: true, phase: v1alpha1.CSVPhaseFailed, reason: v1alpha1.CSVReasonNeedsCertRotation},
},
},
},
Expand All @@ -2025,26 +2029,26 @@ func TestTransitionCSV(t *testing.T) {
},
clientObjs: []runtime.Object{defaultOperatorGroup},
apis: []runtime.Object{
apiService("a1", "v1", "v1-a1", namespace, "a1", expiredCAPEM, apiregistrationv1.ConditionTrue, ownerLabelFromCSV("csv1", namespace)),
apiService("a1", "v1", install.ServiceName("a1"), namespace, "a1", expiredCAPEM, apiregistrationv1.ConditionTrue, ownerLabelFromCSV("csv1", namespace)),
},
objs: []runtime.Object{
deployment("a1", namespace, "sa", addAnnotations(defaultTemplateAnnotations, map[string]string{
install.OLMCAHashAnnotationKey: expiredCAHash,
})),
withAnnotations(keyPairToTLSSecret("v1.a1-cert", namespace, signedServingPair(time.Now().Add(24*time.Hour), expiredCA, []string{"v1-a1.ns", "v1-a1.ns.svc"})), map[string]string{
withAnnotations(keyPairToTLSSecret(install.SecretName(install.ServiceName("a1")), namespace, signedServingPair(time.Now().Add(24*time.Hour), expiredCA, install.HostnamesForService(install.ServiceName("a1"), "ns"))), map[string]string{
install.OLMCAHashAnnotationKey: expiredCAHash,
}),
service("v1-a1", namespace, "a1", 80),
service(install.ServiceName("a1"), namespace, "a1", 80),
serviceAccount("sa", namespace),
role("v1.a1-cert", namespace, []rbacv1.PolicyRule{
role(install.SecretName(install.ServiceName("a1")), namespace, []rbacv1.PolicyRule{
{
Verbs: []string{"get"},
APIGroups: []string{""},
Resources: []string{"secrets"},
ResourceNames: []string{"v1.a1-cert"},
ResourceNames: []string{install.SecretName(install.ServiceName("a1"))},
},
}),
roleBinding("v1.a1-cert", namespace, "v1.a1-cert", "sa", namespace),
roleBinding(install.SecretName(install.ServiceName("a1")), namespace, install.SecretName(install.ServiceName("a1")), "sa", namespace),
role("extension-apiserver-authentication-reader", "kube-system", []rbacv1.PolicyRule{
{
Verbs: []string{"get"},
Expand All @@ -2053,7 +2057,7 @@ func TestTransitionCSV(t *testing.T) {
ResourceNames: []string{"extension-apiserver-authentication"},
},
}),
roleBinding("v1.a1-auth-reader", "kube-system", "extension-apiserver-authentication-reader", "sa", namespace),
roleBinding(install.AuthReaderRoleBindingName(install.ServiceName("a1")), "kube-system", "extension-apiserver-authentication-reader", "sa", namespace),
clusterRole("system:auth-delegator", []rbacv1.PolicyRule{
{
Verbs: []string{"create"},
Expand All @@ -2066,15 +2070,15 @@ func TestTransitionCSV(t *testing.T) {
Resources: []string{"subjectaccessreviews"},
},
}),
clusterRoleBinding("v1.a1-system:auth-delegator", "system:auth-delegator", "sa", namespace),
clusterRoleBinding(install.AuthDelegatorClusterRoleBindingName(install.ServiceName("a1")), "system:auth-delegator", "sa", namespace),
},
crds: []runtime.Object{
crd("c1", "v1", "g1"),
},
},
expected: expected{
csvStates: map[string]csvState{
"csv1": {exists: true, phase: v1alpha1.CSVPhasePending, reason: v1alpha1.CSVReasonAPIServiceResourcesNeedReinstall},
"csv1": {exists: true, phase: v1alpha1.CSVPhasePending, reason: v1alpha1.CSVReasonNeedsCertRotation},
},
},
},
Expand Down
Loading
Loading