Skip to content

[release-4.8] Bug 2074680: Emit CSV metric on startup #288

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions staging/operator-lifecycle-manager/cmd/olm/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,11 @@ func main() {
op.Run(ctx)
<-op.Ready()

// Emit CSV metric
if err = op.EnsureCSVMetric(); err != nil {
logger.WithError(err).Fatalf("error emitting metrics for existing CSV")
}

if *writeStatusName != "" {
reconciler, err := openshift.NewClusterOperatorReconciler(
openshift.WithClient(mgr.GetClient()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,23 @@ func (a *Operator) RegisterCSVWatchNotification(csvNotification csvutility.Watch
a.csvNotification = csvNotification
}

func (a *Operator) EnsureCSVMetric() error {
csvs, err := a.lister.OperatorsV1alpha1().ClusterServiceVersionLister().List(labels.Everything())
if err != nil {
return err
}
for _, csv := range csvs {
logger := a.logger.WithFields(logrus.Fields{
"name": csv.GetName(),
"namespace": csv.GetNamespace(),
"self": csv.GetSelfLink(),
})
logger.Debug("emitting metrics for existing CSV")
metrics.EmitCSVMetric(csv, csv)
}
return nil
}

func (a *Operator) syncGCObject(obj interface{}) (syncError error) {
metaObj, ok := obj.(metav1.Object)
if !ok {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ var _ = Describe("Install Plan", func() {

BeforeEach(func() {
counter = 0
for _, metric := range getMetricsFromPod(ctx.Ctx().KubeClient(), getPodWithLabel(ctx.Ctx().KubeClient(), "app=catalog-operator"), "8081") {
for _, metric := range getMetricsFromPod(ctx.Ctx().KubeClient(), getPodWithLabel(ctx.Ctx().KubeClient(), "app=catalog-operator")) {
if metric.Family == "installplan_warnings_total" {
counter = metric.Value
}
Expand Down Expand Up @@ -189,7 +189,7 @@ var _ = Describe("Install Plan", func() {

It("increments a metric counting the warning", func() {
Eventually(func() []Metric {
return getMetricsFromPod(ctx.Ctx().KubeClient(), getPodWithLabel(ctx.Ctx().KubeClient(), "app=catalog-operator"), "8081")
return getMetricsFromPod(ctx.Ctx().KubeClient(), getPodWithLabel(ctx.Ctx().KubeClient(), "app=catalog-operator"))
}).Should(ContainElement(LikeMetric(
WithFamily("installplan_warnings_total"),
WithValueGreaterThan(counter),
Expand Down
132 changes: 116 additions & 16 deletions staging/operator-lifecycle-manager/test/e2e/metrics_e2e_test.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
//go:build !bare
// +build !bare

package e2e
Expand All @@ -7,6 +8,7 @@ import (
"context"
"fmt"
"regexp"
"strconv"
"strings"
"sync"

Expand All @@ -15,6 +17,7 @@ import (
. "github.com/onsi/gomega"
io_prometheus_client "github.com/prometheus/client_model/go"
"github.com/prometheus/common/expfmt"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apiextensions-apiserver/pkg/apis/apiextensions"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -77,7 +80,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {

It("generates csv_abnormal metric for OLM pod", func() {

Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"), "8081")).To(And(
Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"))).To(And(
ContainElement(LikeMetric(
WithFamily("csv_abnormal"),
WithName(failingCSV.Name),
Expand Down Expand Up @@ -105,13 +108,55 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {

It("deletes its associated CSV metrics", func() {
// Verify that when the csv has been deleted, it deletes the corresponding CSV metrics
Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"), "8081")).ToNot(And(
Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"))).ToNot(And(
ContainElement(LikeMetric(WithFamily("csv_abnormal"), WithName(failingCSV.Name))),
ContainElement(LikeMetric(WithFamily("csv_succeeded"), WithName(failingCSV.Name))),
))
})
})
})

When("a CSV is created", func() {
var (
cleanupCSV cleanupFunc
csv v1alpha1.ClusterServiceVersion
)
BeforeEach(func() {
packageName := genName("csv-test-")
packageStable := fmt.Sprintf("%s-stable", packageName)
csv = newCSV(packageStable, testNamespace, "", semver.MustParse("0.1.0"), nil, nil, nil)

var err error
_, err = createCSV(c, crc, csv, testNamespace, false, false)
Expect(err).ToNot(HaveOccurred())
_, err = fetchCSV(crc, csv.Name, testNamespace, csvSucceededChecker)
Expect(err).ToNot(HaveOccurred())
})
AfterEach(func() {
if cleanupCSV != nil {
cleanupCSV()
}
})
It("emits a CSV metrics", func() {
Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"))).To(
ContainElement(LikeMetric(WithFamily("csv_succeeded"), WithName(csv.Name), WithValue(1))),
)
})
When("the OLM pod restarts", func() {
BeforeEach(func() {
restartDeploymentWithLabel(c, "app=olm-operator")
})
It("CSV metric is preserved", func() {
Eventually(func() []Metric {
return getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"))
}).Should(ContainElement(LikeMetric(
WithFamily("csv_succeeded"),
WithName(csv.Name),
WithValue(1),
)))
})
})
})
})

Context("Metrics emitted by objects during operator installation", func() {
Expand All @@ -135,7 +180,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {

// Verify metrics have been emitted for subscription
Eventually(func() []Metric {
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081")
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"))
}).Should(ContainElement(LikeMetric(
WithFamily("subscription_sync_total"),
WithName("metric-subscription-for-create"),
Expand All @@ -150,7 +195,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {
// Verify metrics have been emitted for dependency resolution
Eventually(func() bool {
return Eventually(func() []Metric {
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081")
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"))
}).Should(ContainElement(LikeMetric(
WithFamily("olm_resolution_duration_seconds"),
WithLabel("outcome", "failed"),
Expand All @@ -165,7 +210,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {
BeforeEach(func() {
subscriptionCleanup, subscription = createSubscription(GinkgoT(), crc, testNamespace, "metric-subscription-for-update", testPackageName, stableChannel, v1alpha1.ApprovalManual)
Eventually(func() []Metric {
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081")
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"))
}).Should(ContainElement(LikeMetric(WithFamily("subscription_sync_total"), WithLabel("name", "metric-subscription-for-update"))))
Eventually(func() error {
s, err := crc.OperatorsV1alpha1().Subscriptions(subscription.GetNamespace()).Get(context.TODO(), subscription.GetName(), metav1.GetOptions{})
Expand All @@ -186,7 +231,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {

It("deletes the old Subscription metric and emits the new metric", func() {
Eventually(func() []Metric {
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081")
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"))
}).Should(And(
Not(ContainElement(LikeMetric(
WithFamily("subscription_sync_total"),
Expand Down Expand Up @@ -220,7 +265,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {

It("deletes the old subscription metric and emits the new metric(there is only one metric for the subscription)", func() {
Eventually(func() []Metric {
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081")
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"))
}).Should(And(
Not(ContainElement(LikeMetric(
WithFamily("subscription_sync_total"),
Expand Down Expand Up @@ -250,7 +295,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {
BeforeEach(func() {
subscriptionCleanup, subscription = createSubscription(GinkgoT(), crc, testNamespace, "metric-subscription-for-delete", testPackageName, stableChannel, v1alpha1.ApprovalManual)
Eventually(func() []Metric {
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081")
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"))
}).Should(ContainElement(LikeMetric(WithFamily("subscription_sync_total"), WithLabel("name", "metric-subscription-for-delete"))))
if subscriptionCleanup != nil {
subscriptionCleanup()
Expand All @@ -266,7 +311,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {

It("deletes the Subscription metric", func() {
Eventually(func() []Metric {
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081")
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"))
}).ShouldNot(ContainElement(LikeMetric(WithFamily("subscription_sync_total"), WithName("metric-subscription-for-delete"))))
})
})
Expand Down Expand Up @@ -309,7 +354,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {
})
It("emits metrics for the catalogSource", func() {
Eventually(func() []Metric {
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081")
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"))
}).Should(And(
ContainElement(LikeMetric(
WithFamily("catalog_source_count"),
Expand All @@ -329,7 +374,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {
})
It("deletes the metrics for the CatalogSource", func() {
Eventually(func() []Metric {
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081")
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"))
}).Should(And(
Not(ContainElement(LikeMetric(
WithFamily("catalogsource_ready"),
Expand All @@ -353,7 +398,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {
})
It("emits metrics for the CatlogSource with a Value greater than 0", func() {
Eventually(func() []Metric {
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081")
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"))
}).Should(And(
ContainElement(LikeMetric(
WithFamily("catalogsource_ready"),
Expand All @@ -363,7 +408,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {
)),
))
Consistently(func() []Metric {
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081")
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"))
}, "3m").Should(And(
ContainElement(LikeMetric(
WithFamily("catalogsource_ready"),
Expand Down Expand Up @@ -392,7 +437,63 @@ func getPodWithLabel(client operatorclient.ClientInterface, label string) *corev
return &podList.Items[0]
}

func getMetricsFromPod(client operatorclient.ClientInterface, pod *corev1.Pod, port string) []Metric {
func getDeploymentWithLabel(client operatorclient.ClientInterface, label string) *appsv1.Deployment {
listOptions := metav1.ListOptions{LabelSelector: label}
var deploymentList *appsv1.DeploymentList
EventuallyWithOffset(1, func() (numDeps int, err error) {
deploymentList, err = client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).List(context.TODO(), listOptions)
if deploymentList != nil {
numDeps = len(deploymentList.Items)
}

return
}).Should(Equal(1), "expected exactly one Deployment")

return &deploymentList.Items[0]
}

func restartDeploymentWithLabel(client operatorclient.ClientInterface, l string) {
d := getDeploymentWithLabel(client, l)
z := int32(0)
oldZ := *d.Spec.Replicas
d.Spec.Replicas = &z
_, err := client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).Update(context.TODO(), d, metav1.UpdateOptions{})
Expect(err).ToNot(HaveOccurred())

EventuallyWithOffset(1, func() (replicas int32, err error) {
deployment, err := client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).Get(context.TODO(), d.Name, metav1.GetOptions{})
if deployment != nil {
replicas = deployment.Status.Replicas
}
return
}).Should(Equal(int32(0)), "expected exactly 0 Deployments")

updated := getDeploymentWithLabel(client, l)
updated.Spec.Replicas = &oldZ
_, err = client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).Update(context.TODO(), updated, metav1.UpdateOptions{})
Expect(err).ToNot(HaveOccurred())

EventuallyWithOffset(1, func() (replicas int32, err error) {
deployment, err := client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).Get(context.TODO(), d.Name, metav1.GetOptions{})
if deployment != nil {
replicas = deployment.Status.Replicas
}
return
}).Should(Equal(oldZ), "expected exactly 1 Deployment")
}

func extractMetricPortFromPod(pod *corev1.Pod) string {
for _, container := range pod.Spec.Containers {
for _, port := range container.Ports {
if port.Name == "metrics" {
return strconv.Itoa(int(port.ContainerPort))
}
}
}
return "-1"
}

func getMetricsFromPod(client operatorclient.ClientInterface, pod *corev1.Pod) []Metric {
ctx.Ctx().Logf("querying pod %s/%s\n", pod.GetNamespace(), pod.GetName())

// assuming -tls-cert and -tls-key aren't used anywhere else as a parameter value
Expand All @@ -414,14 +515,13 @@ func getMetricsFromPod(client operatorclient.ClientInterface, pod *corev1.Pod, p
scheme = "http"
}
ctx.Ctx().Logf("Retrieving metrics using scheme %v\n", scheme)

mfs := make(map[string]*io_prometheus_client.MetricFamily)
EventuallyWithOffset(1, func() error {
raw, err := client.KubernetesInterface().CoreV1().RESTClient().Get().
Namespace(pod.GetNamespace()).
Resource("pods").
SubResource("proxy").
Name(net.JoinSchemeNamePort(scheme, pod.GetName(), port)).
Name(net.JoinSchemeNamePort(scheme, pod.GetName(), extractMetricPortFromPod(pod))).
Suffix("metrics").
Do(context.Background()).Raw()
if err != nil {
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.