diff --git a/staging/operator-lifecycle-manager/cmd/olm/main.go b/staging/operator-lifecycle-manager/cmd/olm/main.go index 087ebb74a8..f1606c6397 100644 --- a/staging/operator-lifecycle-manager/cmd/olm/main.go +++ b/staging/operator-lifecycle-manager/cmd/olm/main.go @@ -221,6 +221,11 @@ func main() { op.Run(ctx) <-op.Ready() + // Emit CSV metric + if err = op.EnsureCSVMetric(); err != nil { + logger.WithError(err).Fatalf("error emitting metrics for existing CSV") + } + if *writeStatusName != "" { reconciler, err := openshift.NewClusterOperatorReconciler( openshift.WithClient(mgr.GetClient()), diff --git a/staging/operator-lifecycle-manager/pkg/controller/operators/olm/operator.go b/staging/operator-lifecycle-manager/pkg/controller/operators/olm/operator.go index 9a327af27d..259f3661e5 100644 --- a/staging/operator-lifecycle-manager/pkg/controller/operators/olm/operator.go +++ b/staging/operator-lifecycle-manager/pkg/controller/operators/olm/operator.go @@ -603,6 +603,23 @@ func (a *Operator) RegisterCSVWatchNotification(csvNotification csvutility.Watch a.csvNotification = csvNotification } +func (a *Operator) EnsureCSVMetric() error { + csvs, err := a.lister.OperatorsV1alpha1().ClusterServiceVersionLister().List(labels.Everything()) + if err != nil { + return err + } + for _, csv := range csvs { + logger := a.logger.WithFields(logrus.Fields{ + "name": csv.GetName(), + "namespace": csv.GetNamespace(), + "self": csv.GetSelfLink(), + }) + logger.Debug("emitting metrics for existing CSV") + metrics.EmitCSVMetric(csv, csv) + } + return nil +} + func (a *Operator) syncGCObject(obj interface{}) (syncError error) { metaObj, ok := obj.(metav1.Object) if !ok { diff --git a/staging/operator-lifecycle-manager/test/e2e/installplan_e2e_test.go b/staging/operator-lifecycle-manager/test/e2e/installplan_e2e_test.go index d52495678d..bb2d742684 100644 --- a/staging/operator-lifecycle-manager/test/e2e/installplan_e2e_test.go +++ b/staging/operator-lifecycle-manager/test/e2e/installplan_e2e_test.go @@ -89,7 +89,7 @@ var _ = Describe("Install Plan", func() { BeforeEach(func() { counter = 0 - for _, metric := range getMetricsFromPod(ctx.Ctx().KubeClient(), getPodWithLabel(ctx.Ctx().KubeClient(), "app=catalog-operator"), "8081") { + for _, metric := range getMetricsFromPod(ctx.Ctx().KubeClient(), getPodWithLabel(ctx.Ctx().KubeClient(), "app=catalog-operator")) { if metric.Family == "installplan_warnings_total" { counter = metric.Value } @@ -189,7 +189,7 @@ var _ = Describe("Install Plan", func() { It("increments a metric counting the warning", func() { Eventually(func() []Metric { - return getMetricsFromPod(ctx.Ctx().KubeClient(), getPodWithLabel(ctx.Ctx().KubeClient(), "app=catalog-operator"), "8081") + return getMetricsFromPod(ctx.Ctx().KubeClient(), getPodWithLabel(ctx.Ctx().KubeClient(), "app=catalog-operator")) }).Should(ContainElement(LikeMetric( WithFamily("installplan_warnings_total"), WithValueGreaterThan(counter), diff --git a/staging/operator-lifecycle-manager/test/e2e/metrics_e2e_test.go b/staging/operator-lifecycle-manager/test/e2e/metrics_e2e_test.go index cea7135f77..9296d4c11e 100644 --- a/staging/operator-lifecycle-manager/test/e2e/metrics_e2e_test.go +++ b/staging/operator-lifecycle-manager/test/e2e/metrics_e2e_test.go @@ -1,3 +1,4 @@ +//go:build !bare // +build !bare package e2e @@ -7,6 +8,7 @@ import ( "context" "fmt" "regexp" + "strconv" "strings" "sync" @@ -15,6 +17,7 @@ import ( . "github.com/onsi/gomega" io_prometheus_client "github.com/prometheus/client_model/go" "github.com/prometheus/common/expfmt" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -77,7 +80,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { It("generates csv_abnormal metric for OLM pod", func() { - Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"), "8081")).To(And( + Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"))).To(And( ContainElement(LikeMetric( WithFamily("csv_abnormal"), WithName(failingCSV.Name), @@ -105,13 +108,55 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { It("deletes its associated CSV metrics", func() { // Verify that when the csv has been deleted, it deletes the corresponding CSV metrics - Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"), "8081")).ToNot(And( + Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"))).ToNot(And( ContainElement(LikeMetric(WithFamily("csv_abnormal"), WithName(failingCSV.Name))), ContainElement(LikeMetric(WithFamily("csv_succeeded"), WithName(failingCSV.Name))), )) }) }) }) + + When("a CSV is created", func() { + var ( + cleanupCSV cleanupFunc + csv v1alpha1.ClusterServiceVersion + ) + BeforeEach(func() { + packageName := genName("csv-test-") + packageStable := fmt.Sprintf("%s-stable", packageName) + csv = newCSV(packageStable, testNamespace, "", semver.MustParse("0.1.0"), nil, nil, nil) + + var err error + _, err = createCSV(c, crc, csv, testNamespace, false, false) + Expect(err).ToNot(HaveOccurred()) + _, err = fetchCSV(crc, csv.Name, testNamespace, csvSucceededChecker) + Expect(err).ToNot(HaveOccurred()) + }) + AfterEach(func() { + if cleanupCSV != nil { + cleanupCSV() + } + }) + It("emits a CSV metrics", func() { + Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"))).To( + ContainElement(LikeMetric(WithFamily("csv_succeeded"), WithName(csv.Name), WithValue(1))), + ) + }) + When("the OLM pod restarts", func() { + BeforeEach(func() { + restartDeploymentWithLabel(c, "app=olm-operator") + }) + It("CSV metric is preserved", func() { + Eventually(func() []Metric { + return getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator")) + }).Should(ContainElement(LikeMetric( + WithFamily("csv_succeeded"), + WithName(csv.Name), + WithValue(1), + ))) + }) + }) + }) }) Context("Metrics emitted by objects during operator installation", func() { @@ -135,7 +180,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { // Verify metrics have been emitted for subscription Eventually(func() []Metric { - return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081") + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) }).Should(ContainElement(LikeMetric( WithFamily("subscription_sync_total"), WithName("metric-subscription-for-create"), @@ -150,7 +195,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { // Verify metrics have been emitted for dependency resolution Eventually(func() bool { return Eventually(func() []Metric { - return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081") + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) }).Should(ContainElement(LikeMetric( WithFamily("olm_resolution_duration_seconds"), WithLabel("outcome", "failed"), @@ -165,7 +210,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { BeforeEach(func() { subscriptionCleanup, subscription = createSubscription(GinkgoT(), crc, testNamespace, "metric-subscription-for-update", testPackageName, stableChannel, v1alpha1.ApprovalManual) Eventually(func() []Metric { - return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081") + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) }).Should(ContainElement(LikeMetric(WithFamily("subscription_sync_total"), WithLabel("name", "metric-subscription-for-update")))) Eventually(func() error { s, err := crc.OperatorsV1alpha1().Subscriptions(subscription.GetNamespace()).Get(context.TODO(), subscription.GetName(), metav1.GetOptions{}) @@ -186,7 +231,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { It("deletes the old Subscription metric and emits the new metric", func() { Eventually(func() []Metric { - return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081") + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) }).Should(And( Not(ContainElement(LikeMetric( WithFamily("subscription_sync_total"), @@ -220,7 +265,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { It("deletes the old subscription metric and emits the new metric(there is only one metric for the subscription)", func() { Eventually(func() []Metric { - return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081") + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) }).Should(And( Not(ContainElement(LikeMetric( WithFamily("subscription_sync_total"), @@ -250,7 +295,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { BeforeEach(func() { subscriptionCleanup, subscription = createSubscription(GinkgoT(), crc, testNamespace, "metric-subscription-for-delete", testPackageName, stableChannel, v1alpha1.ApprovalManual) Eventually(func() []Metric { - return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081") + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) }).Should(ContainElement(LikeMetric(WithFamily("subscription_sync_total"), WithLabel("name", "metric-subscription-for-delete")))) if subscriptionCleanup != nil { subscriptionCleanup() @@ -266,7 +311,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { It("deletes the Subscription metric", func() { Eventually(func() []Metric { - return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081") + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) }).ShouldNot(ContainElement(LikeMetric(WithFamily("subscription_sync_total"), WithName("metric-subscription-for-delete")))) }) }) @@ -309,7 +354,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { }) It("emits metrics for the catalogSource", func() { Eventually(func() []Metric { - return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081") + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) }).Should(And( ContainElement(LikeMetric( WithFamily("catalog_source_count"), @@ -329,7 +374,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { }) It("deletes the metrics for the CatalogSource", func() { Eventually(func() []Metric { - return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081") + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) }).Should(And( Not(ContainElement(LikeMetric( WithFamily("catalogsource_ready"), @@ -353,7 +398,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { }) It("emits metrics for the CatlogSource with a Value greater than 0", func() { Eventually(func() []Metric { - return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081") + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) }).Should(And( ContainElement(LikeMetric( WithFamily("catalogsource_ready"), @@ -363,7 +408,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() { )), )) Consistently(func() []Metric { - return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081") + return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator")) }, "3m").Should(And( ContainElement(LikeMetric( WithFamily("catalogsource_ready"), @@ -392,7 +437,63 @@ func getPodWithLabel(client operatorclient.ClientInterface, label string) *corev return &podList.Items[0] } -func getMetricsFromPod(client operatorclient.ClientInterface, pod *corev1.Pod, port string) []Metric { +func getDeploymentWithLabel(client operatorclient.ClientInterface, label string) *appsv1.Deployment { + listOptions := metav1.ListOptions{LabelSelector: label} + var deploymentList *appsv1.DeploymentList + EventuallyWithOffset(1, func() (numDeps int, err error) { + deploymentList, err = client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).List(context.TODO(), listOptions) + if deploymentList != nil { + numDeps = len(deploymentList.Items) + } + + return + }).Should(Equal(1), "expected exactly one Deployment") + + return &deploymentList.Items[0] +} + +func restartDeploymentWithLabel(client operatorclient.ClientInterface, l string) { + d := getDeploymentWithLabel(client, l) + z := int32(0) + oldZ := *d.Spec.Replicas + d.Spec.Replicas = &z + _, err := client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).Update(context.TODO(), d, metav1.UpdateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + EventuallyWithOffset(1, func() (replicas int32, err error) { + deployment, err := client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).Get(context.TODO(), d.Name, metav1.GetOptions{}) + if deployment != nil { + replicas = deployment.Status.Replicas + } + return + }).Should(Equal(int32(0)), "expected exactly 0 Deployments") + + updated := getDeploymentWithLabel(client, l) + updated.Spec.Replicas = &oldZ + _, err = client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).Update(context.TODO(), updated, metav1.UpdateOptions{}) + Expect(err).ToNot(HaveOccurred()) + + EventuallyWithOffset(1, func() (replicas int32, err error) { + deployment, err := client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).Get(context.TODO(), d.Name, metav1.GetOptions{}) + if deployment != nil { + replicas = deployment.Status.Replicas + } + return + }).Should(Equal(oldZ), "expected exactly 1 Deployment") +} + +func extractMetricPortFromPod(pod *corev1.Pod) string { + for _, container := range pod.Spec.Containers { + for _, port := range container.Ports { + if port.Name == "metrics" { + return strconv.Itoa(int(port.ContainerPort)) + } + } + } + return "-1" +} + +func getMetricsFromPod(client operatorclient.ClientInterface, pod *corev1.Pod) []Metric { ctx.Ctx().Logf("querying pod %s/%s\n", pod.GetNamespace(), pod.GetName()) // assuming -tls-cert and -tls-key aren't used anywhere else as a parameter value @@ -414,14 +515,13 @@ func getMetricsFromPod(client operatorclient.ClientInterface, pod *corev1.Pod, p scheme = "http" } ctx.Ctx().Logf("Retrieving metrics using scheme %v\n", scheme) - mfs := make(map[string]*io_prometheus_client.MetricFamily) EventuallyWithOffset(1, func() error { raw, err := client.KubernetesInterface().CoreV1().RESTClient().Get(). Namespace(pod.GetNamespace()). Resource("pods"). SubResource("proxy"). - Name(net.JoinSchemeNamePort(scheme, pod.GetName(), port)). + Name(net.JoinSchemeNamePort(scheme, pod.GetName(), extractMetricPortFromPod(pod))). Suffix("metrics"). Do(context.Background()).Raw() if err != nil { diff --git a/vendor/github.com/operator-framework/operator-lifecycle-manager/cmd/olm/main.go b/vendor/github.com/operator-framework/operator-lifecycle-manager/cmd/olm/main.go index 087ebb74a8..f1606c6397 100644 --- a/vendor/github.com/operator-framework/operator-lifecycle-manager/cmd/olm/main.go +++ b/vendor/github.com/operator-framework/operator-lifecycle-manager/cmd/olm/main.go @@ -221,6 +221,11 @@ func main() { op.Run(ctx) <-op.Ready() + // Emit CSV metric + if err = op.EnsureCSVMetric(); err != nil { + logger.WithError(err).Fatalf("error emitting metrics for existing CSV") + } + if *writeStatusName != "" { reconciler, err := openshift.NewClusterOperatorReconciler( openshift.WithClient(mgr.GetClient()), diff --git a/vendor/github.com/operator-framework/operator-lifecycle-manager/pkg/controller/operators/olm/operator.go b/vendor/github.com/operator-framework/operator-lifecycle-manager/pkg/controller/operators/olm/operator.go index 9a327af27d..259f3661e5 100644 --- a/vendor/github.com/operator-framework/operator-lifecycle-manager/pkg/controller/operators/olm/operator.go +++ b/vendor/github.com/operator-framework/operator-lifecycle-manager/pkg/controller/operators/olm/operator.go @@ -603,6 +603,23 @@ func (a *Operator) RegisterCSVWatchNotification(csvNotification csvutility.Watch a.csvNotification = csvNotification } +func (a *Operator) EnsureCSVMetric() error { + csvs, err := a.lister.OperatorsV1alpha1().ClusterServiceVersionLister().List(labels.Everything()) + if err != nil { + return err + } + for _, csv := range csvs { + logger := a.logger.WithFields(logrus.Fields{ + "name": csv.GetName(), + "namespace": csv.GetNamespace(), + "self": csv.GetSelfLink(), + }) + logger.Debug("emitting metrics for existing CSV") + metrics.EmitCSVMetric(csv, csv) + } + return nil +} + func (a *Operator) syncGCObject(obj interface{}) (syncError error) { metaObj, ok := obj.(metav1.Object) if !ok {