Skip to content

Commit 64cff34

Browse files
anik120, awgreene, Josef Karasek, akihikokuroda
authored
Bug 2081483: Emit csv_succeeded metrics on startup (#2768)
* Update getMetricsFromPod to infer port number

  Problem: The getMetricsFromPod function assumes that metrics are exposed on port 8080. This function fails to retrieve metrics from the olm or catalog operator when the port is changed.

  Solution: Name the port in each of the deployments and update the getMetricsFromPod function to infer the port number from the deployments.

  Signed-off-by: Alexander Greene <[email protected]>

* Emit CSV metric on startup (#2216)

  Signed-off-by: Josef Karasek <[email protected]>

* fix e2e CSV metric is preserved failure (#2530)

  Signed-off-by: akihikokuroda <[email protected]>

Co-authored-by: Alexander Greene <[email protected]>
Co-authored-by: Josef Karasek <[email protected]>
Co-authored-by: Akihiko (Aki) Kuroda <[email protected]>
1 parent 6bc6b9c commit 64cff34

File tree

3 files changed

+136
-13
lines changed

3 files changed

+136
-13
lines changed

Diff for: cmd/olm/main.go

+5
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,11 @@ func main() {
211211
op.Run(ctx)
212212
<-op.Ready()
213213

214+
// Emit CSV metric
215+
if err = op.EnsureCSVMetric(); err != nil {
216+
logger.WithError(err).Fatalf("error emitting metrics for existing CSV")
217+
}
218+
214219
if *writeStatusName != "" {
215220
operatorstatus.MonitorClusterStatus(*writeStatusName, op.AtLevel(), ctx.Done(), opClient, configClient, crClient)
216221
}

Diff for: pkg/controller/operators/olm/operator.go

+17
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,23 @@ func (a *Operator) RegisterCSVWatchNotification(csvNotification csvutility.Watch
600600
a.csvNotification = csvNotification
601601
}
602602

603+
func (a *Operator) EnsureCSVMetric() error {
604+
csvs, err := a.lister.OperatorsV1alpha1().ClusterServiceVersionLister().List(labels.Everything())
605+
if err != nil {
606+
return err
607+
}
608+
for _, csv := range csvs {
609+
logger := a.logger.WithFields(logrus.Fields{
610+
"name": csv.GetName(),
611+
"namespace": csv.GetNamespace(),
612+
"self": csv.GetSelfLink(),
613+
})
614+
logger.Debug("emitting metrics for existing CSV")
615+
metrics.EmitCSVMetric(csv, csv)
616+
}
617+
return nil
618+
}
619+
603620
func (a *Operator) syncGCObject(obj interface{}) (syncError error) {
604621
metaObj, ok := obj.(metav1.Object)
605622
if !ok {

Diff for: test/e2e/metrics_e2e_test.go

+114-13
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,22 @@
1+
//go:build !bare
12
// +build !bare
23

34
package e2e
45

56
import (
67
"bytes"
78
"context"
9+
"fmt"
810
"regexp"
11+
"strconv"
912
"strings"
1013

14+
"github.com/blang/semver/v4"
1115
. "github.com/onsi/ginkgo"
1216
. "github.com/onsi/gomega"
1317
io_prometheus_client "github.com/prometheus/client_model/go"
1418
"github.com/prometheus/common/expfmt"
19+
appsv1 "k8s.io/api/apps/v1"
1520
corev1 "k8s.io/api/core/v1"
1621
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1722
"k8s.io/apimachinery/pkg/util/net"
@@ -71,8 +76,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {
7176
})
7277

7378
It("generates csv_abnormal metric for OLM pod", func() {
74-
75-
Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"), "8081")).To(And(
79+
Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"))).To(And(
7680
ContainElement(LikeMetric(
7781
WithFamily("csv_abnormal"),
7882
WithName(failingCSV.Name),
@@ -100,13 +104,55 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {
100104

101105
It("deletes its associated CSV metrics", func() {
102106
// Verify that when the csv has been deleted, it deletes the corresponding CSV metrics
103-
Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"), "8081")).ToNot(And(
107+
Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"))).ToNot(And(
104108
ContainElement(LikeMetric(WithFamily("csv_abnormal"), WithName(failingCSV.Name))),
105109
ContainElement(LikeMetric(WithFamily("csv_succeeded"), WithName(failingCSV.Name))),
106110
))
107111
})
108112
})
109113
})
114+
115+
When("a CSV is created", func() {
116+
var (
117+
cleanupCSV cleanupFunc
118+
csv v1alpha1.ClusterServiceVersion
119+
)
120+
BeforeEach(func() {
121+
packageName := genName("csv-test-")
122+
packageStable := fmt.Sprintf("%s-stable", packageName)
123+
csv = newCSV(packageStable, testNamespace, "", semver.MustParse("0.1.0"), nil, nil, nil)
124+
125+
var err error
126+
_, err = createCSV(c, crc, csv, testNamespace, false, false)
127+
Expect(err).ToNot(HaveOccurred())
128+
_, err = fetchCSV(crc, csv.Name, testNamespace, csvSucceededChecker)
129+
Expect(err).ToNot(HaveOccurred())
130+
})
131+
AfterEach(func() {
132+
if cleanupCSV != nil {
133+
cleanupCSV()
134+
}
135+
})
136+
It("emits a CSV metrics", func() {
137+
Expect(getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"))).To(
138+
ContainElement(LikeMetric(WithFamily("csv_succeeded"), WithName(csv.Name), WithValue(1))),
139+
)
140+
})
141+
When("the OLM pod restarts", func() {
142+
BeforeEach(func() {
143+
restartDeploymentWithLabel(c, "app=olm-operator")
144+
})
145+
It("CSV metric is preserved", func() {
146+
Eventually(func() []Metric {
147+
return getMetricsFromPod(c, getPodWithLabel(c, "app=olm-operator"))
148+
}).Should(ContainElement(LikeMetric(
149+
WithFamily("csv_succeeded"),
150+
WithName(csv.Name),
151+
WithValue(1),
152+
)))
153+
})
154+
})
155+
})
110156
})
111157

112158
Context("Metrics emitted by objects during operator installation", func() {
@@ -130,7 +176,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {
130176

131177
// Verify metrics have been emitted for subscription
132178
Eventually(func() []Metric {
133-
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081")
179+
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"))
134180
}).Should(ContainElement(LikeMetric(
135181
WithFamily("subscription_sync_total"),
136182
WithName("metric-subscription-for-create"),
@@ -145,7 +191,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {
145191
// Verify metrics have been emitted for dependency resolution
146192
Eventually(func() bool {
147193
return Eventually(func() []Metric {
148-
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081")
194+
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"))
149195
}).Should(ContainElement(LikeMetric(
150196
WithFamily("olm_resolution_duration_seconds"),
151197
WithLabel("outcome", "failed"),
@@ -160,7 +206,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {
160206
BeforeEach(func() {
161207
subscriptionCleanup, subscription = createSubscription(GinkgoT(), crc, testNamespace, "metric-subscription-for-update", testPackageName, stableChannel, v1alpha1.ApprovalManual)
162208
Eventually(func() []Metric {
163-
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081")
209+
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"))
164210
}).Should(ContainElement(LikeMetric(WithFamily("subscription_sync_total"), WithLabel("name", "metric-subscription-for-update"))))
165211
Eventually(func() error {
166212
s, err := crc.OperatorsV1alpha1().Subscriptions(subscription.GetNamespace()).Get(context.TODO(), subscription.GetName(), metav1.GetOptions{})
@@ -181,7 +227,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {
181227

182228
It("deletes the old Subscription metric and emits the new metric", func() {
183229
Eventually(func() []Metric {
184-
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081")
230+
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"))
185231
}).Should(And(
186232
Not(ContainElement(LikeMetric(
187233
WithFamily("subscription_sync_total"),
@@ -215,7 +261,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {
215261

216262
It("deletes the old subscription metric and emits the new metric(there is only one metric for the subscription)", func() {
217263
Eventually(func() []Metric {
218-
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081")
264+
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"))
219265
}).Should(And(
220266
Not(ContainElement(LikeMetric(
221267
WithFamily("subscription_sync_total"),
@@ -245,7 +291,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {
245291
BeforeEach(func() {
246292
subscriptionCleanup, subscription = createSubscription(GinkgoT(), crc, testNamespace, "metric-subscription-for-delete", testPackageName, stableChannel, v1alpha1.ApprovalManual)
247293
Eventually(func() []Metric {
248-
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081")
294+
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"))
249295
}).Should(ContainElement(LikeMetric(WithFamily("subscription_sync_total"), WithLabel("name", "metric-subscription-for-delete"))))
250296
if subscriptionCleanup != nil {
251297
subscriptionCleanup()
@@ -261,7 +307,7 @@ var _ = Describe("Metrics are generated for OLM managed resources", func() {
261307

262308
It("deletes the Subscription metric", func() {
263309
Eventually(func() []Metric {
264-
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"), "8081")
310+
return getMetricsFromPod(c, getPodWithLabel(c, "app=catalog-operator"))
265311
}).ShouldNot(ContainElement(LikeMetric(WithFamily("subscription_sync_total"), WithName("metric-subscription-for-delete"))))
266312
})
267313
})
@@ -283,7 +329,63 @@ func getPodWithLabel(client operatorclient.ClientInterface, label string) *corev
283329
return &podList.Items[0]
284330
}
285331

286-
func getMetricsFromPod(client operatorclient.ClientInterface, pod *corev1.Pod, port string) []Metric {
332+
func getDeploymentWithLabel(client operatorclient.ClientInterface, label string) *appsv1.Deployment {
333+
listOptions := metav1.ListOptions{LabelSelector: label}
334+
var deploymentList *appsv1.DeploymentList
335+
EventuallyWithOffset(1, func() (numDeps int, err error) {
336+
deploymentList, err = client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).List(context.TODO(), listOptions)
337+
if deploymentList != nil {
338+
numDeps = len(deploymentList.Items)
339+
}
340+
341+
return
342+
}).Should(Equal(1), "expected exactly one Deployment")
343+
344+
return &deploymentList.Items[0]
345+
}
346+
347+
func restartDeploymentWithLabel(client operatorclient.ClientInterface, l string) {
348+
d := getDeploymentWithLabel(client, l)
349+
z := int32(0)
350+
oldZ := *d.Spec.Replicas
351+
d.Spec.Replicas = &z
352+
_, err := client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).Update(context.TODO(), d, metav1.UpdateOptions{})
353+
Expect(err).ToNot(HaveOccurred())
354+
355+
EventuallyWithOffset(1, func() (replicas int32, err error) {
356+
deployment, err := client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).Get(context.TODO(), d.Name, metav1.GetOptions{})
357+
if deployment != nil {
358+
replicas = deployment.Status.Replicas
359+
}
360+
return
361+
}).Should(Equal(int32(0)), "expected exactly 0 Deployments")
362+
363+
updated := getDeploymentWithLabel(client, l)
364+
updated.Spec.Replicas = &oldZ
365+
_, err = client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).Update(context.TODO(), updated, metav1.UpdateOptions{})
366+
Expect(err).ToNot(HaveOccurred())
367+
368+
EventuallyWithOffset(1, func() (replicas int32, err error) {
369+
deployment, err := client.KubernetesInterface().AppsV1().Deployments(operatorNamespace).Get(context.TODO(), d.Name, metav1.GetOptions{})
370+
if deployment != nil {
371+
replicas = deployment.Status.Replicas
372+
}
373+
return
374+
}).Should(Equal(oldZ), "expected exactly 1 Deployment")
375+
}
376+
377+
func extractMetricPortFromPod(pod *corev1.Pod) string {
378+
for _, container := range pod.Spec.Containers {
379+
for _, port := range container.Ports {
380+
if port.Name == "metrics" {
381+
return strconv.Itoa(int(port.ContainerPort))
382+
}
383+
}
384+
}
385+
return "-1"
386+
}
387+
388+
func getMetricsFromPod(client operatorclient.ClientInterface, pod *corev1.Pod) []Metric {
287389
ctx.Ctx().Logf("querying pod %s/%s\n", pod.GetNamespace(), pod.GetName())
288390

289391
// assuming -tls-cert and -tls-key aren't used anywhere else as a parameter value
@@ -305,14 +407,13 @@ func getMetricsFromPod(client operatorclient.ClientInterface, pod *corev1.Pod, p
305407
scheme = "http"
306408
}
307409
ctx.Ctx().Logf("Retrieving metrics using scheme %v\n", scheme)
308-
309410
mfs := make(map[string]*io_prometheus_client.MetricFamily)
310411
EventuallyWithOffset(1, func() error {
311412
raw, err := client.KubernetesInterface().CoreV1().RESTClient().Get().
312413
Namespace(pod.GetNamespace()).
313414
Resource("pods").
314415
SubResource("proxy").
315-
Name(net.JoinSchemeNamePort(scheme, pod.GetName(), port)).
416+
Name(net.JoinSchemeNamePort(scheme, pod.GetName(), extractMetricPortFromPod(pod))).
316417
Suffix("metrics").
317418
Do(context.Background()).Raw()
318419
if err != nil {

0 commit comments

Comments
 (0)