Skip to content

Commit 90d750b

Browse files
authored
OCPBUGS-34784: Collect aggregated Prometheus Alertmanager instances (#941)
* Add draft gatherer for Prometheuses and Alertmanagers * Add rule for GatherAggregatedInstances to access Prometheus and Alertmanagers * Add sample file for GatherAggregatedInstances gatherer * Refactor and Docs for GatherAggregatedInstances gatherer * Add entry for GatherAggregatedInstances gatherer * Fix file folder * Fix linting issues * Add unit tests for aggregatedInstances gatherer * Add new unit tests for not found scenarios * Refactor constant visibility * Fix errcheck linting * Refactor name of the gatherer
1 parent e32ee76 commit 90d750b

File tree

7 files changed

+363
-0
lines changed

7 files changed

+363
-0
lines changed

Diff for: docs/gathered-data.md

+29
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,35 @@ None
6969
None
7070

7171

72+
## AggregatedMonitoringCRNames
73+
74+
Collects instances outside of the `openshift-monitoring` namespace of the following custom resources:
75+
- Kind: `Prometheus` Group: `monitoring.coreos.com`
76+
- Kind: `Alertmanager` Group: `monitoring.coreos.com`
77+
78+
### API Reference
79+
- https://docs.openshift.com/container-platform/4.13/rest_api/monitoring_apis/alertmanager-monitoring-coreos-com-v1.html
80+
- https://docs.openshift.com/container-platform/4.13/rest_api/monitoring_apis/prometheus-monitoring-coreos-com-v1.html
81+
82+
### Sample data
83+
- [docs/insights-archive-sample/aggregated/custom_prometheuses_alertmanagers.json](./insights-archive-sample/aggregated/custom_prometheuses_alertmanagers.json)
84+
85+
### Location in archive
86+
- `aggregated/custom_prometheuses_alertmanagers.json`
87+
88+
### Config ID
89+
`clusterconfig/aggregated_monitoring_cr_names`
90+
91+
### Released version
92+
- 4.16
93+
94+
### Backported versions
95+
TBD
96+
97+
### Changes
98+
None
99+
100+
72101
## CRD
73102

74103
Collects the specified Custom Resource Definitions.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"prometheuses": [
3+
"example-prometheus-name"
4+
],
5+
"alertmanagers": [
6+
"example-alertmanager-name"
7+
]
8+
}

Diff for: manifests/03-clusterrole.yaml

+8
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,14 @@ metadata:
168168
include.release.openshift.io/single-node-developer: "true"
169169
capability.openshift.io/name: Insights
170170
rules:
171+
- apiGroups:
172+
- "monitoring.coreos.com"
173+
resources:
174+
- alertmanagers
175+
- prometheuses
176+
verbs:
177+
- get
178+
- list
171179
- apiGroups:
172180
- ""
173181
resources:

Diff for: pkg/gatherers/clusterconfig/clusterconfig_gatherer.go

+1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ type gathererFuncPtr = func(*Gatherer, context.Context) ([]record.Record, []erro
2727

2828
var gatheringFunctions = map[string]gathererFuncPtr{
2929
"active_alerts": (*Gatherer).GatherActiveAlerts,
30+
"aggregated_monitoring_cr_names": (*Gatherer).GatherAggregatedMonitoringCRNames,
3031
"authentication": (*Gatherer).GatherClusterAuthentication,
3132
"certificate_signing_requests": (*Gatherer).GatherCertificateSigningRequests,
3233
"ceph_cluster": (*Gatherer).GatherCephCluster,

Diff for: pkg/gatherers/clusterconfig/const.go

+5
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ import (
77
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
88
)
99

10+
const (
11+
// monitoringNamespace is the namespace whose Prometheus and Alertmanager instances are
// skipped by the GatherAggregatedMonitoringCRNames gatherer
12+
monitoringNamespace string = "openshift-monitoring"
13+
)
14+
1015
var (
1116
registryScheme = runtime.NewScheme()
1217
// logMaxLines sets maximum number of lines of the log file
+118
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
package clusterconfig
2+
3+
import (
4+
"context"
5+
6+
"github.com/openshift/insights-operator/pkg/record"
7+
8+
promcli "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned"
9+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10+
)
11+
12+
// GatherAggregatedMonitoringCRNames Collects instances outside of the `openshift-monitoring` namespace of the following custom resources:
13+
// - Kind: `Prometheus` Group: `monitoring.coreos.com`
14+
// - Kind: `Alertmanager` Group: `monitoring.coreos.com`
15+
//
16+
// ### API Reference
17+
// - https://docs.openshift.com/container-platform/4.13/rest_api/monitoring_apis/alertmanager-monitoring-coreos-com-v1.html
18+
// - https://docs.openshift.com/container-platform/4.13/rest_api/monitoring_apis/prometheus-monitoring-coreos-com-v1.html
19+
//
20+
// ### Sample data
21+
// - docs/insights-archive-sample/aggregated/custom_prometheuses_alertmanagers.json
22+
//
23+
// ### Location in archive
24+
// - `aggregated/custom_prometheuses_alertmanagers.json`
25+
//
26+
// ### Config ID
27+
// `clusterconfig/aggregated_monitoring_cr_names`
28+
//
29+
// ### Released version
30+
// - 4.16
31+
//
32+
// ### Backported versions
33+
// TBD
34+
//
35+
// ### Changes
36+
// None
37+
func (g *Gatherer) GatherAggregatedMonitoringCRNames(ctx context.Context) ([]record.Record, []error) {
38+
client, err := promcli.NewForConfig(g.gatherKubeConfig)
39+
if err != nil {
40+
return nil, []error{err}
41+
}
42+
43+
return monitoringCRNames{}.gather(ctx, client)
44+
}
45+
46+
// monitoringCRNames is the payload stored in the archive: the names of the
// Prometheus and Alertmanager custom resources collected by the gatherer.
type monitoringCRNames struct {
	// Prometheuses holds the collected Prometheus instance names.
	Prometheuses []string `json:"prometheuses"`
	// Alertmanagers holds the collected Alertmanager instance names.
	Alertmanagers []string `json:"alertmanagers"`
}
50+
51+
// gather returns records for all Prometheus and Alertmanager instances that exist outside the openshift-monitoring namespace.
52+
// It could instead return a collection of errors found when trying to get those instances.
53+
func (mn monitoringCRNames) gather(ctx context.Context, client promcli.Interface) ([]record.Record, []error) {
54+
const Filename = "aggregated/custom_prometheuses_alertmanagers"
55+
56+
errs := []error{}
57+
prometheusList, err := mn.getOutcastedPrometheuses(ctx, client)
58+
if err != nil {
59+
errs = append(errs, err)
60+
}
61+
62+
alertManagersList, err := mn.getOutcastedAlertManagers(ctx, client)
63+
if err != nil {
64+
errs = append(errs, err)
65+
}
66+
67+
if len(errs) > 0 {
68+
return nil, errs
69+
}
70+
71+
// De not return an empty file if no Custom Resources were found
72+
if len(prometheusList) == 0 && len(alertManagersList) == 0 {
73+
return []record.Record{}, nil
74+
}
75+
76+
mn.Prometheuses = prometheusList
77+
mn.Alertmanagers = alertManagersList
78+
79+
return []record.Record{{Name: Filename, Item: record.JSONMarshaller{Object: mn}}}, nil
80+
}
81+
82+
// getOutcastedAlertManagers returns a collection of AlertManagers names, if any, from other than the openshift-monitoring namespace
83+
// or an error if it couldn't retrieve them
84+
func (mn monitoringCRNames) getOutcastedAlertManagers(ctx context.Context, client promcli.Interface) ([]string, error) {
85+
alertManagersList, err := client.MonitoringV1().Alertmanagers(metav1.NamespaceAll).List(ctx, metav1.ListOptions{})
86+
if err != nil {
87+
return nil, err
88+
}
89+
90+
amNames := []string{}
91+
for i := range alertManagersList.Items {
92+
alertMgr := alertManagersList.Items[i]
93+
if alertMgr.GetNamespace() != monitoringNamespace {
94+
amNames = append(amNames, alertMgr.GetName())
95+
}
96+
}
97+
98+
return amNames, nil
99+
}
100+
101+
// getOutcastedPrometheuses returns a collection of Prometheus names, if any, from other than the openshift-monitoring namespace
102+
// or an error if it couldn't retrieve them
103+
func (mn monitoringCRNames) getOutcastedPrometheuses(ctx context.Context, client promcli.Interface) ([]string, error) {
104+
prometheusList, err := client.MonitoringV1().Prometheuses(metav1.NamespaceAll).List(ctx, metav1.ListOptions{})
105+
if err != nil {
106+
return nil, err
107+
}
108+
109+
promNames := []string{}
110+
for i := range prometheusList.Items {
111+
prom := prometheusList.Items[i]
112+
if prom.GetNamespace() != monitoringNamespace {
113+
promNames = append(promNames, prom.GetName())
114+
}
115+
}
116+
117+
return promNames, nil
118+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
package clusterconfig
2+
3+
import (
4+
"context"
5+
"testing"
6+
7+
"github.com/openshift/insights-operator/pkg/record"
8+
v1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
9+
"github.com/prometheus-operator/prometheus-operator/pkg/client/versioned/fake"
10+
11+
"github.com/stretchr/testify/assert"
12+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
13+
)
14+
15+
// Test_GatherAggregatedInstances provides unit tests for the correct output file structure
16+
func Test_GatherAggregatedInstances(t *testing.T) {
17+
testCases := []struct {
18+
name string
19+
proms []*v1.Prometheus
20+
alertMgrs []*v1.Alertmanager
21+
expected []record.Record
22+
}{
23+
{
24+
name: "The function returns the name of the Prometheus instance in the correct field",
25+
proms: []*v1.Prometheus{
26+
{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "test-namespace"}},
27+
},
28+
expected: []record.Record{{
29+
Name: "aggregated/custom_prometheuses_alertmanagers",
30+
Item: record.JSONMarshaller{Object: monitoringCRNames{
31+
Prometheuses: []string{"test"}, Alertmanagers: []string{},
32+
}}},
33+
},
34+
}, {
35+
name: "The function returns the name of the AlertManager instance in the correct field",
36+
alertMgrs: []*v1.Alertmanager{
37+
{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "test-namespace"}},
38+
},
39+
expected: []record.Record{{
40+
Name: "aggregated/custom_prometheuses_alertmanagers",
41+
Item: record.JSONMarshaller{Object: monitoringCRNames{
42+
Alertmanagers: []string{"test"}, Prometheuses: []string{},
43+
}}},
44+
},
45+
}, {
46+
name: "The function returns the names of the mixed instances in the correct field",
47+
alertMgrs: []*v1.Alertmanager{
48+
{ObjectMeta: metav1.ObjectMeta{Name: "test-alertmanager", Namespace: "test-namespace"}},
49+
},
50+
proms: []*v1.Prometheus{
51+
{ObjectMeta: metav1.ObjectMeta{Name: "test-prometheus", Namespace: "test-namespace"}},
52+
},
53+
expected: []record.Record{{
54+
Name: "aggregated/custom_prometheuses_alertmanagers",
55+
Item: record.JSONMarshaller{Object: monitoringCRNames{
56+
Alertmanagers: []string{"test-alertmanager"}, Prometheuses: []string{"test-prometheus"},
57+
}}},
58+
},
59+
}, {
60+
name: "The function returns no records if no instances are found",
61+
alertMgrs: []*v1.Alertmanager{},
62+
proms: []*v1.Prometheus{},
63+
expected: []record.Record{},
64+
},
65+
}
66+
67+
for _, tc := range testCases {
68+
t.Run(tc.name, func(t *testing.T) {
69+
// Given
70+
clientset := fake.NewSimpleClientset()
71+
for _, am := range tc.alertMgrs {
72+
assert.NoError(t,
73+
clientset.Tracker().Add(am))
74+
}
75+
for _, prom := range tc.proms {
76+
assert.NoError(t,
77+
clientset.Tracker().Add(prom))
78+
}
79+
80+
// When
81+
test, errs := monitoringCRNames{}.gather(context.Background(), clientset)
82+
83+
// Assert
84+
assert.Empty(t, errs)
85+
assert.EqualValues(t, tc.expected, test)
86+
})
87+
}
88+
}
89+
90+
// Test_getOutcastedAlertManagers provides unit tests for the namespace filtering logic of AlertManager instances
91+
func Test_getOutcastedAlertManagers(t *testing.T) {
92+
testCases := []struct {
93+
name string
94+
alertMgrs []*v1.Alertmanager
95+
expected []string
96+
}{
97+
{
98+
name: "The function returns the name of the Prometheus outside the 'openshift-monitoring' namespace",
99+
alertMgrs: []*v1.Alertmanager{
100+
{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "test-namespace"}},
101+
},
102+
expected: []string{"test"},
103+
}, {
104+
name: "The function ignores the name of the Prometheus inside the 'openshift-monitoring' namespace",
105+
alertMgrs: []*v1.Alertmanager{
106+
{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "openshift-monitoring"}},
107+
},
108+
expected: []string{},
109+
}, {
110+
name: "The function returns only items outside of the namespace on a mixed response from client",
111+
alertMgrs: []*v1.Alertmanager{
112+
{ObjectMeta: metav1.ObjectMeta{Name: "test1", Namespace: "test-namespace"}},
113+
{ObjectMeta: metav1.ObjectMeta{Name: "ignore", Namespace: "openshift-monitoring"}},
114+
{ObjectMeta: metav1.ObjectMeta{Name: "test2", Namespace: "test-namespace"}},
115+
},
116+
expected: []string{"test1", "test2"},
117+
}, {
118+
name: "The function returns an empty slice if no instances are found",
119+
alertMgrs: []*v1.Alertmanager{},
120+
expected: []string{},
121+
},
122+
}
123+
124+
for _, tc := range testCases {
125+
t.Run(tc.name, func(t *testing.T) {
126+
// Given
127+
clientset := fake.NewSimpleClientset()
128+
for _, am := range tc.alertMgrs {
129+
assert.NoError(t,
130+
clientset.Tracker().Add(am))
131+
}
132+
133+
// When
134+
test, err := monitoringCRNames{}.getOutcastedAlertManagers(context.Background(), clientset)
135+
136+
// Assert
137+
assert.NoError(t, err)
138+
assert.EqualValues(t, tc.expected, test)
139+
})
140+
}
141+
}
142+
143+
// Test_getOutcastedPrometheuses provides unit tests for the namespace filtering logic of Prometheus instances
144+
func Test_getOutcastedPrometheuses(t *testing.T) {
145+
testCases := []struct {
146+
name string
147+
proms []*v1.Prometheus
148+
expected []string
149+
}{
150+
{
151+
name: "The function returns the name of the Prometheus outside the 'openshift-monitoring' namespace",
152+
proms: []*v1.Prometheus{
153+
{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "test-namespace"}},
154+
},
155+
expected: []string{"test"},
156+
}, {
157+
name: "The function ignores the name of the Prometheus inside the 'openshift-monitoring' namespace",
158+
proms: []*v1.Prometheus{
159+
{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "openshift-monitoring"}},
160+
},
161+
expected: []string{},
162+
}, {
163+
name: "The function returns only items outside of the namespace on a mixed response from client",
164+
proms: []*v1.Prometheus{
165+
{ObjectMeta: metav1.ObjectMeta{Name: "test1", Namespace: "test-namespace"}},
166+
{ObjectMeta: metav1.ObjectMeta{Name: "ignore", Namespace: "openshift-monitoring"}},
167+
{ObjectMeta: metav1.ObjectMeta{Name: "test2", Namespace: "test-namespace"}},
168+
},
169+
expected: []string{"test1", "test2"},
170+
}, {
171+
name: "The function returns an empty slice if no instances are found",
172+
proms: []*v1.Prometheus{},
173+
expected: []string{},
174+
},
175+
}
176+
177+
for _, tc := range testCases {
178+
t.Run(tc.name, func(t *testing.T) {
179+
// Given
180+
clientset := fake.NewSimpleClientset()
181+
for _, prom := range tc.proms {
182+
assert.NoError(t,
183+
clientset.Tracker().Add(prom))
184+
}
185+
186+
// When
187+
test, err := monitoringCRNames{}.getOutcastedPrometheuses(context.Background(), clientset)
188+
189+
// Assert
190+
assert.NoError(t, err)
191+
assert.EqualValues(t, tc.expected, test)
192+
})
193+
}
194+
}

0 commit comments

Comments
 (0)