@@ -3,21 +3,20 @@ package metrics
3
3
import (
4
4
"context"
5
5
"encoding/json"
6
- "fmt"
7
6
8
7
"github.com/operator-framework/operator-sdk/pkg/k8sutil"
9
- "github.com/operator-framework/operator-sdk/pkg/metrics"
10
8
"github.com/pkg/errors"
11
9
monclient "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned/typed/monitoring/v1"
12
10
"k8s.io/api/core/v1"
13
- apierrors "k8s.io/apimachinery/pkg/ api/errors "
11
+ core "k8s.io/api/core/v1 "
14
12
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
15
13
"k8s.io/apimachinery/pkg/types"
16
- "k8s.io/apimachinery/pkg/util/intstr"
17
14
"k8s.io/client-go/kubernetes"
18
15
k8sclient "k8s.io/client-go/kubernetes"
19
16
"k8s.io/client-go/rest"
17
+ "k8s.io/client-go/tools/record"
20
18
logf "sigs.k8s.io/controller-runtime/pkg/log"
19
+ "sigs.k8s.io/controller-runtime/pkg/manager"
21
20
22
21
"github.com/openshift/windows-machine-config-operator/pkg/nodeconfig"
23
22
)
26
25
log = logf .Log .WithName ("metrics" )
27
26
// metricsEnabled specifies if metrics are enabled in the current cluster
28
27
metricsEnabled = true
29
- // windowsMetricsResource is the name of an object created for Windows metrics
30
- windowsMetricsResource = ""
31
28
)
32
29
33
30
const (
@@ -37,6 +34,9 @@ const (
37
34
Host = "0.0.0.0"
38
35
// Port is the port number on which windows-exporter is exposed.
39
36
Port int32 = 9182
37
+ // WindowsMetricsResource is the name for objects created for Prometheus monitoring
38
+ // by current operator version. Its name is defined through the bundle manifests
39
+ WindowsMetricsResource = "windows-machine-config-operator-metrics"
40
40
)
41
41
42
42
// PrometheusNodeConfig holds the information required to configure Prometheus, so that it can scrape metrics from the
@@ -48,6 +48,18 @@ type PrometheusNodeConfig struct {
48
48
namespace string
49
49
}
50
50
51
+ // Config holds the clients, namespace and event recorder required to interact with metrics objects
52
+ type Config struct {
53
+ // Clientset is embedded as the handle that allows us to interact with the Kubernetes API.
54
+ * kubernetes.Clientset
55
+ // MonitoringV1Client is embedded as the handle that allows us to interact with the Monitoring API.
56
+ * monclient.MonitoringV1Client
57
+ // namespace is the namespace in which metrics objects are created
58
+ namespace string
59
+ // Recorder generates events, e.g. the warning emitted when the cluster-monitoring label is not enabled
60
+ Recorder record.EventRecorder
61
+ }
62
+
51
63
// patchEndpoint contains information regarding patching metrics Endpoint
52
64
type patchEndpoint struct {
53
65
// op defines patch operation to be performed on the Endpoints object
@@ -71,74 +83,21 @@ func NewPrometheusNodeConfig(clientset *kubernetes.Clientset) (*PrometheusNodeCo
71
83
}, err
72
84
}
73
85
74
- // Add will create the Services and Service Monitors that allows the operator to export the metrics by using
75
- // the Prometheus operator
76
- func Add (ctx context.Context , cfg * rest.Config , namespace string ) error {
77
- // Add to the below struct any other metrics ports you want to expose.
78
- servicePorts := []v1.ServicePort {
79
- {Port : Port , Name : PortName , Protocol : v1 .ProtocolTCP , TargetPort : intstr.IntOrString {Type : intstr .Int , IntVal : Port }},
80
- }
81
-
82
- // Create Service object to expose the metrics port(s).
83
- service , err := metrics .CreateMetricsService (ctx , cfg , servicePorts )
84
- if err != nil {
85
- return errors .Wrap (err , "could not create metrics Service" )
86
+ // NewConfig creates a new Config for the caller: it builds the Kubernetes and monitoring clients from cfg and obtains a "metrics" event recorder from mgr. It returns an error if cfg is nil or if either client cannot be created.
87
+ func NewConfig (mgr manager.Manager , cfg * rest.Config , namespace string ) (* Config , error ) {
88
+ if cfg == nil {
89
+ return nil , errors .New ("config should not be nil" )
86
90
}
87
-
88
- // the name for the metrics resources is set during creation of metrics service and is equivalent to the service name
89
- windowsMetricsResource = service .GetName ()
90
-
91
- // Create a monitoring client to interact with the ServiceMonitor object
92
- mclient , err := monclient .NewForConfig (cfg )
93
- if err != nil {
94
- return errors .Wrap (err , "could not create monitoring client" )
95
- }
96
-
97
- // In the case of an operator restart, a previous SM object will be deleted and a new one will
98
- // be created. We are deleting to ensure that the SM always exists with the correct spec. Otherwise,
99
- // metrics may exhibit unexpected behavior if created by a previous version of WMCO.
100
- err = mclient .ServiceMonitors (namespace ).Delete (context .TODO (), windowsMetricsResource , metav1.DeleteOptions {})
101
- if err != nil && ! apierrors .IsNotFound (err ) {
102
- return errors .Wrap (err , "could not delete existing ServiceMonitor object" )
103
- }
104
-
105
- // CreateServiceMonitors will automatically create the prometheus-operator ServiceMonitor resources
106
- // necessary to configure Prometheus to scrape metrics from this operator.
107
- services := []* v1.Service {service }
108
- _ , err = metrics .CreateServiceMonitors (cfg , namespace , services )
109
- if err != nil {
110
- log .Error (err , "could not create ServiceMonitor object" )
111
- // If this operator is deployed to a cluster without the prometheus-operator running, it will return
112
- // ErrServiceMonitorNotPresent, which can be used to safely skip ServiceMonitor creation.
113
- if err == metrics .ErrServiceMonitorNotPresent {
114
- metricsEnabled = false
115
- return errors .Wrap (err , "install prometheus-operator in your cluster to create ServiceMonitor objects" )
116
-
117
- }
118
- }
119
-
120
- // The ServiceMonitor created by the operator-sdk metrics package doesn't have fields required to display
121
- // node graphs for Windows. Update the Service monitor with the required fields.
122
- err = updateServiceMonitors (cfg , namespace )
123
- if err != nil {
124
- return errors .Wrap (err , "error updating service monitor" )
125
- }
126
-
127
91
oclient , err := k8sclient .NewForConfig (cfg )
128
92
if err != nil {
129
- return errors .Wrap (err , "could not create config clientset " )
93
+ return nil , errors .Wrap (err , "error creating config client " )
130
94
}
131
- // When a selector is present in a headless service i.e. spec.ClusterIP=None, Kubernetes manages the
132
- // list of endpoints reverting all the changes made by the operator. Remove selector from Metrics Service to avoid
133
- // reverting changes in the Endpoints object.
134
- patchData := fmt .Sprintf (`{"spec":{"selector": null }}` )
135
- service , err = oclient .CoreV1 ().Services (namespace ).Patch (ctx , service .Name , types .MergePatchType ,
136
- []byte (patchData ), metav1.PatchOptions {})
95
+ mclient , err := monclient .NewForConfig (cfg )
137
96
if err != nil {
138
- return errors .Wrap (err , "could not remove selector from metrics service " )
97
+ return nil , errors .Wrap (err , "error creating monitoring client " )
139
98
}
140
-
141
- return nil
99
+ return & Config { oclient , mclient , namespace ,
100
+ mgr . GetEventRecorderFor ( "metrics" )}, nil // positional literal: Clientset, MonitoringV1Client, namespace, Recorder
142
101
}
143
102
144
103
// syncMetricsEndpoint updates the endpoint object with the new list of IP addresses from the Windows nodes and the
@@ -171,7 +130,7 @@ func (pc *PrometheusNodeConfig) syncMetricsEndpoint(nodeEndpointAdressess []v1.E
171
130
}
172
131
173
132
_ , err = pc .k8sclientset .CoreV1 ().Endpoints (pc .namespace ).
174
- Patch (context .TODO (), windowsMetricsResource , types .JSONPatchType , patchDataBytes , metav1.PatchOptions {})
133
+ Patch (context .TODO (), WindowsMetricsResource , types .JSONPatchType , patchDataBytes , metav1.PatchOptions {})
175
134
return errors .Wrap (err , "unable to sync metrics endpoints" )
176
135
}
177
136
@@ -191,9 +150,9 @@ func (pc *PrometheusNodeConfig) Configure() error {
191
150
192
151
// get Metrics Endpoints object
193
152
endpoints , err := pc .k8sclientset .CoreV1 ().Endpoints (pc .namespace ).Get (context .TODO (),
194
- windowsMetricsResource , metav1.GetOptions {})
153
+ WindowsMetricsResource , metav1.GetOptions {})
195
154
if err != nil {
196
- return errors .Wrapf (err , "could not get metrics endpoints %v" , windowsMetricsResource )
155
+ return errors .Wrapf (err , "could not get metrics endpoints %v" , WindowsMetricsResource )
197
156
}
198
157
199
158
if ! isEndpointsValid (nodes , endpoints ) {
@@ -204,7 +163,7 @@ func (pc *PrometheusNodeConfig) Configure() error {
204
163
return errors .Wrap (err , "error updating endpoints object with list of endpoint addresses" )
205
164
}
206
165
}
207
- log .Info ("Prometheus configured" , "endpoints" , windowsMetricsResource , "port" , Port , "name" , PortName )
166
+ log .Info ("Prometheus configured" , "endpoints" , WindowsMetricsResource , "port" , Port , "name" , PortName )
208
167
return nil
209
168
}
210
169
@@ -256,25 +215,60 @@ func isEndpointsValid(nodes *v1.NodeList, endpoints *v1.Endpoints) bool {
256
215
return true
257
216
}
258
217
259
- // updateServiceMonitors patches the metrics Service Monitor to include required fields to display node graphs on the
260
- // OpenShift console. Console graph queries require metrics endpoint target name to be node name, however
261
- // windows_exporter returns node IP. We replace the target name by adding `replace` action field to the ServiceMonitor
262
- // object that replaces node IP to node name as the metrics endpoint target.
263
- func updateServiceMonitors (cfg * rest.Config , namespace string ) error {
264
-
265
- patchData := fmt .Sprintf ("[{\" op\" : \" replace\" , \" path\" : \" /spec/endpoints/0\" , " +
266
- "\" value\" :{\" path\" : \" /%s\" ,\" port\" : \" %s\" ,\" relabelings\" : [{\" action\" : \" replace\" , \" regex\" : \" (.*)\" , " +
267
- "\" replacement\" : \" $1\" , \" sourceLabels\" : [\" __meta_kubernetes_endpoint_address_target_name\" ]," +
268
- "\" targetLabel\" : \" instance\" }]}}]" , PortName , PortName )
218
+ // Configure takes care of all the required configuration steps
219
+ // for Prometheus monitoring: it validates the cluster-monitoring label on the operator namespace
220
+ // and creates the metrics Endpoints object. It returns an error only if creating the Endpoints object fails.
221
+ func (c * Config ) Configure (ctx context.Context ) error {
222
+ // validate if cluster monitoring is enabled in the operator namespace
223
+ if err := c .validate (ctx ); err != nil {
224
+ log .Error (err , "error validating cluster monitoring label" )
225
+ return nil // a validation failure is only logged; nil is returned and no Endpoints object is created
226
+ }
227
+ // Create Metrics Endpoint object in the operator namespace; ctx is not passed on to createEndpoint
228
+ if err := c .createEndpoint (); err != nil {
229
+ return errors .Wrap (err , "error creating metrics Endpoint" )
230
+ }
231
+ return nil
232
+ }
269
233
270
- mclient , err := monclient .NewForConfig (cfg )
234
+ // validate will verify if cluster monitoring is enabled in the operator namespace.
235
+ // If the openshift.io/cluster-monitoring label is not "true", it sets metricsEnabled to false, records a warning event on the namespace and returns an error.
236
+ func (c * Config ) validate (ctx context.Context ) error {
237
+ // fetch the operator namespace so that its labels can be inspected
238
+ wmcoNamespace , err := c .CoreV1 ().Namespaces ().Get (ctx , c .namespace , metav1.GetOptions {})
271
239
if err != nil {
272
- return errors .Wrap (err , "error creating monitoring client" )
240
+ return errors .Wrap (err , "error getting operator namespace" )
241
+ }
242
+ if wmcoNamespace .Labels ["openshift.io/cluster-monitoring" ] != "true" {
243
+ metricsEnabled = false // record in the package-level flag that metrics are not enabled
244
+ c .Recorder .Eventf (wmcoNamespace , core .EventTypeWarning , "labelValidationFailed" ,
245
+ "Cluster monitoring openshift.io/cluster-monitoring label is not enabled in %s namespace" , c .namespace )
246
+ return errors .Errorf ("monitoring not enabled in %s namespace" , c .namespace )
247
+ }
248
+ return nil
249
+ }
250
+
251
+ // createEndpoint creates an Endpoints object named WindowsMetricsResource, with no subsets, in the operator namespace.
252
+ // WMCO is no longer creating a service with a selector therefore no Endpoint
253
+ // object is created and WMCO needs to create the Endpoint object.
254
+ // We cannot create endpoints as a part of manifests deployment as
255
+ // Endpoints resources are not currently OLM-supported for bundle creation.
256
+ func (c * Config ) createEndpoint () error {
257
+ // build the new Endpoints object, named and labelled with WindowsMetricsResource
258
+ newEndpoint := & v1.Endpoints {
259
+ TypeMeta : metav1.TypeMeta {
260
+ Kind : "Endpoints" ,
261
+ },
262
+ ObjectMeta : metav1.ObjectMeta {
263
+ Name : WindowsMetricsResource ,
264
+ Namespace : c .namespace ,
265
+ Labels : map [string ]string {"name" : WindowsMetricsResource },
266
+ },
267
+ Subsets : nil , // no addresses are set at creation time
273
268
}
274
- _ , err = mclient .ServiceMonitors (namespace ).Patch (context .TODO (), windowsMetricsResource , types .JSONPatchType , []byte (patchData ),
275
- metav1.PatchOptions {})
269
+ _ , err := c .CoreV1 ().Endpoints (c .namespace ).Create (context .TODO (), newEndpoint , metav1.CreateOptions {}) // NOTE(review): uses context.TODO(); presumably Create fails if the object already exists (e.g. after an operator restart) - confirm
276
270
if err != nil {
277
- return errors .Wrap (err , "unable to patch service monitor " )
271
+ return errors .Wrap (err , "error creating metrics Endpoint " )
278
272
}
279
273
return nil
280
274
}
0 commit comments