Skip to content

Commit 6eed632

Browse files
VaishnaviHiremansikulkarni96
authored and committed
Remove dependency on operator-sdk metrics package
This commit removes the dependency on the operator-sdk metrics package by adding the Service and ServiceMonitor through package manifests and validating their creation on operator startup. Since WMCO no longer creates a Service with a selector, no Endpoints object is created automatically, so this commit also ensures we create an Endpoints object before accessing it. We cannot create Endpoints resources through manifests because they are not currently supported by OLM (see operator-framework/operator-lifecycle-manager#1996).
1 parent c9e24c8 commit 6eed632

File tree

7 files changed

+143
-103
lines changed

7 files changed

+143
-103
lines changed

cmd/manager/main.go

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import (
2222
"github.com/openshift/windows-machine-config-operator/apis"
2323
"github.com/openshift/windows-machine-config-operator/controllers"
2424
"github.com/openshift/windows-machine-config-operator/pkg/cluster"
25-
winmetrics "github.com/openshift/windows-machine-config-operator/pkg/metrics"
25+
"github.com/openshift/windows-machine-config-operator/pkg/metrics"
2626
"github.com/openshift/windows-machine-config-operator/pkg/nodeconfig/payload"
2727
"github.com/openshift/windows-machine-config-operator/version"
2828
)
@@ -131,7 +131,7 @@ func main() {
131131
// Create a new Cmd to provide shared dependencies and start components
132132
mgr, err := manager.New(cfg, manager.Options{
133133
NewCache: cache.MultiNamespacedCacheBuilder(namespaces),
134-
MetricsBindAddress: fmt.Sprintf("%s:%d", winmetrics.Host, winmetrics.Port),
134+
MetricsBindAddress: fmt.Sprintf("%s:%d", metrics.Host, metrics.Port),
135135
})
136136
if err != nil {
137137
log.Error(err, "failed to create a new Manager")
@@ -152,9 +152,16 @@ func main() {
152152
os.Exit(1)
153153
}
154154

155-
// Add the Metrics Service and Service Monitor
156-
if err := winmetrics.Add(ctx, cfg, namespace); err != nil {
157-
log.Error(err, "failed to add Metrics Service and Service Monitor")
155+
metricsConfig, err := metrics.NewConfig(mgr, cfg, namespace)
156+
if err != nil {
157+
log.Error(err, "failed to create MetricsConfig object")
158+
os.Exit(1)
159+
}
160+
161+
// Configure the metric resources
162+
if err := metricsConfig.Configure(ctx); err != nil {
163+
log.Error(err, "error setting up metrics")
164+
os.Exit(1)
158165
}
159166

160167
log.Info("starting the Cmd.")
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Prometheus Monitor Service (Metrics)
2+
apiVersion: monitoring.coreos.com/v1
3+
kind: ServiceMonitor
4+
metadata:
5+
labels:
6+
name: windows-machine-config-operator
7+
name: windows-machine-config-operator-metrics
8+
spec:
9+
endpoints:
10+
- path: /metrics
11+
port: metrics
12+
interval: 30s
13+
honorLabels: true
14+
relabelings:
15+
- action: replace
16+
regex: (.*)
17+
replacement: $1
18+
sourceLabels:
19+
- __meta_kubernetes_endpoint_address_target_name
20+
targetLabel: instance
21+
selector:
22+
matchLabels:
23+
name: windows-machine-config-operator
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
kind: Service
2+
apiVersion: v1
3+
metadata:
4+
name: windows-machine-config-operator-metrics
5+
labels:
6+
name: windows-machine-config-operator
7+
spec:
8+
ports:
9+
- name: metrics
10+
protocol: TCP
11+
port: 9182
12+
targetPort: 9182

deploy/olm-catalog/windows-machine-config-operator/manifests/windows-machine-config-operator.clusterserviceversion.yaml

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -284,8 +284,6 @@ spec:
284284
- create
285285
- delete
286286
- get
287-
- update
288-
- patch
289287
- apiGroups:
290288
- ""
291289
resources:
@@ -304,16 +302,20 @@ spec:
304302
- create
305303
- get
306304
- update
305+
- apiGroups:
306+
- ""
307+
resources:
308+
- namespaces
309+
verbs:
310+
- get
307311
- apiGroups:
308312
- monitoring.coreos.com
309313
resources:
310314
- servicemonitors
311315
verbs:
312316
- get
313317
- create
314-
- update
315318
- list
316-
- patch
317319
- delete
318320
- apiGroups:
319321
- apps

deploy/role.yaml

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@ rules:
2323
- create
2424
- delete
2525
- get
26-
- update
27-
- patch
2826
- apiGroups:
2927
- ""
3028
resources:
@@ -43,16 +41,20 @@ rules:
4341
- create
4442
- get
4543
- update
44+
- apiGroups:
45+
- ""
46+
resources:
47+
- namespaces
48+
verbs:
49+
- get
4650
- apiGroups:
4751
- monitoring.coreos.com
4852
resources:
4953
- servicemonitors
5054
verbs:
5155
- get
5256
- create
53-
- update
5457
- list
55-
- patch
5658
- delete
5759
# deployment/finalizers permissions needed for the metrics server
5860
- apiGroups:

pkg/metrics/metrics.go

Lines changed: 81 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,20 @@ package metrics
33
import (
44
"context"
55
"encoding/json"
6-
"fmt"
76

87
"github.com/operator-framework/operator-sdk/pkg/k8sutil"
9-
"github.com/operator-framework/operator-sdk/pkg/metrics"
108
"github.com/pkg/errors"
119
monclient "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned/typed/monitoring/v1"
1210
"k8s.io/api/core/v1"
13-
apierrors "k8s.io/apimachinery/pkg/api/errors"
11+
core "k8s.io/api/core/v1"
1412
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1513
"k8s.io/apimachinery/pkg/types"
16-
"k8s.io/apimachinery/pkg/util/intstr"
1714
"k8s.io/client-go/kubernetes"
1815
k8sclient "k8s.io/client-go/kubernetes"
1916
"k8s.io/client-go/rest"
17+
"k8s.io/client-go/tools/record"
2018
logf "sigs.k8s.io/controller-runtime/pkg/log"
19+
"sigs.k8s.io/controller-runtime/pkg/manager"
2120

2221
"github.com/openshift/windows-machine-config-operator/pkg/nodeconfig"
2322
)
@@ -26,8 +25,6 @@ var (
2625
log = logf.Log.WithName("metrics")
2726
// metricsEnabled specifies if metrics are enabled in the current cluster
2827
metricsEnabled = true
29-
// windowsMetricsResource is the name of an object created for Windows metrics
30-
windowsMetricsResource = ""
3128
)
3229

3330
const (
@@ -37,6 +34,9 @@ const (
3734
Host = "0.0.0.0"
3835
// Port is the port number on which windows-exporter is exposed.
3936
Port int32 = 9182
37+
// WindowsMetricsResource is the name for objects created for Prometheus monitoring
38+
// by current operator version. Its name is defined through the bundle manifests
39+
WindowsMetricsResource = "windows-machine-config-operator-metrics"
4040
)
4141

4242
// PrometheusNodeConfig holds the information required to configure Prometheus, so that it can scrape metrics from the
@@ -48,6 +48,18 @@ type PrometheusNodeConfig struct {
4848
namespace string
4949
}
5050

51+
// Config holds the information required to interact with metrics objects
52+
type Config struct {
53+
// a handle that allows us to interact with the Kubernetes API.
54+
*kubernetes.Clientset
55+
// a handle that allows us to interact with the Monitoring API.
56+
*monclient.MonitoringV1Client
57+
// namespace is the namespace in which metrics objects are created
58+
namespace string
59+
// recorder to generate events
60+
Recorder record.EventRecorder
61+
}
62+
5163
// patchEndpoint contains information regarding patching metrics Endpoint
5264
type patchEndpoint struct {
5365
// op defines patch operation to be performed on the Endpoints object
@@ -71,74 +83,21 @@ func NewPrometheusNodeConfig(clientset *kubernetes.Clientset) (*PrometheusNodeCo
7183
}, err
7284
}
7385

74-
// Add will create the Services and Service Monitors that allows the operator to export the metrics by using
75-
// the Prometheus operator
76-
func Add(ctx context.Context, cfg *rest.Config, namespace string) error {
77-
// Add to the below struct any other metrics ports you want to expose.
78-
servicePorts := []v1.ServicePort{
79-
{Port: Port, Name: PortName, Protocol: v1.ProtocolTCP, TargetPort: intstr.IntOrString{Type: intstr.Int, IntVal: Port}},
80-
}
81-
82-
// Create Service object to expose the metrics port(s).
83-
service, err := metrics.CreateMetricsService(ctx, cfg, servicePorts)
84-
if err != nil {
85-
return errors.Wrap(err, "could not create metrics Service")
86+
// NewConfig creates a new instance for Config to be used by the caller.
87+
func NewConfig(mgr manager.Manager, cfg *rest.Config, namespace string) (*Config, error) {
88+
if cfg == nil {
89+
return nil, errors.New("config should not be nil")
8690
}
87-
88-
// the name for the metrics resources is set during creation of metrics service and is equivalent to the service name
89-
windowsMetricsResource = service.GetName()
90-
91-
// Create a monitoring client to interact with the ServiceMonitor object
92-
mclient, err := monclient.NewForConfig(cfg)
93-
if err != nil {
94-
return errors.Wrap(err, "could not create monitoring client")
95-
}
96-
97-
// In the case of an operator restart, a previous SM object will be deleted and a new one will
98-
// be created. We are deleting to ensure that the SM always exists with the correct spec. Otherwise,
99-
// metrics may exhibit unexpected behavior if created by a previous version of WMCO.
100-
err = mclient.ServiceMonitors(namespace).Delete(context.TODO(), windowsMetricsResource, metav1.DeleteOptions{})
101-
if err != nil && !apierrors.IsNotFound(err) {
102-
return errors.Wrap(err, "could not delete existing ServiceMonitor object")
103-
}
104-
105-
// CreateServiceMonitors will automatically create the prometheus-operator ServiceMonitor resources
106-
// necessary to configure Prometheus to scrape metrics from this operator.
107-
services := []*v1.Service{service}
108-
_, err = metrics.CreateServiceMonitors(cfg, namespace, services)
109-
if err != nil {
110-
log.Error(err, "could not create ServiceMonitor object")
111-
// If this operator is deployed to a cluster without the prometheus-operator running, it will return
112-
// ErrServiceMonitorNotPresent, which can be used to safely skip ServiceMonitor creation.
113-
if err == metrics.ErrServiceMonitorNotPresent {
114-
metricsEnabled = false
115-
return errors.Wrap(err, "install prometheus-operator in your cluster to create ServiceMonitor objects")
116-
117-
}
118-
}
119-
120-
// The ServiceMonitor created by the operator-sdk metrics package doesn't have fields required to display
121-
// node graphs for Windows. Update the Service monitor with the required fields.
122-
err = updateServiceMonitors(cfg, namespace)
123-
if err != nil {
124-
return errors.Wrap(err, "error updating service monitor")
125-
}
126-
12791
oclient, err := k8sclient.NewForConfig(cfg)
12892
if err != nil {
129-
return errors.Wrap(err, "could not create config clientset")
93+
return nil, errors.Wrap(err, "error creating config client")
13094
}
131-
// When a selector is present in a headless service i.e. spec.ClusterIP=None, Kubernetes manages the
132-
// list of endpoints reverting all the changes made by the operator. Remove selector from Metrics Service to avoid
133-
// reverting changes in the Endpoints object.
134-
patchData := fmt.Sprintf(`{"spec":{"selector": null }}`)
135-
service, err = oclient.CoreV1().Services(namespace).Patch(ctx, service.Name, types.MergePatchType,
136-
[]byte(patchData), metav1.PatchOptions{})
95+
mclient, err := monclient.NewForConfig(cfg)
13796
if err != nil {
138-
return errors.Wrap(err, "could not remove selector from metrics service")
97+
return nil, errors.Wrap(err, "error creating monitoring client")
13998
}
140-
141-
return nil
99+
return &Config{oclient, mclient, namespace,
100+
mgr.GetEventRecorderFor("metrics")}, nil
142101
}
143102

144103
// syncMetricsEndpoint updates the endpoint object with the new list of IP addresses from the Windows nodes and the
@@ -171,7 +130,7 @@ func (pc *PrometheusNodeConfig) syncMetricsEndpoint(nodeEndpointAdressess []v1.E
171130
}
172131

173132
_, err = pc.k8sclientset.CoreV1().Endpoints(pc.namespace).
174-
Patch(context.TODO(), windowsMetricsResource, types.JSONPatchType, patchDataBytes, metav1.PatchOptions{})
133+
Patch(context.TODO(), WindowsMetricsResource, types.JSONPatchType, patchDataBytes, metav1.PatchOptions{})
175134
return errors.Wrap(err, "unable to sync metrics endpoints")
176135
}
177136

@@ -191,9 +150,9 @@ func (pc *PrometheusNodeConfig) Configure() error {
191150

192151
// get Metrics Endpoints object
193152
endpoints, err := pc.k8sclientset.CoreV1().Endpoints(pc.namespace).Get(context.TODO(),
194-
windowsMetricsResource, metav1.GetOptions{})
153+
WindowsMetricsResource, metav1.GetOptions{})
195154
if err != nil {
196-
return errors.Wrapf(err, "could not get metrics endpoints %v", windowsMetricsResource)
155+
return errors.Wrapf(err, "could not get metrics endpoints %v", WindowsMetricsResource)
197156
}
198157

199158
if !isEndpointsValid(nodes, endpoints) {
@@ -204,7 +163,7 @@ func (pc *PrometheusNodeConfig) Configure() error {
204163
return errors.Wrap(err, "error updating endpoints object with list of endpoint addresses")
205164
}
206165
}
207-
log.Info("Prometheus configured", "endpoints", windowsMetricsResource, "port", Port, "name", PortName)
166+
log.Info("Prometheus configured", "endpoints", WindowsMetricsResource, "port", Port, "name", PortName)
208167
return nil
209168
}
210169

@@ -256,25 +215,60 @@ func isEndpointsValid(nodes *v1.NodeList, endpoints *v1.Endpoints) bool {
256215
return true
257216
}
258217

259-
// updateServiceMonitors patches the metrics Service Monitor to include required fields to display node graphs on the
260-
// OpenShift console. Console graph queries require metrics endpoint target name to be node name, however
261-
// windows_exporter returns node IP. We replace the target name by adding `replace` action field to the ServiceMonitor
262-
// object that replaces node IP to node name as the metrics endpoint target.
263-
func updateServiceMonitors(cfg *rest.Config, namespace string) error {
264-
265-
patchData := fmt.Sprintf("[{\"op\": \"replace\", \"path\": \"/spec/endpoints/0\", "+
266-
"\"value\":{\"path\": \"/%s\",\"port\": \"%s\",\"relabelings\": [{\"action\": \"replace\", \"regex\": \"(.*)\", "+
267-
"\"replacement\": \"$1\", \"sourceLabels\": [\"__meta_kubernetes_endpoint_address_target_name\"],"+
268-
"\"targetLabel\": \"instance\"}]}}]", PortName, PortName)
218+
// Configure takes care of all the required configuration steps
219+
// for Prometheus monitoring, such as validating the monitoring label
220+
// and creating the metrics Endpoints object.
221+
func (c *Config) Configure(ctx context.Context) error {
222+
// validate if cluster monitoring is enabled in the operator namespace
223+
if err := c.validate(ctx); err != nil {
224+
log.Error(err, "error validating cluster monitoring label")
225+
return nil
226+
}
227+
// Create Metrics Endpoint object in the operator namespace
228+
if err := c.createEndpoint(); err != nil {
229+
return errors.Wrap(err, "error creating metrics Endpoint")
230+
}
231+
return nil
232+
}
269233

270-
mclient, err := monclient.NewForConfig(cfg)
234+
// validate will verify if cluster monitoring is enabled in the operator namespace.
235+
// If the label is not present, it will log and send warning events to the user.
236+
func (c *Config) validate(ctx context.Context) error {
237+
// validate if metrics label is added to namespace
238+
wmcoNamespace, err := c.CoreV1().Namespaces().Get(ctx, c.namespace, metav1.GetOptions{})
271239
if err != nil {
272-
return errors.Wrap(err, "error creating monitoring client")
240+
return errors.Wrap(err, "error getting operator namespace")
241+
}
242+
if wmcoNamespace.Labels["openshift.io/cluster-monitoring"] != "true" {
243+
metricsEnabled = false
244+
c.Recorder.Eventf(wmcoNamespace, core.EventTypeWarning, "labelValidationFailed",
245+
"Cluster monitoring openshift.io/cluster-monitoring label is not enabled in %s namespace", c.namespace)
246+
return errors.Errorf("monitoring not enabled in %s namespace", c.namespace)
247+
}
248+
return nil
249+
}
250+
251+
// createEndpoint creates an endpoint object in the operator namespace.
252+
// WMCO no longer creates a Service with a selector, so no Endpoints
253+
// object is created automatically and WMCO must create it itself.
254+
// We cannot create endpoints as a part of manifests deployment as
255+
// Endpoints resources are not currently OLM-supported for bundle creation.
256+
func (c *Config) createEndpoint() error {
257+
// create new Endpoint
258+
newEndpoint := &v1.Endpoints{
259+
TypeMeta: metav1.TypeMeta{
260+
Kind: "Endpoints",
261+
},
262+
ObjectMeta: metav1.ObjectMeta{
263+
Name: WindowsMetricsResource,
264+
Namespace: c.namespace,
265+
Labels: map[string]string{"name": WindowsMetricsResource},
266+
},
267+
Subsets: nil,
273268
}
274-
_, err = mclient.ServiceMonitors(namespace).Patch(context.TODO(), windowsMetricsResource, types.JSONPatchType, []byte(patchData),
275-
metav1.PatchOptions{})
269+
_, err := c.CoreV1().Endpoints(c.namespace).Create(context.TODO(), newEndpoint, metav1.CreateOptions{})
276270
if err != nil {
277-
return errors.Wrap(err, "unable to patch service monitor")
271+
return errors.Wrap(err, "error creating metrics Endpoint")
278272
}
279273
return nil
280274
}

0 commit comments

Comments
 (0)