Skip to content

Commit 682fe3f

Browse files
authored
Introduce insightsoperators.openshift.io CR & implement its gather st… (#666)
* Introduce insightsoperators.openshift.io CR & implement its gather status attribute * Update * Fix * Minor update
1 parent 210ef9b commit 682fe3f

File tree

75 files changed

+7583
-15
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

75 files changed

+7583
-15
lines changed

manifests/03-clusterrole.yaml

+14
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,20 @@ rules:
5959
- clusteroperators
6060
verbs:
6161
- create
62+
- apiGroups:
63+
- "operator.openshift.io"
64+
resources:
65+
- insightsoperators/status
66+
verbs:
67+
- get
68+
- update
69+
- patch
70+
- apiGroups:
71+
- "operator.openshift.io"
72+
resources:
73+
- insightsoperators
74+
verbs:
75+
- get
6276
- apiGroups:
6377
- "config.openshift.io"
6478
resources:
File renamed without changes.
File renamed without changes.

manifests/08-operator-crd.yaml

+257
Large diffs are not rendered by default.

manifests/09-operator-cr.yaml

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
apiVersion: operator.openshift.io/v1
2+
kind: InsightsOperator
3+
metadata:
4+
name: cluster
5+
annotations:
6+
include.release.openshift.io/ibm-cloud-managed: "true"
7+
include.release.openshift.io/self-managed-high-availability: "true"
8+
include.release.openshift.io/single-node-developer: "true"
9+
capability.openshift.io/name: Insights
10+
spec:
11+
logLevel: Normal
12+
managementState: Managed
13+
operatorLogLevel: Normal
File renamed without changes.
File renamed without changes.

pkg/controller/operator.go

+7-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"time"
88

99
configv1client "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"
10+
operatorv1client "github.com/openshift/client-go/operator/clientset/versioned/typed/operator/v1"
1011
"github.com/openshift/library-go/pkg/controller/controllercmd"
1112
"k8s.io/apimachinery/pkg/api/errors"
1213
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -63,6 +64,11 @@ func (s *Operator) Run(ctx context.Context, controller *controllercmd.Controller
6364
return err
6465
}
6566

67+
operatorClient, err := operatorv1client.NewForConfig(controller.KubeConfig)
68+
if err != nil {
69+
return err
70+
}
71+
6672
gatherProtoKubeConfig, gatherKubeConfig, metricsGatherKubeConfig, alertsGatherKubeConfig := prepareGatherConfigs(
6773
controller.ProtoKubeConfig, controller.KubeConfig, s.Impersonate,
6874
)
@@ -114,7 +120,7 @@ func (s *Operator) Run(ctx context.Context, controller *controllercmd.Controller
114120
gatherKubeConfig, gatherProtoKubeConfig, metricsGatherKubeConfig, alertsGatherKubeConfig, anonymizer,
115121
configObserver, insightsClient,
116122
)
117-
periodicGather := periodic.New(configObserver, rec, gatherers, anonymizer)
123+
periodicGather := periodic.New(configObserver, rec, gatherers, anonymizer, operatorClient.InsightsOperators())
118124
statusReporter.AddSources(periodicGather.Sources()...)
119125

120126
// check we can read IO container status and we are not in crash loop

pkg/controller/periodic/periodic.go

+115-13
Original file line numberDiff line numberDiff line change
@@ -4,28 +4,47 @@ import (
44
"context"
55
"fmt"
66
"sort"
7+
"strings"
78
"time"
89

910
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
1011
"k8s.io/apimachinery/pkg/util/wait"
1112
"k8s.io/klog/v2"
1213

14+
v1 "github.com/openshift/api/operator/v1"
15+
operatorv1client "github.com/openshift/client-go/operator/clientset/versioned/typed/operator/v1"
1316
"github.com/openshift/insights-operator/pkg/anonymization"
1417
"github.com/openshift/insights-operator/pkg/config/configobserver"
1518
"github.com/openshift/insights-operator/pkg/controllerstatus"
1619
"github.com/openshift/insights-operator/pkg/gather"
1720
"github.com/openshift/insights-operator/pkg/gatherers"
1821
"github.com/openshift/insights-operator/pkg/recorder"
22+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
23+
)
24+
25+
const (
26+
DataGatheredCondition = "DataGathered"
27+
// NoDataGatheredReason is a reason when there is no data gathered - e.g. the resource is not in a cluster
28+
NoDataGatheredReason = "NoData"
29+
// GatherErrorReason is a reason when there is some error and no data gathered
30+
GatherErrorReason = "GatherError"
31+
// GatherPanicReason is a reason when the gatherer panicked and no data was gathered
32+
GatherPanicReason = "GatherPanic"
33+
// GatheredOKReason is a reason when data is gathered as expected
34+
GatheredOKReason = "GatheredOK"
35+
// GatheredWithErrorReason is a reason when data is gathered partially or with another error message
36+
GatheredWithErrorReason = "GatheredWithError"
1937
)
2038

2139
// Controller periodically runs gatherers, records their results to the recorder
2240
// and flushes the recorder to create archives
2341
type Controller struct {
24-
configurator configobserver.Configurator
25-
recorder recorder.FlushInterface
26-
gatherers []gatherers.Interface
27-
statuses map[string]controllerstatus.StatusController
28-
anonymizer *anonymization.Anonymizer
42+
configurator configobserver.Configurator
43+
recorder recorder.FlushInterface
44+
gatherers []gatherers.Interface
45+
statuses map[string]controllerstatus.StatusController
46+
anonymizer *anonymization.Anonymizer
47+
insightsOperatorCLI operatorv1client.InsightsOperatorInterface
2948
}
3049

3150
// New creates a new instance of Controller which periodically invokes the gatherers
@@ -35,6 +54,7 @@ func New(
3554
rec recorder.FlushInterface,
3655
listGatherers []gatherers.Interface,
3756
anonymizer *anonymization.Anonymizer,
57+
insightsOperatorCLI operatorv1client.InsightsOperatorInterface,
3858
) *Controller {
3959
statuses := make(map[string]controllerstatus.StatusController)
4060

@@ -44,11 +64,12 @@ func New(
4464
}
4565

4666
return &Controller{
47-
configurator: configurator,
48-
recorder: rec,
49-
gatherers: listGatherers,
50-
statuses: statuses,
51-
anonymizer: anonymizer,
67+
configurator: configurator,
68+
recorder: rec,
69+
gatherers: listGatherers,
70+
statuses: statuses,
71+
anonymizer: anonymizer,
72+
insightsOperatorCLI: insightsOperatorCLI,
5273
}
5374
}
5475

@@ -114,7 +135,7 @@ func (c *Controller) Gather() {
114135
}
115136

116137
allFunctionReports := make(map[string]gather.GathererFunctionReport)
117-
138+
gatherTime := metav1.Now()
118139
for _, gatherer := range gatherersToProcess {
119140
func() {
120141
name := gatherer.GetName()
@@ -142,8 +163,11 @@ func (c *Controller) Gather() {
142163
})
143164
}()
144165
}
145-
146-
err := gather.RecordArchiveMetadata(mapToArray(allFunctionReports), c.recorder, c.anonymizer)
166+
err := c.updateOperatorStatusCR(allFunctionReports, gatherTime)
167+
if err != nil {
168+
klog.Errorf("failed to update the Insights Operator CR status: %v", err)
169+
}
170+
err = gather.RecordArchiveMetadata(mapToArray(allFunctionReports), c.recorder, c.anonymizer)
147171
if err != nil {
148172
klog.Errorf("unable to record archive metadata because of error: %v", err)
149173
}
@@ -176,6 +200,84 @@ func (c *Controller) periodicTrigger(stopCh <-chan struct{}) {
176200
}
177201
}
178202

203+
// updateOperatorStatusCR gets the 'cluster' insightsoperators.operator.openshift.io resource and updates its status with the last
204+
// gathering details.
205+
func (c *Controller) updateOperatorStatusCR(allFunctionReports map[string]gather.GathererFunctionReport, gatherTime metav1.Time) error {
206+
insightsOperatorCR, err := c.insightsOperatorCLI.Get(context.Background(), "cluster", metav1.GetOptions{})
207+
if err != nil {
208+
return err
209+
}
210+
211+
updatedOperatorCR := insightsOperatorCR.DeepCopy()
212+
updatedOperatorCR.Status.GatherStatus = v1.GatherStatus{
213+
LastGatherTime: gatherTime,
214+
LastGatherDuration: metav1.Duration{
215+
Duration: time.Since(gatherTime.Time),
216+
},
217+
}
218+
219+
for k := range allFunctionReports {
220+
fr := allFunctionReports[k]
221+
// duration = 0 means the gatherer didn't run
222+
if fr.Duration == 0 {
223+
continue
224+
}
225+
226+
gs := createGathererStatus(&fr)
227+
updatedOperatorCR.Status.GatherStatus.Gatherers = append(updatedOperatorCR.Status.GatherStatus.Gatherers, gs)
228+
}
229+
230+
_, err = c.insightsOperatorCLI.UpdateStatus(context.Background(), updatedOperatorCR, metav1.UpdateOptions{})
231+
if err != nil {
232+
return err
233+
}
234+
return nil
235+
}
236+
237+
func createGathererStatus(gfr *gather.GathererFunctionReport) v1.GathererStatus {
238+
gs := v1.GathererStatus{
239+
Name: gfr.FuncName,
240+
LastGatherDuration: metav1.Duration{
241+
// v.Duration is in milliseconds and we need nanoseconds
242+
Duration: time.Duration(gfr.Duration * 1000000),
243+
},
244+
}
245+
con := metav1.Condition{
246+
Type: DataGatheredCondition,
247+
LastTransitionTime: metav1.Now(),
248+
Status: metav1.ConditionFalse,
249+
Reason: NoDataGatheredReason,
250+
}
251+
252+
if gfr.Panic != nil {
253+
con.Reason = GatherPanicReason
254+
con.Message = gfr.Panic.(string)
255+
}
256+
257+
if gfr.RecordsCount > 0 {
258+
con.Status = metav1.ConditionTrue
259+
con.Reason = GatheredOKReason
260+
con.Message = fmt.Sprintf("Created %d records in the archive.", gfr.RecordsCount)
261+
262+
if len(gfr.Errors) > 0 {
263+
con.Reason = GatheredWithErrorReason
264+
con.Message = fmt.Sprintf("%s Error: %s", con.Message, strings.Join(gfr.Errors, ","))
265+
}
266+
267+
gs.Conditions = append(gs.Conditions, con)
268+
return gs
269+
}
270+
271+
if len(gfr.Errors) > 0 {
272+
con.Reason = GatherErrorReason
273+
con.Message = strings.Join(gfr.Errors, ",")
274+
}
275+
276+
gs.Conditions = append(gs.Conditions, con)
277+
278+
return gs
279+
}
280+
179281
func mapToArray(m map[string]gather.GathererFunctionReport) []gather.GathererFunctionReport {
180282
a := make([]gather.GathererFunctionReport, 0, len(m))
181283
for _, v := range m {

0 commit comments

Comments
 (0)