Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce insightsoperators.openshift.io CR & implement its gather st… #666

Merged
merged 4 commits into from
Aug 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions manifests/03-clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,20 @@ rules:
- clusteroperators
verbs:
- create
- apiGroups:
- "operator.openshift.io"
resources:
- insightsoperators/status
verbs:
- get
- update
- patch
- apiGroups:
- "operator.openshift.io"
resources:
- insightsoperators
verbs:
- get
- apiGroups:
- "config.openshift.io"
resources:
Expand Down
File renamed without changes.
File renamed without changes.
257 changes: 257 additions & 0 deletions manifests/08-operator-crd.yaml

Large diffs are not rendered by default.

13 changes: 13 additions & 0 deletions manifests/09-operator-cr.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
apiVersion: operator.openshift.io/v1
kind: InsightsOperator
metadata:
name: cluster
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You need to annotate this with include.release.openshift.io/... for the cluster profiles it should belong to. See your other manifests for examples. As it stands now:

$ curl -s https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/origin-ci-test/pr-logs/pull/openshift_insights-operator/666/pull-ci-openshift-insights-operator-master-e2e/1562059761355067392/artifacts/e2e/gather-extra/artifacts/pods/openshift-cluster-version_cluster-version-operator-5bff5b8bd6-2d6n7_cluster-version-operator.log | grep kind=InsightsOperator
I0823 13:42:20.564370       1 payload.go:210] excluding 0000_50_insights-operator_09-operator-cr.yaml group=operator.openshift.io kind=InsightsOperator namespace= name=cluster: no annotations
I0823 13:42:21.332159       1 payload.go:210] excluding 0000_50_insights-operator_09-operator-cr.yaml group=operator.openshift.io kind=InsightsOperator namespace= name=cluster: no annotations

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you

annotations:
include.release.openshift.io/ibm-cloud-managed: "true"
include.release.openshift.io/self-managed-high-availability: "true"
include.release.openshift.io/single-node-developer: "true"
capability.openshift.io/name: Insights
spec:
logLevel: Normal
managementState: Managed
operatorLogLevel: Normal
File renamed without changes.
File renamed without changes.
8 changes: 7 additions & 1 deletion pkg/controller/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"time"

configv1client "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"
operatorv1client "github.com/openshift/client-go/operator/clientset/versioned/typed/operator/v1"
"github.com/openshift/library-go/pkg/controller/controllercmd"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -63,6 +64,11 @@ func (s *Operator) Run(ctx context.Context, controller *controllercmd.Controller
return err
}

operatorClient, err := operatorv1client.NewForConfig(controller.KubeConfig)
if err != nil {
return err
}

gatherProtoKubeConfig, gatherKubeConfig, metricsGatherKubeConfig, alertsGatherKubeConfig := prepareGatherConfigs(
controller.ProtoKubeConfig, controller.KubeConfig, s.Impersonate,
)
Expand Down Expand Up @@ -114,7 +120,7 @@ func (s *Operator) Run(ctx context.Context, controller *controllercmd.Controller
gatherKubeConfig, gatherProtoKubeConfig, metricsGatherKubeConfig, alertsGatherKubeConfig, anonymizer,
configObserver, insightsClient,
)
periodicGather := periodic.New(configObserver, rec, gatherers, anonymizer)
periodicGather := periodic.New(configObserver, rec, gatherers, anonymizer, operatorClient.InsightsOperators())
statusReporter.AddSources(periodicGather.Sources()...)

// check we can read IO container status and we are not in crash loop
Expand Down
128 changes: 115 additions & 13 deletions pkg/controller/periodic/periodic.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,47 @@ import (
"context"
"fmt"
"sort"
"strings"
"time"

utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/klog/v2"

v1 "github.com/openshift/api/operator/v1"
operatorv1client "github.com/openshift/client-go/operator/clientset/versioned/typed/operator/v1"
"github.com/openshift/insights-operator/pkg/anonymization"
"github.com/openshift/insights-operator/pkg/config/configobserver"
"github.com/openshift/insights-operator/pkg/controllerstatus"
"github.com/openshift/insights-operator/pkg/gather"
"github.com/openshift/insights-operator/pkg/gatherers"
"github.com/openshift/insights-operator/pkg/recorder"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Condition type and reasons reported for each gatherer in the status of the
// 'cluster' InsightsOperator resource (see createGathererStatus).
const (
// DataGatheredCondition is the type of the condition attached to every gatherer status.
DataGatheredCondition = "DataGathered"
// NoDataGatheredReason is the default reason, used when the gatherer produced no records
// and reported neither an error nor a panic - e.g. the resource is not in the cluster.
NoDataGatheredReason = "NoData"
// GatherErrorReason is the reason used when the gatherer reported errors and produced no records.
GatherErrorReason = "GatherError"
// GatherPanicReason is the reason used when the gatherer reported a panic
// (and no records or errors that would override it).
GatherPanicReason = "GatherPanic"
// GatheredOKReason is the reason used when the gatherer produced records and reported no errors.
GatheredOKReason = "GatheredOK"
// GatheredWithErrorReason is the reason used when the gatherer produced records
// but also reported some errors, i.e. the data may be partial.
GatheredWithErrorReason = "GatheredWithError"
)

// Controller periodically runs gatherers, records their results to the recorder
// and flushes the recorder to create archives
type Controller struct {
configurator configobserver.Configurator
recorder recorder.FlushInterface
gatherers []gatherers.Interface
statuses map[string]controllerstatus.StatusController
anonymizer *anonymization.Anonymizer
configurator configobserver.Configurator
recorder recorder.FlushInterface
gatherers []gatherers.Interface
statuses map[string]controllerstatus.StatusController
anonymizer *anonymization.Anonymizer
insightsOperatorCLI operatorv1client.InsightsOperatorInterface
}

// New creates a new instance of Controller which periodically invokes the gatherers
Expand All @@ -35,6 +54,7 @@ func New(
rec recorder.FlushInterface,
listGatherers []gatherers.Interface,
anonymizer *anonymization.Anonymizer,
insightsOperatorCLI operatorv1client.InsightsOperatorInterface,
) *Controller {
statuses := make(map[string]controllerstatus.StatusController)

Expand All @@ -44,11 +64,12 @@ func New(
}

return &Controller{
configurator: configurator,
recorder: rec,
gatherers: listGatherers,
statuses: statuses,
anonymizer: anonymizer,
configurator: configurator,
recorder: rec,
gatherers: listGatherers,
statuses: statuses,
anonymizer: anonymizer,
insightsOperatorCLI: insightsOperatorCLI,
}
}

Expand Down Expand Up @@ -114,7 +135,7 @@ func (c *Controller) Gather() {
}

allFunctionReports := make(map[string]gather.GathererFunctionReport)

gatherTime := metav1.Now()
for _, gatherer := range gatherersToProcess {
func() {
name := gatherer.GetName()
Expand Down Expand Up @@ -142,8 +163,11 @@ func (c *Controller) Gather() {
})
}()
}

err := gather.RecordArchiveMetadata(mapToArray(allFunctionReports), c.recorder, c.anonymizer)
err := c.updateOperatorStatusCR(allFunctionReports, gatherTime)
if err != nil {
klog.Errorf("failed to update the Insights Operator CR status: %v", err)
}
err = gather.RecordArchiveMetadata(mapToArray(allFunctionReports), c.recorder, c.anonymizer)
if err != nil {
klog.Errorf("unable to record archive metadata because of error: %v", err)
}
Expand Down Expand Up @@ -176,6 +200,84 @@ func (c *Controller) periodicTrigger(stopCh <-chan struct{}) {
}
}

// updateOperatorStatusCR gets the 'cluster' insightsoperators.operator.openshift.io resource and
// replaces its gather status with the details of the last gathering.
//
// allFunctionReports holds the reports of the gathering functions keyed by name; reports with
// a zero duration are skipped because it means the gatherer didn't run. gatherTime is the time
// the gathering started; the overall gathering duration is measured from it up to this call.
//
// The gatherer statuses are written in the order of the sorted report keys, so that consecutive
// updates produce the same list order regardless of Go's randomized map iteration.
//
// It returns an error when the resource cannot be read or its status cannot be updated.
func (c *Controller) updateOperatorStatusCR(allFunctionReports map[string]gather.GathererFunctionReport, gatherTime metav1.Time) error {
	insightsOperatorCR, err := c.insightsOperatorCLI.Get(context.Background(), "cluster", metav1.GetOptions{})
	if err != nil {
		return fmt.Errorf("getting the 'cluster' InsightsOperator resource: %w", err)
	}

	// Never modify the object returned by the client; work on a copy.
	updatedOperatorCR := insightsOperatorCR.DeepCopy()
	updatedOperatorCR.Status.GatherStatus = v1.GatherStatus{
		LastGatherTime: gatherTime,
		LastGatherDuration: metav1.Duration{
			Duration: time.Since(gatherTime.Time),
		},
	}

	// Map iteration order is random - sort the keys to keep the status list stable.
	reportNames := make([]string, 0, len(allFunctionReports))
	for name := range allFunctionReports {
		reportNames = append(reportNames, name)
	}
	sort.Strings(reportNames)

	for _, name := range reportNames {
		fr := allFunctionReports[name]
		// duration = 0 means the gatherer didn't run
		if fr.Duration == 0 {
			continue
		}

		gs := createGathererStatus(&fr)
		updatedOperatorCR.Status.GatherStatus.Gatherers = append(updatedOperatorCR.Status.GatherStatus.Gatherers, gs)
	}

	if _, err = c.insightsOperatorCLI.UpdateStatus(context.Background(), updatedOperatorCR, metav1.UpdateOptions{}); err != nil {
		return fmt.Errorf("updating the status of the 'cluster' InsightsOperator resource: %w", err)
	}
	return nil
}

// createGathererStatus converts the report of a single gathering function into the gatherer
// status stored in the InsightsOperator resource.
//
// The returned status always carries exactly one DataGathered condition. Its status and reason
// are chosen with the following precedence:
//   - records were created: ConditionTrue with GatheredOK, or GatheredWithError when the report
//     also contains errors,
//   - no records but some errors: ConditionFalse with GatherError,
//   - no records, no errors, but a panic: ConditionFalse with GatherPanic,
//   - otherwise: ConditionFalse with NoData.
func createGathererStatus(gfr *gather.GathererFunctionReport) v1.GathererStatus {
	gs := v1.GathererStatus{
		Name: gfr.FuncName,
		LastGatherDuration: metav1.Duration{
			// the reported duration is in milliseconds
			Duration: time.Duration(gfr.Duration) * time.Millisecond,
		},
	}
	con := metav1.Condition{
		Type:               DataGatheredCondition,
		LastTransitionTime: metav1.Now(),
		Status:             metav1.ConditionFalse,
		Reason:             NoDataGatheredReason,
	}

	switch {
	case gfr.RecordsCount > 0:
		con.Status = metav1.ConditionTrue
		con.Reason = GatheredOKReason
		con.Message = fmt.Sprintf("Created %d records in the archive.", gfr.RecordsCount)

		if len(gfr.Errors) > 0 {
			con.Reason = GatheredWithErrorReason
			con.Message = fmt.Sprintf("%s Error: %s", con.Message, strings.Join(gfr.Errors, ","))
		}
	case len(gfr.Errors) > 0:
		con.Reason = GatherErrorReason
		con.Message = strings.Join(gfr.Errors, ",")
	case gfr.Panic != nil:
		con.Reason = GatherPanicReason
		// The recovered panic value is not guaranteed to be a string (it can be e.g. an error),
		// so format it instead of using a type assertion that could itself panic.
		con.Message = fmt.Sprintf("%v", gfr.Panic)
	}

	gs.Conditions = append(gs.Conditions, con)
	return gs
}

func mapToArray(m map[string]gather.GathererFunctionReport) []gather.GathererFunctionReport {
a := make([]gather.GathererFunctionReport, 0, len(m))
for _, v := range m {
Expand Down
Loading