diff --git a/manifests/08-prometheus_rule.yaml b/manifests/08-prometheus_rule.yaml index 094c96182..955d76dd5 100644 --- a/manifests/08-prometheus_rule.yaml +++ b/manifests/08-prometheus_rule.yaml @@ -25,7 +25,7 @@ spec: description: 'Simple content access (SCA) is not enabled. Once enabled, Insights Operator can automatically import the SCA certificates from Red Hat OpenShift Cluster Manager making it easier to use the content provided by your Red Hat subscriptions when creating container images. See https://docs.openshift.com/container-platform/latest/cicd/builds/running-entitled-builds.html for more information.' summary: Simple content access certificates are not available. expr: | - max_over_time(cluster_operator_conditions{name="insights", condition="SCANotAvailable", reason="NotFound"}[5m]) == 1 + max_over_time(cluster_operator_conditions{name="insights", condition="SCAAvailable", reason="NotFound"}[5m]) == 0 for: 5m labels: severity: info diff --git a/pkg/controller/periodic/periodic.go b/pkg/controller/periodic/periodic.go index 10b1aadd0..9354fe810 100644 --- a/pkg/controller/periodic/periodic.go +++ b/pkg/controller/periodic/periodic.go @@ -24,7 +24,7 @@ type Controller struct { configurator configobserver.Configurator recorder recorder.FlushInterface gatherers []gatherers.Interface - statuses map[string]*controllerstatus.Simple + statuses map[string]controllerstatus.StatusController anonymizer *anonymization.Anonymizer } @@ -36,11 +36,11 @@ func New( listGatherers []gatherers.Interface, anonymizer *anonymization.Anonymizer, ) *Controller { - statuses := make(map[string]*controllerstatus.Simple) + statuses := make(map[string]controllerstatus.StatusController) for _, gatherer := range listGatherers { gathererName := gatherer.GetName() - statuses[gathererName] = &controllerstatus.Simple{Name: fmt.Sprintf("periodic-%s", gathererName)} + statuses[gathererName] = controllerstatus.New(fmt.Sprintf("periodic-%s", gathererName)) } return &Controller{ @@ -52,13 +52,13 @@ func New( } } -func (c *Controller) Sources() []controllerstatus.Interface { +func (c *Controller) Sources() []controllerstatus.StatusController { keys := make([]string, 0, len(c.statuses)) for key := range c.statuses { keys = append(keys, key) } sort.Strings(keys) - sources := make([]controllerstatus.Interface, 0, len(keys)) + sources := make([]controllerstatus.StatusController, 0, len(keys)) for _, key := range keys { sources = append(sources, c.statuses[key]) } diff --git a/pkg/controller/status/conditions.go b/pkg/controller/status/conditions.go index e2c744af7..d7f5904e0 100644 --- a/pkg/controller/status/conditions.go +++ b/pkg/controller/status/conditions.go @@ -12,11 +12,10 @@ const ( InsightsUploadDegraded configv1.ClusterStatusConditionType = "UploadDegraded" // InsightsDownloadDegraded defines the condition type (when set to True) when the Insights report can't be successfully downloaded InsightsDownloadDegraded configv1.ClusterStatusConditionType = "InsightsDownloadDegraded" - // SCANotAvailable is a condition type providing info about unsuccessful SCA pull attempt from the OCM API - SCANotAvailable configv1.ClusterStatusConditionType = "SCANotAvailable" - // ClusterTransferFailed is a condition type providing info about unsuccessful pull attempt of the ClusterTransfer from the OCM API - // or unsuccessful pull-secret update - ClusterTransferFailed configv1.ClusterStatusConditionType = "ClusterTransferFailed" + // ClusterTransferAvailable is a condition type providing info about ClusterTransfer controller status + ClusterTransferAvailable configv1.ClusterStatusConditionType = "ClusterTransferAvailable" + // SCAAvailable is a condition type providing info about SCA controller status + SCAAvailable configv1.ClusterStatusConditionType = "SCAAvailable" ) type conditionsMap map[configv1.ClusterStatusConditionType]configv1.ClusterOperatorStatusCondition @@ -26,7 +25,7 @@ type conditions struct { } func newConditions(cos *configv1.ClusterOperatorStatus, time metav1.Time) *conditions { - entries := map[configv1.ClusterStatusConditionType]configv1.ClusterOperatorStatusCondition{ + entries := map[configv1.ClusterStatusConditionType]configv1.ClusterOperatorStatusCondition{ // nolint: dupl configv1.OperatorAvailable: { Type: configv1.OperatorAvailable, Status: configv1.ConditionUnknown, @@ -45,6 +44,18 @@ func newConditions(cos *configv1.ClusterOperatorStatus, time metav1.Time) *condi LastTransitionTime: time, Reason: "", }, + SCAAvailable: { + Type: SCAAvailable, + Status: configv1.ConditionUnknown, + LastTransitionTime: time, + Reason: "", + }, + ClusterTransferAvailable: { + Type: ClusterTransferAvailable, + Status: configv1.ConditionUnknown, + LastTransitionTime: time, + Reason: "", + }, } for _, c := range cos.Conditions { diff --git a/pkg/controller/status/conditions_test.go b/pkg/controller/status/conditions_test.go index 3ebce0924..2e5d26b29 100644 --- a/pkg/controller/status/conditions_test.go +++ b/pkg/controller/status/conditions_test.go @@ -377,7 +377,7 @@ func Test_conditions_setCondition(t *testing.T) { } } -func Test_newConditions(t *testing.T) { +func Test_newConditions(t *testing.T) { // nolint: funlen time := metav1.Now() type args struct { @@ -396,7 +396,7 @@ func Test_newConditions(t *testing.T) { time: time, }, want: &conditions{ - entryMap: map[configv1.ClusterStatusConditionType]configv1.ClusterOperatorStatusCondition{ + entryMap: map[configv1.ClusterStatusConditionType]configv1.ClusterOperatorStatusCondition{ // nolint: dupl configv1.OperatorAvailable: { Type: configv1.OperatorAvailable, Status: configv1.ConditionUnknown, @@ -415,6 +415,18 @@ func Test_newConditions(t *testing.T) { LastTransitionTime: time, Reason: "", }, + SCAAvailable: { + Type: SCAAvailable, + Status: configv1.ConditionUnknown, + LastTransitionTime: time, + Reason: "", + }, + ClusterTransferAvailable: { + Type: ClusterTransferAvailable, + Status: configv1.ConditionUnknown, + LastTransitionTime: time, + Reason: "", + }, }, }, }, @@ -455,6 +467,18 @@ func Test_newConditions(t *testing.T) { Reason: "degraded reason", Message: "degraded message", }, + SCAAvailable: { + Type: SCAAvailable, + Status: configv1.ConditionUnknown, + LastTransitionTime: time, + Reason: "", + }, + ClusterTransferAvailable: { + Type: ClusterTransferAvailable, + Status: configv1.ConditionUnknown, + LastTransitionTime: time, + Reason: "", + }, }, }, }, diff --git a/pkg/controller/status/controller.go b/pkg/controller/status/controller.go index b686e1122..72090134b 100644 --- a/pkg/controller/status/controller.go +++ b/pkg/controller/status/controller.go @@ -22,15 +22,15 @@ import ( "github.com/openshift/insights-operator/pkg/config/configobserver" "github.com/openshift/insights-operator/pkg/controllerstatus" + "github.com/openshift/insights-operator/pkg/ocm" + "github.com/openshift/insights-operator/pkg/ocm/clustertransfer" + "github.com/openshift/insights-operator/pkg/ocm/sca" ) const ( // How many upload failures in a row we tolerate before starting reporting // as InsightsUploadDegraded uploadFailuresCountThreshold = 5 - // OCMAPIFailureCountThreshold defines how many unsuccessful responses from the OCM API in a row is tolerated - // before the operator is marked as Degraded - OCMAPIFailureCountThreshold = 5 insightsAvailableMessage = "Insights works as expected" ) @@ -50,7 +50,7 @@ type Controller struct { statusCh chan struct{} configurator configobserver.Configurator - sources []controllerstatus.Interface + sources map[string]controllerstatus.StatusController reported Reported start time.Time @@ -67,6 +67,7 @@ func NewController(client configv1client.ConfigV1Interface, configurator configo configurator: configurator, client: client, namespace: namespace, + sources: make(map[string]controllerstatus.StatusController), ctrlStatus: newControllerStatus(), } return c @@ -108,20 +109,29 @@ func (c *Controller) SetLastReportedTime(at time.Time) { // AddSources adds sources in a thread-safe way. // A source is used to monitor parts of the operator. -func (c *Controller) AddSources(sources ...controllerstatus.Interface) { +func (c *Controller) AddSources(sources ...controllerstatus.StatusController) { c.lock.Lock() defer c.lock.Unlock() - c.sources = append(c.sources, sources...) + for i := range sources { + source := sources[i] + c.sources[source.Name()] = source + } } // Sources provides the sources in a thread-safe way. // A source is used to monitor parts of the operator. -func (c *Controller) Sources() []controllerstatus.Interface { +func (c *Controller) Sources() map[string]controllerstatus.StatusController { c.lock.Lock() defer c.lock.Unlock() return c.sources } +func (c *Controller) Source(name string) controllerstatus.StatusController { + c.lock.Lock() + defer c.lock.Unlock() + return c.sources[name] +} + func (c *Controller) merge(clusterOperator *configv1.ClusterOperator) *configv1.ClusterOperator { // prime the object if it does not exist if clusterOperator == nil { @@ -144,7 +154,7 @@ func (c *Controller) merge(clusterOperator *configv1.ClusterOperator) *configv1. // cluster operator conditions cs := newConditions(&clusterOperator.Status, metav1.Time{Time: now}) - updateControllerConditions(cs, c.ctrlStatus, isInitializing, lastTransition) + c.updateControllerConditions(cs, isInitializing, lastTransition) updateControllerConditionsByStatus(cs, c.ctrlStatus, isInitializing) // all status conditions from conditions to cluster operator @@ -172,10 +182,10 @@ func (c *Controller) currentControllerStatus() (allReady bool, lastTransition ti allReady = true - for i, source := range c.Sources() { + for name, source := range c.Sources() { summary, ready := source.CurrentStatus() if !ready { - klog.V(4).Infof("Source %d %T is not ready", i, source) + klog.V(4).Infof("Source %s %T is not ready", name, source) allReady = false continue } @@ -183,7 +193,7 @@ func (c *Controller) currentControllerStatus() (allReady bool, lastTransition ti continue } if len(summary.Message) == 0 { - klog.Errorf("Programmer error: status source %d %T reported an empty message: %#v", i, source, summary) + klog.Errorf("Programmer error: status source %s %T reported an empty message: %#v", name, source, summary) continue } @@ -206,18 +216,16 @@ func (c *Controller) currentControllerStatus() (allReady bool, lastTransition ti // mark as degraded only in case of HTTP 500 and higher if summary.Operation.HTTPStatusCode >= 500 { klog.V(4).Infof("Failed to download the SCA certs within the threshold %d with exponential backoff. Marking as degraded.", - OCMAPIFailureCountThreshold) + ocm.OCMAPIFailureCountThreshold) degradingFailure = true } - c.ctrlStatus.setStatus(SCAPullStatus, summary.Reason, summary.Message) } else if summary.Operation.Name == controllerstatus.PullingClusterTransfer.Name { // mark as degraded only in case of HTTP 500 and higher if summary.Operation.HTTPStatusCode >= 500 { klog.V(4).Infof("Failed to pull the cluster transfer object within the threshold %d with exponential backoff. Marking as degraded.", - OCMAPIFailureCountThreshold) + ocm.OCMAPIFailureCountThreshold) degradingFailure = true } - c.ctrlStatus.setStatus(ClusterTransferStatus, summary.Reason, summary.Message) } if degradingFailure { @@ -317,11 +325,11 @@ func (c *Controller) updateStatus(ctx context.Context, initial bool) error { } // update the cluster controller status conditions -func updateControllerConditions(cs *conditions, ctrlStatus *controllerStatus, +func (c *Controller) updateControllerConditions(cs *conditions, isInitializing bool, lastTransition time.Time) { if isInitializing { // the disabled condition is optional, but set it now if we already know we're disabled - if ds := ctrlStatus.getStatus(DisabledStatus); ds != nil { + if ds := c.ctrlStatus.getStatus(DisabledStatus); ds != nil { cs.setCondition(OperatorDisabled, configv1.ConditionTrue, ds.reason, ds.message, metav1.Now()) } if !cs.hasCondition(configv1.OperatorDegraded) { @@ -331,45 +339,60 @@ func updateControllerConditions(cs *conditions, ctrlStatus *controllerStatus, // once we've initialized set Failing and Disabled as best we know // handle when disabled - if ds := ctrlStatus.getStatus(DisabledStatus); ds != nil { + if ds := c.ctrlStatus.getStatus(DisabledStatus); ds != nil { cs.setCondition(OperatorDisabled, configv1.ConditionTrue, ds.reason, ds.message, metav1.Now()) } else { cs.setCondition(OperatorDisabled, configv1.ConditionFalse, "AsExpected", "", metav1.Now()) } // handle when has errors - if es := ctrlStatus.getStatus(ErrorStatus); es != nil && !ctrlStatus.isDisabled() { + if es := c.ctrlStatus.getStatus(ErrorStatus); es != nil && !c.ctrlStatus.isDisabled() { cs.setCondition(configv1.OperatorDegraded, configv1.ConditionTrue, es.reason, es.message, metav1.Time{Time: lastTransition}) } else { cs.setCondition(configv1.OperatorDegraded, configv1.ConditionFalse, "AsExpected", insightsAvailableMessage, metav1.Now()) } // handle when upload fails - if ur := ctrlStatus.getStatus(UploadStatus); ur != nil && !ctrlStatus.isDisabled() { + if ur := c.ctrlStatus.getStatus(UploadStatus); ur != nil && !c.ctrlStatus.isDisabled() { cs.setCondition(InsightsUploadDegraded, configv1.ConditionTrue, ur.reason, ur.message, metav1.Time{Time: lastTransition}) } else { cs.removeCondition(InsightsUploadDegraded) } // handle when download fails - if ds := ctrlStatus.getStatus(DownloadStatus); ds != nil && !ctrlStatus.isDisabled() { + if ds := c.ctrlStatus.getStatus(DownloadStatus); ds != nil && !c.ctrlStatus.isDisabled() { cs.setCondition(InsightsDownloadDegraded, configv1.ConditionTrue, ds.reason, ds.message, metav1.Time{Time: lastTransition}) } else { cs.removeCondition(InsightsDownloadDegraded) } + c.updateControllerConditionByReason(cs, SCAAvailable, sca.ControllerName, sca.AvailableReason, isInitializing) + c.updateControllerConditionByReason(cs, + ClusterTransferAvailable, + clustertransfer.ControllerName, + clustertransfer.AvailableReason, + isInitializing) +} - // handler when SCA pull from OCM fails - if ss := ctrlStatus.getStatus(SCAPullStatus); ss != nil { - cs.setCondition(SCANotAvailable, configv1.ConditionTrue, ss.reason, ss.message, metav1.Time{Time: lastTransition}) - } else { - cs.removeCondition(SCANotAvailable) +func (c *Controller) updateControllerConditionByReason(cs *conditions, + condition configv1.ClusterStatusConditionType, + controllerName, reason string, + isInitializing bool) { + controller := c.Source(controllerName) + if controller == nil { + return } - - // handler when ClusterTransfer pull from the OCM fails - if ss := ctrlStatus.getStatus(ClusterTransferStatus); ss != nil { - cs.setCondition(ClusterTransferFailed, configv1.ConditionTrue, ss.reason, ss.message, metav1.Time{Time: lastTransition}) + if isInitializing { + return + } + summary, ok := controller.CurrentStatus() + // no summary to read + if !ok { + return + } + if summary.Reason == reason { + cs.setCondition(condition, configv1.ConditionTrue, summary.Reason, summary.Message, metav1.Time{Time: summary.LastTransitionTime}) } else { - cs.removeCondition(ClusterTransferFailed) + cs.setCondition(condition, configv1.ConditionFalse, summary.Reason, summary.Message, metav1.Time{Time: summary.LastTransitionTime}) } } diff --git a/pkg/controller/status/status.go b/pkg/controller/status/status.go index faf842579..17308fb50 100644 --- a/pkg/controller/status/status.go +++ b/pkg/controller/status/status.go @@ -1,12 +1,10 @@ package status const ( - DisabledStatus = "disabled" - UploadStatus = "upload" - DownloadStatus = "download" - ErrorStatus = "error" - SCAPullStatus = "scaPullStatus" - ClusterTransferStatus = "clusterTransferStatus" + DisabledStatus = "disabled" + UploadStatus = "upload" + DownloadStatus = "download" + ErrorStatus = "error" ) type controllerStatus struct { diff --git a/pkg/controllerstatus/controllerstatus.go b/pkg/controllerstatus/controllerstatus.go index 2207bca5c..bc6556448 100644 --- a/pkg/controllerstatus/controllerstatus.go +++ b/pkg/controllerstatus/controllerstatus.go @@ -7,8 +7,10 @@ import ( "k8s.io/klog/v2" ) -type Interface interface { +type StatusController interface { CurrentStatus() (summary Summary, ready bool) + UpdateStatus(summary Summary) + Name() string } type Operation struct { @@ -43,12 +45,18 @@ type Summary struct { // Simple represents the status of a given part of the operator type Simple struct { - Name string + name string lock sync.Mutex summary Summary } +func New(name string) StatusController { + return &Simple{ + name: name, + } +} + // UpdateStatus updates the status, keeps track how long a status have been in effect func (s *Simple) UpdateStatus(summary Summary) { //nolint: gocritic s.lock.Lock() @@ -59,22 +67,20 @@ func (s *Simple) UpdateStatus(summary Summary) { //nolint: gocritic } if s.summary.Healthy != summary.Healthy { - klog.V(2).Infof("name=%s healthy=%t reason=%s message=%s", s.Name, summary.Healthy, summary.Reason, summary.Message) + klog.V(2).Infof("name=%s healthy=%t reason=%s message=%s", s.name, summary.Healthy, summary.Reason, summary.Message) s.summary = summary s.summary.Count = 1 + s.summary.LastTransitionTime = summary.LastTransitionTime return } s.summary.Count++ - if summary.Healthy { - return - } if s.summary.Message != summary.Message || s.summary.Reason != summary.Reason { - klog.V(2).Infof("name=%s healthy=%t reason=%s message=%s", s.Name, summary.Healthy, summary.Reason, summary.Message) + klog.V(2).Infof("name=%s healthy=%t reason=%s message=%s", s.name, summary.Healthy, summary.Reason, summary.Message) s.summary.Reason = summary.Reason s.summary.Message = summary.Message s.summary.Operation = summary.Operation - return + s.summary.LastTransitionTime = summary.LastTransitionTime } } @@ -87,3 +93,7 @@ func (s *Simple) CurrentStatus() (Summary, bool) { } return s.summary, true } + +func (s *Simple) Name() string { + return s.name +} diff --git a/pkg/controllerstatus/controllerstatus_test.go b/pkg/controllerstatus/controllerstatus_test.go new file mode 100644 index 000000000..4871f81c2 --- /dev/null +++ b/pkg/controllerstatus/controllerstatus_test.go @@ -0,0 +1,126 @@ +package controllerstatus + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func Test_StatusController_Basic(t *testing.T) { + testCtrl := New("test-controller") + assert.Equal(t, "test-controller", testCtrl.Name()) + summary, ready := testCtrl.CurrentStatus() + assert.False(t, ready, "controller should not be ready") + assert.False(t, summary.Healthy, "controller should not be healthy") + assert.Equal(t, 0, summary.Count) + assert.True(t, summary.LastTransitionTime.IsZero()) + + updatedSummary := Summary{ + Operation: Operation{Name: "testing", HTTPStatusCode: 200}, + Healthy: true, + LastTransitionTime: time.Now(), + Reason: "UpdatedOK1", + Message: "testing status controller", + } + + testCtrl.UpdateStatus(updatedSummary) + firstUpdatedSummary, ready := testCtrl.CurrentStatus() + assert.True(t, ready, "controller should be ready after status updated") + assert.True(t, firstUpdatedSummary.Healthy, "controller should be healthy after status updated") + assert.Equal(t, 1, firstUpdatedSummary.Count) + assert.Equal(t, OperationName("testing"), firstUpdatedSummary.Operation.Name) + assert.False(t, firstUpdatedSummary.LastTransitionTime.IsZero()) + + updatedSummary2 := Summary{ + Operation: Operation{Name: "testing2", HTTPStatusCode: 200}, + Healthy: true, + LastTransitionTime: time.Now(), + Reason: "UpdatedOK1", + Message: "testing status controller", + } + time.Sleep(10 * time.Millisecond) + testCtrl.UpdateStatus(updatedSummary2) + secondUpdatedSummary, ready := testCtrl.CurrentStatus() + assert.True(t, ready, "controller should be ready after status updated") + assert.True(t, secondUpdatedSummary.Healthy, "controller should be healthy after status updated") + assert.Equal(t, 2, secondUpdatedSummary.Count) + // LastTransition time should not change and same for the operation name + assert.Equal(t, OperationName("testing"), secondUpdatedSummary.Operation.Name) + assert.Equal(t, firstUpdatedSummary.LastTransitionTime, secondUpdatedSummary.LastTransitionTime) +} + +func Test_StatusController_ChangingStatus(t *testing.T) { + tests := []struct { + name string + ctrl StatusController + firstStatus Summary + secondStatus Summary + expectedStatus Summary + }{ + { + name: "From healthy status to unhealthy", + ctrl: New("test-controller-2"), + firstStatus: Summary{ + Operation: Operation{Name: "testing", HTTPStatusCode: 200}, + Healthy: true, + LastTransitionTime: time.Now(), + Reason: "UpdatedOK1", + Message: "testing status", + }, + secondStatus: Summary{ + Operation: Operation{Name: "testing2", HTTPStatusCode: 403}, + Healthy: false, + LastTransitionTime: time.Now(), + Reason: "Failed", + Message: "Something went wrong", + }, + expectedStatus: Summary{ + Operation: Operation{Name: "testing2", HTTPStatusCode: 403}, + Healthy: false, + Reason: "Failed", + Message: "Something went wrong", + Count: 1, + }, + }, + { + name: "From healthy status to healthy but different reason", + ctrl: New("test-controller-3"), + firstStatus: Summary{ + Operation: Operation{Name: "testing1", HTTPStatusCode: 200}, + Healthy: true, + LastTransitionTime: time.Now(), + Reason: "UpdatedOK1", + Message: "testing status", + }, + secondStatus: Summary{ + Operation: Operation{Name: "testing2", HTTPStatusCode: 202}, + Healthy: true, + LastTransitionTime: time.Now(), + Reason: "UpdatedOK2", + Message: "testing status2", + }, + expectedStatus: Summary{ + Operation: Operation{Name: "testing2", HTTPStatusCode: 202}, + Healthy: true, + Reason: "UpdatedOK2", + Message: "testing status2", + Count: 1, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.ctrl.UpdateStatus(tt.firstStatus) + tt.ctrl.UpdateStatus(tt.secondStatus) + currentStatus, _ := tt.ctrl.CurrentStatus() + assert.Equal(t, tt.expectedStatus.Operation, currentStatus.Operation) + assert.Equal(t, tt.expectedStatus.Healthy, currentStatus.Healthy) + assert.Equal(t, tt.expectedStatus.Reason, currentStatus.Reason) + assert.Equal(t, tt.expectedStatus.Message, currentStatus.Message) + assert.Equal(t, tt.secondStatus.LastTransitionTime, currentStatus.LastTransitionTime) + assert.True(t, tt.firstStatus.LastTransitionTime.Before(currentStatus.LastTransitionTime)) + }) + } +} diff --git a/pkg/insights/insightsreport/insightsreport.go b/pkg/insights/insightsreport/insightsreport.go index 9adda81ee..b8917f044 100644 --- a/pkg/insights/insightsreport/insightsreport.go +++ b/pkg/insights/insightsreport/insightsreport.go @@ -20,7 +20,7 @@ import ( // Controller gathers the report from Smart Proxy type Controller struct { - controllerstatus.Simple + controllerstatus.StatusController configurator configobserver.Configurator client *insightsclient.Client @@ -76,7 +76,7 @@ func init() { // New initializes and returns a Gatherer func New(client *insightsclient.Client, configurator configobserver.Configurator, reporter InsightsReporter) *Controller { return &Controller{ - Simple: controllerstatus.Simple{Name: "insightsreport"}, + StatusController: controllerstatus.New("insightsreport"), configurator: configurator, client: client, archiveUploadReporter: reporter.ArchiveUploaded(), @@ -100,7 +100,7 @@ func (c *Controller) PullSmartProxy() (bool, error) { klog.V(4).Info("Retrieving report") resp, err := c.client.RecvReport(ctx, reportEndpoint) if authorizer.IsAuthorizationError(err) { - c.Simple.UpdateStatus(controllerstatus.Summary{ + c.StatusController.UpdateStatus(controllerstatus.Summary{ Operation: controllerstatus.DownloadingReport, Reason: "NotAuthorized", Message: fmt.Sprintf("Auth rejected for downloading latest report: %v", err), @@ -119,7 +119,7 @@ func (c *Controller) PullSmartProxy() (bool, error) { return true, ie } else if err != nil { klog.Errorf("Unexpected error retrieving the report: %s", err) - c.Simple.UpdateStatus(controllerstatus.Summary{ + c.StatusController.UpdateStatus(controllerstatus.Summary{ Operation: controllerstatus.DownloadingReport, Reason: "UnexpectedError", Message: fmt.Sprintf("Failed to download the latest report: %v", err), @@ -151,7 +151,7 @@ func (c *Controller) PullSmartProxy() (bool, error) { // we want to increment the metric only in case of download of a new report c.client.IncrementRecvReportMetric(resp.StatusCode) c.LastReport = reportResponse.Report - c.Simple.UpdateStatus(controllerstatus.Summary{Healthy: true}) + c.StatusController.UpdateStatus(controllerstatus.Summary{Healthy: true}) return true, nil } @@ -193,7 +193,7 @@ func (c *Controller) RetrieveReport() { firstPullDone = true if retryCounter >= retryThreshold { - c.Simple.UpdateStatus(controllerstatus.Summary{ + c.StatusController.UpdateStatus(controllerstatus.Summary{ Operation: controllerstatus.DownloadingReport, Reason: "NotAvailable", Message: fmt.Sprintf("Couldn't download the latest report: %v", err), @@ -242,7 +242,7 @@ func (c *Controller) RetrieveReport() { // Run goroutine code for gathering the reports from Smart Proxy func (c *Controller) Run(ctx context.Context) { - c.Simple.UpdateStatus(controllerstatus.Summary{Healthy: true}) + c.StatusController.UpdateStatus(controllerstatus.Summary{Healthy: true}) klog.V(2).Info("Starting report retriever") klog.V(2).Infof("Initial config: %v", c.configurator.Config()) diff --git a/pkg/insights/insightsuploader/insightsuploader.go b/pkg/insights/insightsuploader/insightsuploader.go index 8566d9cbe..8bd05a683 100644 --- a/pkg/insights/insightsuploader/insightsuploader.go +++ b/pkg/insights/insightsuploader/insightsuploader.go @@ -31,7 +31,7 @@ type StatusReporter interface { } type Controller struct { - controllerstatus.Simple + controllerstatus.StatusController summarizer Summarizer client *insightsclient.Client @@ -43,7 +43,7 @@ type Controller struct { func New(summarizer Summarizer, client *insightsclient.Client, configurator configobserver.Configurator, statusReporter StatusReporter, initialDelay time.Duration) *Controller { return &Controller{ - Simple: controllerstatus.Simple{Name: "insightsuploader"}, + StatusController: controllerstatus.New("insightsuploader"), summarizer: summarizer, configurator: configurator, @@ -55,7 +55,7 @@ func New(summarizer Summarizer, client *insightsclient.Client, configurator conf } func (c *Controller) Run(ctx context.Context) { - c.Simple.UpdateStatus(controllerstatus.Summary{Healthy: true}) + c.StatusController.UpdateStatus(controllerstatus.Summary{Healthy: true}) if c.client == nil { klog.Infof("No reporting possible without a configured client") @@ -107,7 +107,7 @@ func (c *Controller) Run(ctx context.Context) { source, ok, err := c.summarizer.Summary(ctx, lastReported) if err != nil { - c.Simple.UpdateStatus(controllerstatus.Summary{Reason: "SummaryFailed", Message: fmt.Sprintf("Unable to retrieve local insights data: %v", err)}) + c.StatusController.UpdateStatus(controllerstatus.Summary{Reason: "SummaryFailed", Message: fmt.Sprintf("Unable to retrieve local insights data: %v", err)}) return } if !ok { @@ -129,14 +129,14 @@ func (c *Controller) Run(ctx context.Context) { return } if authorizer.IsAuthorizationError(err) { - c.Simple.UpdateStatus(controllerstatus.Summary{Operation: controllerstatus.Uploading, + c.StatusController.UpdateStatus(controllerstatus.Summary{Operation: controllerstatus.Uploading, Reason: "NotAuthorized", Message: fmt.Sprintf("Reporting was not allowed: %v", err)}) c.initialDelay = wait.Jitter(interval/2, 2) return } c.initialDelay = wait.Jitter(interval/8, 1.2) - c.Simple.UpdateStatus(controllerstatus.Summary{Operation: controllerstatus.Uploading, + c.StatusController.UpdateStatus(controllerstatus.Summary{Operation: controllerstatus.Uploading, Reason: "UploadFailed", Message: fmt.Sprintf("Unable to report: %v", err)}) return } @@ -146,7 +146,7 @@ func (c *Controller) Run(ctx context.Context) { default: } lastReported = start.UTC() - c.Simple.UpdateStatus(controllerstatus.Summary{Healthy: true}) + c.StatusController.UpdateStatus(controllerstatus.Summary{Healthy: true}) } else { klog.V(4).Info("Display report that would be sent") // display what would have been sent (to ensure we always exercise source processing) diff --git a/pkg/ocm/clustertransfer/cluster_transfer.go b/pkg/ocm/clustertransfer/cluster_transfer.go index 90d7a7d84..7712a16e1 100644 --- a/pkg/ocm/clustertransfer/cluster_transfer.go +++ b/pkg/ocm/clustertransfer/cluster_transfer.go @@ -10,9 +10,9 @@ import ( jsonpatch "github.com/evanphx/json-patch" "github.com/openshift/insights-operator/pkg/config/configobserver" - "github.com/openshift/insights-operator/pkg/controller/status" "github.com/openshift/insights-operator/pkg/controllerstatus" "github.com/openshift/insights-operator/pkg/insights/insightsclient" + "github.com/openshift/insights-operator/pkg/ocm" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" @@ -20,8 +20,13 @@ import ( "k8s.io/klog/v2" ) +const ( + ControllerName = "clusterTransferController" + AvailableReason = "PullSecretUpdated" +) + type Controller struct { - controllerstatus.Simple + controllerstatus.StatusController coreClient corev1client.CoreV1Interface ctx context.Context configurator configobserver.Configurator @@ -39,11 +44,11 @@ func New(ctx context.Context, configurator configobserver.Configurator, insightsClient clusterTransferClient) *Controller { return &Controller{ - Simple: controllerstatus.Simple{Name: "clusterTransferController"}, - coreClient: coreClient, - ctx: ctx, - configurator: configurator, - client: insightsClient, + StatusController: controllerstatus.New(ControllerName), + coreClient: coreClient, + ctx: ctx, + configurator: configurator, + client: insightsClient, } } @@ -90,7 +95,7 @@ func (c *Controller) requestDataAndUpdateSecret(endpoint string) { // there's no cluster transfer for the cluster - HTTP 204 if len(data) == 0 { klog.Info("no available accepted cluster transfer") - c.updateStatus(true, "no cluster transfer", "NoData", nil) + c.updateStatus(true, "no available cluster transfer", "NoClusterTransfer", nil) return } // deserialize the data from the OCM API @@ -144,7 +149,7 @@ func (c *Controller) checkCTListAndOptionallyUpdatePS(ctList *clusterTransferLis return } statusMsg = "pull-secret successfully updated" - reason = "PullSecretUpdated" + reason = AvailableReason klog.Info(statusMsg) } else { statusMsg = "no new data received" @@ -207,7 +212,7 @@ func (c *Controller) requestClusterTransferWithExponentialBackoff(endpoint strin Duration: c.configurator.Config().OCMConfig.ClusterTransferInterval / 48, // 30 min as the first waiting Factor: 2, Jitter: 0, - Steps: status.OCMAPIFailureCountThreshold, + Steps: ocm.OCMAPIFailureCountThreshold, Cap: c.configurator.Config().OCMConfig.ClusterTransferInterval, } @@ -243,9 +248,10 @@ func (c *Controller) updateStatus(healthy bool, msg, reason string, httpErr *ins Operation: controllerstatus.Operation{ Name: controllerstatus.PullingClusterTransfer.Name, }, - Healthy: healthy, - Reason: reason, - Message: msg, + Healthy: healthy, + Reason: reason, + Message: msg, + LastTransitionTime: time.Now(), } if httpErr != nil { newSummary.Operation.HTTPStatusCode = httpErr.StatusCode diff --git a/pkg/ocm/clustertransfer/cluster_transfer_test.go b/pkg/ocm/clustertransfer/cluster_transfer_test.go index 881410780..4dad5d09d 100644 --- a/pkg/ocm/clustertransfer/cluster_transfer_test.go +++ b/pkg/ocm/clustertransfer/cluster_transfer_test.go @@ -141,7 +141,12 @@ func Test_ClusterTransfer_RequestDataAndUpdateSecret(t *testing.T) { ctController.requestDataAndUpdateSecret(mockConfig.Conf.OCMConfig.ClusterTransferEndpoint) summary, ok := ctController.CurrentStatus() assert.True(t, ok, "unexpected summary") - assert.EqualValues(t, tt.expectedSummary, summary) + assert.Equal(t, tt.expectedSummary.Operation, summary.Operation) + assert.Equal(t, tt.expectedSummary.Healthy, summary.Healthy) + assert.Equal(t, tt.expectedSummary.Count, summary.Count) + assert.Equal(t, tt.expectedSummary.Reason, summary.Reason) + assert.Equal(t, tt.expectedSummary.Message, summary.Message) + assert.True(t, tt.expectedSummary.LastTransitionTime.Before(summary.LastTransitionTime)) // check pull-secret value expectedPSData, err := loadDataFromFile(tt.updatedPullSecretDataFilePath) diff --git a/pkg/ocm/const.go b/pkg/ocm/const.go new file mode 100644 index 000000000..7a1ba2841 --- /dev/null +++ b/pkg/ocm/const.go @@ -0,0 +1,7 @@ +package ocm + +const ( + // OCMAPIFailureCountThreshold defines how many unsuccessful responses from the OCM API in a row is tolerated + // before the operator is marked as Degraded + OCMAPIFailureCountThreshold = 5 +) diff --git a/pkg/ocm/sca/sca.go b/pkg/ocm/sca/sca.go index a2d7ab337..b991b7d70 100644 --- a/pkg/ocm/sca/sca.go +++ b/pkg/ocm/sca/sca.go @@ -16,9 +16,9 @@ import ( "k8s.io/klog/v2" "github.com/openshift/insights-operator/pkg/config/configobserver" - "github.com/openshift/insights-operator/pkg/controller/status" "github.com/openshift/insights-operator/pkg/controllerstatus" "github.com/openshift/insights-operator/pkg/insights/insightsclient" + "github.com/openshift/insights-operator/pkg/ocm" ) const ( @@ -26,11 +26,13 @@ const ( secretName = "etc-pki-entitlement" //nolint: gosec entitlementAttrName = "entitlement.pem" entitlementKeyAttrName = "entitlement-key.pem" + ControllerName = "scaController" + AvailableReason = "Updated" ) // Controller holds all the required resources to be able to communicate with OCM API type Controller struct { - controllerstatus.Simple + controllerstatus.StatusController coreClient corev1client.CoreV1Interface ctx context.Context configurator configobserver.Configurator @@ -49,11 +51,11 @@ type Response struct { func New(ctx context.Context, coreClient corev1client.CoreV1Interface, configurator configobserver.Configurator, insightsClient *insightsclient.Client) *Controller { return &Controller{ - Simple: controllerstatus.Simple{Name: "scaController"}, - coreClient: coreClient, - ctx: ctx, - configurator: configurator, - client: insightsClient, + StatusController: controllerstatus.New(ControllerName), + coreClient: coreClient, + ctx: ctx, + configurator: configurator, + client: insightsClient, } } @@ -76,10 +78,12 @@ func (c *Controller) Run() { } else { msg := "Pulling of the SCA certs from the OCM API is disabled" klog.Warning(msg) - c.Simple.UpdateStatus(controllerstatus.Summary{ - Operation: controllerstatus.PullingSCACerts, - Healthy: true, - Message: msg, + c.StatusController.UpdateStatus(controllerstatus.Summary{ + Operation: controllerstatus.PullingSCACerts, + Healthy: true, + Message: msg, + Reason: "Disabled", + LastTransitionTime: time.Now(), }) } case <-configCh: @@ -99,20 +103,24 @@ func (c *Controller) requestDataAndCheckSecret(endpoint string) { httpErr, ok := err.(insightsclient.HttpError) errMsg := fmt.Sprintf("Failed to pull SCA certs from %s: %v", endpoint, err) if ok { - c.Simple.UpdateStatus(controllerstatus.Summary{ + c.StatusController.UpdateStatus(controllerstatus.Summary{ Operation: controllerstatus.Operation{ Name: controllerstatus.PullingSCACerts.Name, HTTPStatusCode: httpErr.StatusCode, }, - Reason: strings.ReplaceAll(http.StatusText(httpErr.StatusCode), " ", ""), - Message: errMsg, + Reason: strings.ReplaceAll(http.StatusText(httpErr.StatusCode), " ", ""), + Message: errMsg, + LastTransitionTime: time.Now(), }) return } klog.Warningf(errMsg) - c.Simple.UpdateStatus(controllerstatus.Summary{ - Operation: controllerstatus.PullingSCACerts, - Healthy: true, + c.StatusController.UpdateStatus(controllerstatus.Summary{ + Operation: controllerstatus.PullingSCACerts, + Healthy: true, + Reason: "NonHTTPError", + Message: errMsg, + LastTransitionTime: time.Now(), }) return } @@ -131,10 +139,12 @@ func (c *Controller) requestDataAndCheckSecret(endpoint string) { return } klog.Infof("%s secret successfully updated", secretName) - c.Simple.UpdateStatus(controllerstatus.Summary{ - Operation: controllerstatus.PullingSCACerts, - Message: fmt.Sprintf("SCA certs successfully updated in the %s secret", secretName), - Healthy: true, + c.StatusController.UpdateStatus(controllerstatus.Summary{ + Operation: controllerstatus.PullingSCACerts, + Message: fmt.Sprintf("SCA certs successfully updated in the %s secret", secretName), + Healthy: true, + LastTransitionTime: time.Now(), + Reason: AvailableReason, }) } @@ -203,7 +213,7 @@ func (c *Controller) requestSCAWithExpBackoff(endpoint string) ([]byte, error) { Duration: c.configurator.Config().OCMConfig.SCAInterval / 32, // 15 min by default Factor: 2, Jitter: 0, - Steps: status.OCMAPIFailureCountThreshold, + Steps: ocm.OCMAPIFailureCountThreshold, Cap: c.configurator.Config().OCMConfig.SCAInterval, } var data []byte