Skip to content

Commit 352c934

Browse files
authored
update registration of the metrics to work in techpreview too (openshift#808)
* update registration of the metrics to work in techpreview too * updates after review * forgotten zeroes...
1 parent fce828f commit 352c934

File tree

9 files changed

+93
-61
lines changed

9 files changed

+93
-61
lines changed

pkg/controller/gather_commands.go

+4-3
Original file line numberDiff line numberDiff line change
@@ -223,10 +223,11 @@ func (d *GatherJob) GatherAndUpload(kubeConfig, protoKubeConfig *rest.Config) er
223223
conditions = append(conditions, status.DataRecordedCondition(metav1.ConditionTrue, "AsExpected", ""))
224224

225225
// upload data
226-
insightsRequestID, err := uploader.Upload(ctx, lastArchive)
226+
insightsRequestID, statusCode, err := uploader.Upload(ctx, lastArchive)
227+
reason := fmt.Sprintf("HttpStatus%d", statusCode)
227228
if err != nil {
228229
klog.Error(err)
229-
conditions = append(conditions, status.DataUploadedCondition(metav1.ConditionFalse, "UploadFailed",
230+
conditions = append(conditions, status.DataUploadedCondition(metav1.ConditionFalse, reason,
230231
fmt.Sprintf("Failed to upload data: %v", err)))
231232
_, updateErr := updateDataGatherStatus(ctx, *insightClient, dataGatherCR, insightsv1alpha1.Failed, conditions)
232233
if updateErr != nil {
@@ -237,7 +238,7 @@ func (d *GatherJob) GatherAndUpload(kubeConfig, protoKubeConfig *rest.Config) er
237238
klog.Infof("Insights archive successfully uploaded with InsightsRequestID: %s", insightsRequestID)
238239

239240
dataGatherCR.Status.InsightsRequestID = insightsRequestID
240-
conditions = append(conditions, status.DataUploadedCondition(metav1.ConditionTrue, "AsExpected", ""))
241+
conditions = append(conditions, status.DataUploadedCondition(metav1.ConditionTrue, reason, ""))
241242

242243
_, err = updateDataGatherStatus(ctx, *insightClient, dataGatherCR, insightsv1alpha1.Completed, conditions)
243244
if err != nil {

pkg/controller/operator.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ func (s *Operator) Run(ctx context.Context, controller *controllercmd.Controller
180180
return err
181181
}
182182

183-
insightsClient := insightsclient.New(nil, 0, "default", authorizer, gatherConfigClient)
183+
insightsClient := insightsclient.New(nil, 0, "insights", authorizer, gatherConfigClient)
184184

185185
var periodicGather *periodic.Controller
186186
// the gatherers are periodically called to collect the data from the cluster
@@ -198,6 +198,7 @@ func (s *Operator) Run(ctx context.Context, controller *controllercmd.Controller
198198
periodicGather = periodic.NewWithTechPreview(reportRetriever, secretConfigObserver,
199199
insightsDataGatherObserver, gatherers, kubeClient, insightClient, operatorClient.InsightsOperators())
200200
statusReporter.AddSources(periodicGather.Sources()...)
201+
statusReporter.AddSources(reportRetriever)
201202
go periodicGather.PeriodicPrune(ctx)
202203
}
203204

pkg/controller/periodic/periodic.go

+39-7
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import (
44
"context"
55
"fmt"
66
"sort"
7+
"strconv"
8+
"strings"
79
"time"
810

911
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
@@ -22,6 +24,7 @@ import (
2224
"github.com/openshift/insights-operator/pkg/controllerstatus"
2325
"github.com/openshift/insights-operator/pkg/gather"
2426
"github.com/openshift/insights-operator/pkg/gatherers"
27+
"github.com/openshift/insights-operator/pkg/insights"
2528
"github.com/openshift/insights-operator/pkg/insights/insightsreport"
2629
"github.com/openshift/insights-operator/pkg/recorder"
2730
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -298,6 +301,7 @@ func (c *Controller) GatherJob() {
298301
return
299302
}
300303
dataGatheredOK := c.wasDataGatherSuccessful(dataGatherFinished)
304+
updateMetrics(dataGatherFinished)
301305
if !dataGatheredOK {
302306
klog.Errorf("Last data gathering %v was not successful", dataGatherFinished.Name)
303307
return
@@ -507,13 +511,7 @@ func (c *Controller) createDataGatherAttributeValues() ([]string, insightsv1alph
507511
// wasDataGatherSuccessful reads status conditions of the provided "dataGather" "datagather.insights.openshift.io"
508512
// custom resource and checks whether the data was successfully uploaded or not and updates status accordingly
509513
func (c *Controller) wasDataGatherSuccessful(dataGather *insightsv1alpha1.DataGather) bool {
510-
var dataUploadedCon *metav1.Condition
511-
for i := range dataGather.Status.Conditions {
512-
con := dataGather.Status.Conditions[i]
513-
if con.Type == status.DataUploaded {
514-
dataUploadedCon = &con
515-
}
516-
}
514+
dataUploadedCon := getConditionByStatus(dataGather, status.DataUploaded)
517515
statusSummary := controllerstatus.Summary{
518516
Operation: controllerstatus.Uploading,
519517
Healthy: true,
@@ -530,10 +528,44 @@ func (c *Controller) wasDataGatherSuccessful(dataGather *insightsv1alpha1.DataGa
530528
statusSummary.Reason = dataUploadedCon.Reason
531529
statusSummary.Message = dataUploadedCon.Message
532530
}
531+
533532
c.statuses["insightsuploader"].UpdateStatus(statusSummary)
534533
return statusSummary.Healthy
535534
}
536535

536+
// getConditionByStatus tries to get the condition with the provided condition status
537+
// from the provided "datagather" resource. Returns nil when no condition is found.
538+
func getConditionByStatus(dataGather *insightsv1alpha1.DataGather, conStatus string) *metav1.Condition {
539+
var dataUploadedCon *metav1.Condition
540+
for i := range dataGather.Status.Conditions {
541+
con := dataGather.Status.Conditions[i]
542+
if con.Type == conStatus {
543+
dataUploadedCon = &con
544+
}
545+
}
546+
return dataUploadedCon
547+
}
548+
549+
// updateMetrics reads the HTTP status code from the reason of the "DataUploaded" condition
550+
// from the provided "datagather" resource and increments
551+
// the "insightsclient_request_send_total" Prometheus metric accordingly.
552+
func updateMetrics(dataGather *insightsv1alpha1.DataGather) {
553+
dataUploadedCon := getConditionByStatus(dataGather, status.DataUploaded)
554+
var statusCode int
555+
if dataUploadedCon == nil {
556+
statusCode = 0
557+
} else {
558+
statusCodeStr, _ := strings.CutPrefix(dataUploadedCon.Reason, "HttpStatus")
559+
var err error
560+
statusCode, err = strconv.Atoi(statusCodeStr)
561+
if err != nil {
562+
klog.Error("failed to update the Prometheus metrics: %v", err)
563+
return
564+
}
565+
}
566+
insights.IncrementCounterRequestSend(statusCode)
567+
}
568+
537569
func mapToArray(m map[string]gather.GathererFunctionReport) []gather.GathererFunctionReport {
538570
a := make([]gather.GathererFunctionReport, 0, len(m))
539571
for _, v := range m {

pkg/insights/insightsclient/insightsclient.go

+1-5
Original file line numberDiff line numberDiff line change
@@ -248,16 +248,12 @@ func (c *Client) createAndWriteMIMEHeader(source *Source, mw *multipart.Writer,
248248
}
249249

250250
var (
251-
counterRequestSend = metrics.NewCounterVec(&metrics.CounterOpts{
252-
Name: "insightsclient_request_send_total",
253-
Help: "Tracks the number of archives sent",
254-
}, []string{"client", "status_code"})
255251
counterRequestRecvReport = metrics.NewCounterVec(&metrics.CounterOpts{
256252
Name: "insightsclient_request_recvreport_total",
257253
Help: "Tracks the number of insights reports received/downloaded",
258254
}, []string{"client", "status_code"})
259255
)
260256

261257
func init() {
262-
insights.MustRegisterMetrics(counterRequestSend, counterRequestRecvReport)
258+
insights.MustRegisterMetrics(counterRequestRecvReport)
263259
}

pkg/insights/insightsclient/requests.go

+21-15
Original file line numberDiff line numberDiff line change
@@ -7,26 +7,32 @@ import (
77
"io"
88
"mime/multipart"
99
"net/http"
10-
"strconv"
1110

1211
"k8s.io/klog/v2"
1312

1413
"github.com/openshift/insights-operator/pkg/authorizer"
14+
"github.com/openshift/insights-operator/pkg/insights"
1515
apierrors "k8s.io/apimachinery/pkg/api/errors"
1616
)
1717

18-
func (c *Client) SendAndGetID(ctx context.Context, endpoint string, source Source) (string, error) {
18+
var (
19+
// when there's no HTTP status response code then we send 0 to track
20+
// this request in the "insightsclient_request_send_total" Prometheus metrics
21+
noHttpStatusCode = 0
22+
)
23+
24+
func (c *Client) SendAndGetID(ctx context.Context, endpoint string, source Source) (string, int, error) {
1925
cv, err := c.GetClusterVersion()
2026
if apierrors.IsNotFound(err) {
21-
return "", ErrWaitingForVersion
27+
return "", noHttpStatusCode, ErrWaitingForVersion
2228
}
2329
if err != nil {
24-
return "", err
30+
return "", noHttpStatusCode, err
2531
}
2632

2733
req, err := c.prepareRequest(ctx, http.MethodPost, endpoint, cv)
2834
if err != nil {
29-
return "", err
35+
return "", noHttpStatusCode, err
3036
}
3137

3238
bytesRead := make(chan int64, 1)
@@ -42,9 +48,9 @@ func (c *Client) SendAndGetID(ctx context.Context, endpoint string, source Sourc
4248
resp, err := c.client.Do(req)
4349
if err != nil {
4450
klog.V(4).Infof("Unable to build a request, possible invalid token: %v", err)
45-
// if the request is not build, for example because of invalid endpoint,(maybe some problem with DNS), we want to have recorded about it in metrics as well.
46-
counterRequestSend.WithLabelValues(c.metricsName, "0").Inc()
47-
return "", fmt.Errorf("unable to build request to connect to Insights server: %v", err)
51+
// if the request is not build, for example because of invalid endpoint,(maybe some problem with DNS), we want to have record about it in metrics as well.
52+
insights.IncrementCounterRequestSend(noHttpStatusCode)
53+
return "", noHttpStatusCode, fmt.Errorf("unable to build request to connect to Insights server: %v", err)
4854
}
4955

5056
requestID := resp.Header.Get(insightsReqId)
@@ -58,36 +64,36 @@ func (c *Client) SendAndGetID(ctx context.Context, endpoint string, source Sourc
5864
}
5965
}()
6066

61-
counterRequestSend.WithLabelValues(c.metricsName, strconv.Itoa(resp.StatusCode)).Inc()
67+
insights.IncrementCounterRequestSend(resp.StatusCode)
6268

6369
if resp.StatusCode == http.StatusUnauthorized {
6470
klog.V(2).Infof("gateway server %s returned 401, %s=%s", resp.Request.URL, insightsReqId, requestID)
65-
return "", authorizer.Error{Err: fmt.Errorf("your Red Hat account is not enabled for remote support or your token has expired: %s", responseBody(resp))}
71+
return "", resp.StatusCode, authorizer.Error{Err: fmt.Errorf("your Red Hat account is not enabled for remote support or your token has expired: %s", responseBody(resp))}
6672
}
6773

6874
if resp.StatusCode == http.StatusForbidden {
6975
klog.V(2).Infof("gateway server %s returned 403, %s=%s", resp.Request.URL, insightsReqId, requestID)
70-
return "", authorizer.Error{Err: fmt.Errorf("your Red Hat account is not enabled for remote support")}
76+
return "", resp.StatusCode, authorizer.Error{Err: fmt.Errorf("your Red Hat account is not enabled for remote support")}
7177
}
7278

7379
if resp.StatusCode == http.StatusBadRequest {
74-
return "", fmt.Errorf("gateway server bad request: %s (request=%s): %s", resp.Request.URL, requestID, responseBody(resp))
80+
return "", resp.StatusCode, fmt.Errorf("gateway server bad request: %s (request=%s): %s", resp.Request.URL, requestID, responseBody(resp))
7581
}
7682

7783
if resp.StatusCode >= 300 || resp.StatusCode < 200 {
78-
return "", fmt.Errorf("gateway server reported unexpected error code: %d (request=%s): %s", resp.StatusCode, requestID, responseBody(resp))
84+
return "", resp.StatusCode, fmt.Errorf("gateway server reported unexpected error code: %d (request=%s): %s", resp.StatusCode, requestID, responseBody(resp))
7985
}
8086

8187
if len(requestID) > 0 {
8288
klog.V(2).Infof("Successfully reported id=%s %s=%s, wrote=%d", source.ID, insightsReqId, requestID, <-bytesRead)
8389
}
8490

85-
return requestID, nil
91+
return requestID, resp.StatusCode, nil
8692
}
8793

8894
// Send uploads archives to Ingress service
8995
func (c *Client) Send(ctx context.Context, endpoint string, source Source) error {
90-
_, err := c.SendAndGetID(ctx, endpoint, source)
96+
_, _, err := c.SendAndGetID(ctx, endpoint, source)
9197
return err
9298
}
9399

pkg/insights/insightsreport/insightsreport.go

+7-21
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,6 @@ type InsightsReporter interface {
4545
ArchiveUploaded() <-chan struct{}
4646
}
4747

48-
const (
49-
insightsLastGatherTimeName = "insightsclient_last_gather_time"
50-
)
51-
5248
var (
5349
// insightsStatus contains a metric with the latest report information
5450
insightsStatus = metrics.NewGaugeVec(&metrics.GaugeOpts{
@@ -59,11 +55,6 @@ var (
5955
}, []string{"metric"})
6056
// number of pulling report retries
6157
retryThreshold = 2
62-
63-
// insightsLastGatherTime contains time of the last Insights data gathering
64-
insightsLastGatherTime = metrics.NewGauge(&metrics.GaugeOpts{
65-
Name: insightsLastGatherTimeName,
66-
})
6758
)
6859

6960
// New initializes and returns a Gatherer
@@ -152,8 +143,8 @@ func (c *Controller) PullSmartProxy() (bool, error) {
152143
return true, fmt.Errorf("report not updated")
153144
}
154145

155-
recommendations, healthStatus, gatherTime := c.readInsightsReport(reportResponse.Report)
156-
updateInsightsMetrics(recommendations, healthStatus, gatherTime)
146+
recommendations, healthStatus := c.readInsightsReport(reportResponse.Report)
147+
updateInsightsMetrics(recommendations, healthStatus)
157148
err = c.updateOperatorStatusCR(reportResponse.Report, downloadTime)
158149
if err != nil {
159150
klog.Errorf("failed to update the Insights Operator CR status: %v", err)
@@ -278,7 +269,7 @@ type insightsReportClient interface {
278269
GetClusterVersion() (*configv1.ClusterVersion, error)
279270
}
280271

281-
func (c *Controller) readInsightsReport(report types.SmartProxyReport) ([]types.InsightsRecommendation, healthStatusCounts, time.Time) {
272+
func (c *Controller) readInsightsReport(report types.SmartProxyReport) ([]types.InsightsRecommendation, healthStatusCounts) {
282273
healthStatus := healthStatusCounts{}
283274
healthStatus.total = report.Meta.Count
284275
activeRecommendations := []types.InsightsRecommendation{}
@@ -323,23 +314,18 @@ func (c *Controller) readInsightsReport(report types.SmartProxyReport) ([]types.
323314
})
324315
}
325316

326-
t, err := time.Parse(time.RFC3339, string(report.Meta.GatheredAt))
327-
if err != nil {
328-
klog.Errorf("Metric %s not updated. Failed to parse time: %v", insightsLastGatherTimeName, err)
329-
}
330-
return activeRecommendations, healthStatus, t
317+
return activeRecommendations, healthStatus
331318
}
332319

333320
// updateInsightsMetrics update the Prometheus metrics from a report
334-
func updateInsightsMetrics(activeRecommendations []types.InsightsRecommendation, hsCount healthStatusCounts, gatherTime time.Time) {
321+
func updateInsightsMetrics(activeRecommendations []types.InsightsRecommendation, hsCount healthStatusCounts) {
335322
insights.RecommendationCollector.SetActiveRecommendations(activeRecommendations)
336323

337324
insightsStatus.WithLabelValues("low").Set(float64(hsCount.low))
338325
insightsStatus.WithLabelValues("moderate").Set(float64(hsCount.moderate))
339326
insightsStatus.WithLabelValues("important").Set(float64(hsCount.important))
340327
insightsStatus.WithLabelValues("critical").Set(float64(hsCount.critical))
341328
insightsStatus.WithLabelValues("total").Set(float64(hsCount.total))
342-
insightsLastGatherTime.Set(float64(gatherTime.Unix()))
343329
}
344330

345331
func (c *Controller) updateOperatorStatusCR(report types.SmartProxyReport, reportDownloadTime metav1.Time) error {
@@ -388,7 +374,7 @@ func extractErrorKeyFromRuleData(r types.RuleWithContentResponse) (string, error
388374

389375
errorKeyField, exists := extraDataMap["error_key"]
390376
if !exists {
391-
return "", fmt.Errorf("TemplateData of rule %q does not contain error_key", r.RuleID)
377+
return "", fmt.Errorf("templateData of rule %q does not contain error_key", r.RuleID)
392378
}
393379

394380
errorKeyStr, ok := errorKeyField.(string)
@@ -399,7 +385,7 @@ func extractErrorKeyFromRuleData(r types.RuleWithContentResponse) (string, error
399385
}
400386

401387
func init() {
402-
insights.MustRegisterMetrics(insightsStatus, insightsLastGatherTime)
388+
insights.MustRegisterMetrics(insightsStatus)
403389

404390
insightsStatus.WithLabelValues("low").Set(float64(-1))
405391
insightsStatus.WithLabelValues("moderate").Set(float64(-1))

pkg/insights/insightsreport/insightsreport_test.go

+2-3
Original file line numberDiff line numberDiff line change
@@ -242,10 +242,9 @@ func Test_readInsightsReport(t *testing.T) {
242242

243243
for _, tc := range tests {
244244
t.Run(tc.name, func(t *testing.T) {
245-
activeRecommendations, healthStatus, gatherTime := tc.testController.readInsightsReport(tc.report)
245+
activeRecommendations, healthStatus := tc.testController.readInsightsReport(tc.report)
246246
assert.Equal(t, tc.expectedActiveRecommendations, activeRecommendations)
247247
assert.Equal(t, tc.expectedHealthStatus, healthStatus)
248-
assert.Equal(t, tc.expectedGatherTime, gatherTime.String())
249248
})
250249
}
251250
}
@@ -286,7 +285,7 @@ func Test_extractErrorKeyFromRuleData(t *testing.T) {
286285
},
287286
},
288287
expectedErrorKey: "",
289-
expectedError: fmt.Errorf("TemplateData of rule \"%s\" does not contain error_key", testRuleID),
288+
expectedError: fmt.Errorf("templateData of rule \"%s\" does not contain error_key", testRuleID),
290289
},
291290
{
292291
name: "Rule response with wrong error_key type",

pkg/insights/insightsuploader/insightsuploader.go

+6-5
Original file line numberDiff line numberDiff line change
@@ -184,29 +184,30 @@ func (c *Controller) ArchiveUploaded() <-chan struct{} {
184184

185185
// Upload is an alternative simple upload method used only in TechPreview clusters.
186186
// Returns Insights request ID and error=nil in case of successful data upload.
187-
func (c *Controller) Upload(ctx context.Context, s *insightsclient.Source) (string, error) {
187+
func (c *Controller) Upload(ctx context.Context, s *insightsclient.Source) (string, int, error) {
188188
defer s.Contents.Close()
189189
start := time.Now()
190190
s.ID = start.Format(time.RFC3339)
191191
s.Type = "application/vnd.redhat.openshift.periodic"
192192
var requestID string
193+
var statusCode int
193194
err := wait.ExponentialBackoff(c.backoff, func() (done bool, err error) {
194-
requestID, err = c.client.SendAndGetID(ctx, c.secretConfigurator.Config().Endpoint, *s)
195+
requestID, statusCode, err = c.client.SendAndGetID(ctx, c.secretConfigurator.Config().Endpoint, *s)
195196
if err != nil {
196197
// do no return the error if it's not the last attempt
197198
if c.backoff.Steps > 1 {
198199
klog.V(2).Infof("Unable to upload report after %s: %v", time.Since(start).Truncate(time.Second/100), err)
199-
klog.Errorf("%v. Trying again in %s %d", err, c.backoff.Step(), c.backoff.Steps)
200+
klog.Errorf("%v. Trying again in %s", err, c.backoff.Step())
200201
return false, nil
201202
}
202203
}
203204
return true, err
204205
})
205206
if err != nil {
206-
return "", err
207+
return "", statusCode, err
207208
}
208209
klog.Infof("Uploaded report successfully in %s", time.Since(start))
209-
return requestID, nil
210+
return requestID, statusCode, nil
210211
}
211212

212213
func reportToLogs(source io.Reader, klog klog.Verbose) error {

0 commit comments

Comments
 (0)