Skip to content

Commit 5c580bb

Browse files
committed
Bug 1388493 - Better handling of metrics errors
If the first request for metrics fails, show an empty state error message. Otherwise show an alert if more than one consecutive request fails. The alert has a retry link. Fixes https://bugzilla.redhat.com/show_bug.cgi?id=1388493
1 parent 75f0b8d commit 5c580bb

File tree

7 files changed

+254
-121
lines changed

7 files changed

+254
-121
lines changed

app/scripts/directives/deploymentMetrics.js

+57-14
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ angular.module('openshiftConsole')
1717
// in case pods is empty.
1818
containers: '=',
1919
// Optional: set to 'compact' to show smaller charts (for the overview)
20-
profile: '@'
20+
profile: '@',
21+
alerts: '=?'
2122
},
2223
templateUrl: function(elem, attrs) {
2324
if (attrs.profile === 'compact') {
@@ -93,6 +94,9 @@ angular.module('openshiftConsole')
9394
scope.loaded = false;
9495
scope.noData = true;
9596

97+
// Track the number of consecutive failures.
98+
var failureCount = 0;
99+
96100
// Get the URL to show in error messages.
97101
MetricsService.getMetricsURL().then(function(url) {
98102
scope.metricsURL = url;
@@ -311,7 +315,8 @@ angular.module('openshiftConsole')
311315
return;
312316
}
313317

314-
scope.loaded = true;
318+
// Reset the number of failures on a successful request.
319+
failureCount = 0;
315320

316321
// Show an average instead of a multiline chart when there are many pods.
317322
scope.showAverage = _.size(scope.pods) > 5 || compact;
@@ -392,6 +397,50 @@ angular.module('openshiftConsole')
392397
return config;
393398
}
394399

400+
// If the first request for metrics fails, show an empty state error message.
401+
// Otherwise show an alert if more than one consecutive request fails.
402+
function metricsFailed(response) {
403+
if (destroyed) {
404+
return;
405+
}
406+
407+
failureCount++;
408+
if (scope.noData) {
409+
// Show an empty state message if the first request for data fails.
410+
scope.metricsError = {
411+
status: _.get(response, 'status', 0),
412+
details: _.get(response, 'data.errorMsg') ||
413+
_.get(response, 'statusText') ||
414+
"Status code " + _.get(response, 'status', 0)
415+
};
416+
return;
417+
}
418+
419+
// If this is the first failure and a previous request succeeded, wait and try again.
420+
if (failureCount < 2) {
421+
return;
422+
}
423+
424+
// Show an alert if we've failed more than once.
425+
// Use scope.$id in the alert ID so that it is unique on pages that
426+
// use the directive multiple times like monitoring.
427+
var alertID = 'metrics-failed-' + scope.uniqueID;
428+
scope.alerts[alertID] = {
429+
type: 'error',
430+
message: 'An error occurred updating metrics.',
431+
links: [{
432+
href: '',
433+
label: 'Retry',
434+
onClick: function() {
435+
delete scope.alerts[alertID];
436+
// Reset failure count to 1 to trigger a retry.
437+
failureCount = 1;
438+
update();
439+
}
440+
}]
441+
};
442+
}
443+
395444
// Make sure there are no errors or missing data before updating.
396445
function canUpdate() {
397446
var noPods = _.isEmpty(scope.pods);
@@ -400,7 +449,7 @@ angular.module('openshiftConsole')
400449
scope.loaded = true;
401450
return false;
402451
}
403-
return !scope.metricsError;
452+
return !scope.metricsError && failureCount < 2;
404453
}
405454

406455
function updateData(metricType, podName, podData) {
@@ -419,23 +468,17 @@ angular.module('openshiftConsole')
419468
_.set(data, [metricType, podName], updated);
420469
}
421470

422-
function handleError(response) {
423-
scope.loaded = true;
424-
scope.metricsError = {
425-
status: _.get(response, 'status', 0),
426-
details: _.get(response, 'data.errorMsg') ||
427-
_.get(response, 'statusText') ||
428-
"Status code " + _.get(response, 'status', 0)
429-
};
430-
}
431-
432471
function update() {
433472
if (paused || !canUpdate()) {
434473
return;
435474
}
436475
lastUpdated = Date.now();
437476
var config = getConfig();
438-
MetricsService.getPodMetrics(config).then(processData, handleError);
477+
MetricsService.getPodMetrics(config).then(processData, metricsFailed).finally(function() {
478+
// Even on errors mark metrics as loaded to replace the
479+
// "Loading..." message with "No metrics to display."
480+
scope.loaded = true;
481+
});
439482
}
440483

441484
// Updates immediately and then on options changes.

app/scripts/directives/podMetrics.js

+80-37
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ angular.module('openshiftConsole')
1616
pod: '=',
1717
sparklineWidth: '=?',
1818
sparklineHeight: '=?',
19-
includedMetrics: '=?' // defaults to ["cpu", "memory", "network"]
19+
includedMetrics: '=?', // defaults to ["cpu", "memory", "network"]
20+
alerts: '=?'
2021
},
2122
templateUrl: 'views/directives/pod-metrics.html',
2223
link: function(scope) {
@@ -27,6 +28,7 @@ angular.module('openshiftConsole')
2728
var getCPULimit = $parse('resources.limits.cpu');
2829

2930
var updateInterval = 60 * 1000; // 60 seconds
31+
3032
// Number of data points to display on the chart.
3133
var numDataPoints = 30;
3234

@@ -382,9 +384,61 @@ angular.module('openshiftConsole')
382384
return null;
383385
}
384386

387+
// Track the number of consecutive failures.
388+
var failureCount = 0;
389+
390+
function metricsSucceeded() {
391+
// Reset the number of failures on a successful request.
392+
failureCount = 0;
393+
}
394+
395+
// If the first request for metrics fails, show an empty state error message.
396+
// Otherwise show an alert if more than one consecutive request fails.
397+
function metricsFailed(response) {
398+
if (destroyed) {
399+
return;
400+
}
401+
402+
failureCount++;
403+
if (scope.noData) {
404+
// Show an empty state message if the first request for data fails.
405+
scope.metricsError = {
406+
status: _.get(response, 'status', 0),
407+
details: _.get(response, 'data.errorMsg') ||
408+
_.get(response, 'statusText') ||
409+
"Status code " + _.get(response, 'status', 0)
410+
};
411+
return;
412+
}
413+
414+
// If this is the first failure and a previous request succeeded, wait and try again.
415+
if (failureCount < 2) {
416+
return;
417+
}
418+
419+
// Show an alert if we've failed more than once.
420+
// Use scope.$id in the alert ID so that it is unique on pages that
421+
// use the directive multiple times like monitoring.
422+
var alertID = 'metrics-failed-' + scope.uniqueID;
423+
scope.alerts[alertID] = {
424+
type: 'error',
425+
message: 'An error occurred updating metrics for pod ' + _.get(scope, 'pod.metadata.name', '<unknown>') + '.',
426+
links: [{
427+
href: '',
428+
label: 'Retry',
429+
onClick: function() {
430+
delete scope.alerts[alertID];
431+
// Reset failure count to 1 to trigger a retry.
432+
failureCount = 1;
433+
update();
434+
}
435+
}]
436+
};
437+
}
438+
385439
// Make sure there are no errors or missing data before updating.
386440
function canUpdate() {
387-
if (scope.metricsError) {
441+
if (scope.metricsError || failureCount > 1) {
388442
return false;
389443
}
390444

@@ -419,8 +473,9 @@ angular.module('openshiftConsole')
419473
// time. This prevents an issue where the donut chart shows 0 for
420474
// current usage if the client clock is ahead of the server clock.
421475
var start = getStartTime();
476+
var allPromises = [];
422477
angular.forEach(scope.metrics, function(metric) {
423-
var promises = [];
478+
var datasetPromises = [];
424479

425480
// On metrics that require more than one set of data (e.g. network
426481
// incoming and outgoing traffic) we perform one request for each,
@@ -434,52 +489,40 @@ angular.module('openshiftConsole')
434489
if (!config) {
435490
return;
436491
}
437-
promises.push(MetricsService.get(config));
492+
var promise = MetricsService.get(config);
493+
datasetPromises.push(promise);
438494
});
439495

496+
allPromises = allPromises.concat(datasetPromises);
497+
440498
// Collect all promises from every metric requested into one, so we
441499
// have all data the chart wants at the time of the chart creation
442500
// (or timeout updates, etc).
443-
$q.all(promises).then(
444-
// success
445-
function(responses) {
446-
if (destroyed) {
447-
return;
448-
}
449-
450-
angular.forEach(responses, function(response) {
451-
if (!response) {
452-
return;
453-
}
501+
$q.all(datasetPromises).then(function(responses) {
502+
if (destroyed) {
503+
return;
504+
}
454505

455-
var dataset = _.find(metric.datasets, {
456-
id: response.metricID
457-
});
458-
updateData(dataset, response);
459-
});
460-
updateChart(metric);
461-
},
462-
// failure
463-
function(responses) {
464-
if (destroyed) {
506+
angular.forEach(responses, function(response) {
507+
if (!response) {
465508
return;
466509
}
467510

468-
angular.forEach(responses, function(response) {
469-
scope.metricsError = {
470-
status: _.get(response, 'status', 0),
471-
details: _.get(response, 'data.errorMsg') ||
472-
_.get(response, 'statusText') ||
473-
"Status code " + _.get(response, 'status', 0)
474-
};
511+
var dataset = _.find(metric.datasets, {
512+
id: response.metricID
475513
});
476-
}
477-
).finally(function() {
478-
// Even on errors mark metrics as loaded to replace the
479-
// "Loading..." message with "No metrics to display."
480-
scope.loaded = true;
514+
updateData(dataset, response);
515+
});
516+
updateChart(metric);
481517
});
482518
});
519+
520+
// Handle failures when any request fails.
521+
$q.all(allPromises).then(metricsSucceeded, metricsFailed).finally(function() {
522+
// Even on errors mark metrics as loaded to replace the
523+
// "Loading..." message with "No metrics to display."
524+
scope.loaded = true;
525+
});
483526
}
484527

485528
// Updates immediately and then on options changes.

app/views/browse/pod.html

+2-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,8 @@ <h3>Container {{container.name}} Environment Variables</h3>
7676
we don't update in the background. -->
7777
<pod-metrics
7878
ng-if="selectedTab.metrics"
79-
pod="pod">
79+
pod="pod"
80+
alerts="alerts">
8081
</pod-metrics>
8182
</uib-tab>
8283

app/views/browse/replica-set.html

+2-1
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ <h3>Container {{container.name}} Environment Variables</h3>
124124
we don't update in the background. -->
125125
<deployment-metrics
126126
ng-if="selectedTab.metrics && podsForDeployment"
127-
pods="podsForDeployment" containers="replicaSet.spec.template.spec.containers">
127+
pods="podsForDeployment" containers="replicaSet.spec.template.spec.containers"
128+
alerts="alerts">
128129
</deployment-metrics>
129130
</uib-tab>
130131
<uib-tab ng-if="deploymentConfigName && logOptions.version && ('deploymentconfigs/log' | canI : 'get')" active="selectedTab.logs">

app/views/monitoring.html

+5-3
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,8 @@ <h2>Deployments</h2>
207207
<div class="mar-top-lg" ng-if="metricsAvailable">
208208
<deployment-metrics
209209
pods="podsByOwnerUID[replicationController.metadata.uid]"
210-
containers="replicationController.spec.template.spec.containers">
210+
containers="replicationController.spec.template.spec.containers"
211+
alerts="alerts">
211212
</deployment-metrics>
212213
</div>
213214
</div>
@@ -257,7 +258,8 @@ <h2>Deployments</h2>
257258
<div class="mar-top-lg" ng-if="metricsAvailable">
258259
<deployment-metrics
259260
pods="podsByOwnerUID[replicaSet.metadata.uid]"
260-
containers="replicaSet.spec.template.spec.containers">
261+
containers="replicaSet.spec.template.spec.containers"
262+
alerts="alerts">
261263
</deployment-metrics>
262264
</div>
263265
</div>
@@ -351,7 +353,7 @@ <h2>Pods</h2>
351353
</log-viewer>
352354
<!-- Until the metrics directive pulls the metrics from the time range of the life of the pod, hide the metrics for old stuff -->
353355
<div class="mar-top-lg" ng-if="metricsAvailable">
354-
<pod-metrics pod="pod"></pod-metrics>
356+
<pod-metrics pod="pod" alerts="alerts"></pod-metrics>
355357
</div>
356358
</div>
357359
</div>

0 commit comments

Comments
 (0)