Skip to content

Commit 66ae612

Browse files
author
OpenShift Bot
authored
Merge pull request #930 from spadgett/metrics-retry
Merged by openshift-bot
2 parents 75f0b8d + 5c580bb commit 66ae612

File tree

7 files changed

+254
-121
lines changed

7 files changed

+254
-121
lines changed

Diff for: app/scripts/directives/deploymentMetrics.js

+57-14
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ angular.module('openshiftConsole')
1717
// in case pods is empty.
1818
containers: '=',
1919
// Optional: set to 'compact' to show smaller charts (for the overview)
20-
profile: '@'
20+
profile: '@',
21+
alerts: '=?'
2122
},
2223
templateUrl: function(elem, attrs) {
2324
if (attrs.profile === 'compact') {
@@ -93,6 +94,9 @@ angular.module('openshiftConsole')
9394
scope.loaded = false;
9495
scope.noData = true;
9596

97+
// Track the number of consecutive failures.
98+
var failureCount = 0;
99+
96100
// Get the URL to show in error messages.
97101
MetricsService.getMetricsURL().then(function(url) {
98102
scope.metricsURL = url;
@@ -311,7 +315,8 @@ angular.module('openshiftConsole')
311315
return;
312316
}
313317

314-
scope.loaded = true;
318+
// Reset the number of failures on a successful request.
319+
failureCount = 0;
315320

316321
// Show an average instead of a multiline chart when there are many pods.
317322
scope.showAverage = _.size(scope.pods) > 5 || compact;
@@ -392,6 +397,50 @@ angular.module('openshiftConsole')
392397
return config;
393398
}
394399

400+
// If the first request for metrics fails, show an empty state error message.
401+
// Otherwise show an alert if more than one consecutive request fails.
402+
function metricsFailed(response) {
403+
if (destroyed) {
404+
return;
405+
}
406+
407+
failureCount++;
408+
if (scope.noData) {
409+
// Show an empty state message if the first request for data fails.
410+
scope.metricsError = {
411+
status: _.get(response, 'status', 0),
412+
details: _.get(response, 'data.errorMsg') ||
413+
_.get(response, 'statusText') ||
414+
"Status code " + _.get(response, 'status', 0)
415+
};
416+
return;
417+
}
418+
419+
// If this is the first failure and a previous request succeeded, wait and try again.
420+
if (failureCount < 2) {
421+
return;
422+
}
423+
424+
// Show an alert if we've failed more than once.
425+
// Use scope.$id in the alert ID so that it is unique on pages that
426+
// use the directive multiple times like monitoring.
427+
var alertID = 'metrics-failed-' + scope.uniqueID;
428+
scope.alerts[alertID] = {
429+
type: 'error',
430+
message: 'An error occurred updating metrics.',
431+
links: [{
432+
href: '',
433+
label: 'Retry',
434+
onClick: function() {
435+
delete scope.alerts[alertID];
436+
// Reset failure count to 1 to trigger a retry.
437+
failureCount = 1;
438+
update();
439+
}
440+
}]
441+
};
442+
}
443+
395444
// Make sure there are no errors or missing data before updating.
396445
function canUpdate() {
397446
var noPods = _.isEmpty(scope.pods);
@@ -400,7 +449,7 @@ angular.module('openshiftConsole')
400449
scope.loaded = true;
401450
return false;
402451
}
403-
return !scope.metricsError;
452+
return !scope.metricsError && failureCount < 2;
404453
}
405454

406455
function updateData(metricType, podName, podData) {
@@ -419,23 +468,17 @@ angular.module('openshiftConsole')
419468
_.set(data, [metricType, podName], updated);
420469
}
421470

422-
function handleError(response) {
423-
scope.loaded = true;
424-
scope.metricsError = {
425-
status: _.get(response, 'status', 0),
426-
details: _.get(response, 'data.errorMsg') ||
427-
_.get(response, 'statusText') ||
428-
"Status code " + _.get(response, 'status', 0)
429-
};
430-
}
431-
432471
function update() {
433472
if (paused || !canUpdate()) {
434473
return;
435474
}
436475
lastUpdated = Date.now();
437476
var config = getConfig();
438-
MetricsService.getPodMetrics(config).then(processData, handleError);
477+
MetricsService.getPodMetrics(config).then(processData, metricsFailed).finally(function() {
478+
// Even on errors mark metrics as loaded to replace the
479+
// "Loading..." message with "No metrics to display."
480+
scope.loaded = true;
481+
});
439482
}
440483

441484
// Updates immediately and then on options changes.

Diff for: app/scripts/directives/podMetrics.js

+80-37
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ angular.module('openshiftConsole')
1616
pod: '=',
1717
sparklineWidth: '=?',
1818
sparklineHeight: '=?',
19-
includedMetrics: '=?' // defaults to ["cpu", "memory", "network"]
19+
includedMetrics: '=?', // defaults to ["cpu", "memory", "network"]
20+
alerts: '=?'
2021
},
2122
templateUrl: 'views/directives/pod-metrics.html',
2223
link: function(scope) {
@@ -27,6 +28,7 @@ angular.module('openshiftConsole')
2728
var getCPULimit = $parse('resources.limits.cpu');
2829

2930
var updateInterval = 60 * 1000; // 60 seconds
31+
3032
// Number of data points to display on the chart.
3133
var numDataPoints = 30;
3234

@@ -382,9 +384,61 @@ angular.module('openshiftConsole')
382384
return null;
383385
}
384386

387+
// Track the number of consecutive failures.
388+
var failureCount = 0;
389+
390+
function metricsSucceeded() {
391+
// Reset the number of failures on a successful request.
392+
failureCount = 0;
393+
}
394+
395+
// If the first request for metrics fails, show an empty state error message.
396+
// Otherwise show an alert if more than one consecutive request fails.
397+
function metricsFailed(response) {
398+
if (destroyed) {
399+
return;
400+
}
401+
402+
failureCount++;
403+
if (scope.noData) {
404+
// Show an empty state message if the first request for data fails.
405+
scope.metricsError = {
406+
status: _.get(response, 'status', 0),
407+
details: _.get(response, 'data.errorMsg') ||
408+
_.get(response, 'statusText') ||
409+
"Status code " + _.get(response, 'status', 0)
410+
};
411+
return;
412+
}
413+
414+
// If this is the first failure and a previous request succeeded, wait and try again.
415+
if (failureCount < 2) {
416+
return;
417+
}
418+
419+
// Show an alert if we've failed more than once.
420+
// Use scope.$id in the alert ID so that it is unique on pages that
421+
// use the directive multiple times like monitoring.
422+
var alertID = 'metrics-failed-' + scope.uniqueID;
423+
scope.alerts[alertID] = {
424+
type: 'error',
425+
message: 'An error occurred updating metrics for pod ' + _.get(scope, 'pod.metadata.name', '<unknown>') + '.',
426+
links: [{
427+
href: '',
428+
label: 'Retry',
429+
onClick: function() {
430+
delete scope.alerts[alertID];
431+
// Reset failure count to 1 to trigger a retry.
432+
failureCount = 1;
433+
update();
434+
}
435+
}]
436+
};
437+
}
438+
385439
// Make sure there are no errors or missing data before updating.
386440
function canUpdate() {
387-
if (scope.metricsError) {
441+
if (scope.metricsError || failureCount > 1) {
388442
return false;
389443
}
390444

@@ -419,8 +473,9 @@ angular.module('openshiftConsole')
419473
// time. This prevents an issue where the donut chart shows 0 for
420474
// current usage if the client clock is ahead of the server clock.
421475
var start = getStartTime();
476+
var allPromises = [];
422477
angular.forEach(scope.metrics, function(metric) {
423-
var promises = [];
478+
var datasetPromises = [];
424479

425480
// On metrics that require more than one set of data (e.g. network
426481
// incoming and outgoing traffic) we perform one request for each,
@@ -434,52 +489,40 @@ angular.module('openshiftConsole')
434489
if (!config) {
435490
return;
436491
}
437-
promises.push(MetricsService.get(config));
492+
var promise = MetricsService.get(config);
493+
datasetPromises.push(promise);
438494
});
439495

496+
allPromises = allPromises.concat(datasetPromises);
497+
440498
// Collect all promises from every metric requested into one, so we
441499
// have all data the chart wants at the time of the chart creation
442500
// (or timeout updates, etc).
443-
$q.all(promises).then(
444-
// success
445-
function(responses) {
446-
if (destroyed) {
447-
return;
448-
}
449-
450-
angular.forEach(responses, function(response) {
451-
if (!response) {
452-
return;
453-
}
501+
$q.all(datasetPromises).then(function(responses) {
502+
if (destroyed) {
503+
return;
504+
}
454505

455-
var dataset = _.find(metric.datasets, {
456-
id: response.metricID
457-
});
458-
updateData(dataset, response);
459-
});
460-
updateChart(metric);
461-
},
462-
// failure
463-
function(responses) {
464-
if (destroyed) {
506+
angular.forEach(responses, function(response) {
507+
if (!response) {
465508
return;
466509
}
467510

468-
angular.forEach(responses, function(response) {
469-
scope.metricsError = {
470-
status: _.get(response, 'status', 0),
471-
details: _.get(response, 'data.errorMsg') ||
472-
_.get(response, 'statusText') ||
473-
"Status code " + _.get(response, 'status', 0)
474-
};
511+
var dataset = _.find(metric.datasets, {
512+
id: response.metricID
475513
});
476-
}
477-
).finally(function() {
478-
// Even on errors mark metrics as loaded to replace the
479-
// "Loading..." message with "No metrics to display."
480-
scope.loaded = true;
514+
updateData(dataset, response);
515+
});
516+
updateChart(metric);
481517
});
482518
});
519+
520+
// Handle failures when any request fails.
521+
$q.all(allPromises).then(metricsSucceeded, metricsFailed).finally(function() {
522+
// Even on errors mark metrics as loaded to replace the
523+
// "Loading..." message with "No metrics to display."
524+
scope.loaded = true;
525+
});
483526
}
484527

485528
// Updates immediately and then on options changes.

Diff for: app/views/browse/pod.html

+2-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,8 @@ <h3>Container {{container.name}} Environment Variables</h3>
7676
we don't update in the background. -->
7777
<pod-metrics
7878
ng-if="selectedTab.metrics"
79-
pod="pod">
79+
pod="pod"
80+
alerts="alerts">
8081
</pod-metrics>
8182
</uib-tab>
8283

Diff for: app/views/browse/replica-set.html

+2-1
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ <h3>Container {{container.name}} Environment Variables</h3>
124124
we don't update in the background. -->
125125
<deployment-metrics
126126
ng-if="selectedTab.metrics && podsForDeployment"
127-
pods="podsForDeployment" containers="replicaSet.spec.template.spec.containers">
127+
pods="podsForDeployment" containers="replicaSet.spec.template.spec.containers"
128+
alerts="alerts">
128129
</deployment-metrics>
129130
</uib-tab>
130131
<uib-tab ng-if="deploymentConfigName && logOptions.version && ('deploymentconfigs/log' | canI : 'get')" active="selectedTab.logs">

Diff for: app/views/monitoring.html

+5-3
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,8 @@ <h2>Deployments</h2>
207207
<div class="mar-top-lg" ng-if="metricsAvailable">
208208
<deployment-metrics
209209
pods="podsByOwnerUID[replicationController.metadata.uid]"
210-
containers="replicationController.spec.template.spec.containers">
210+
containers="replicationController.spec.template.spec.containers"
211+
alerts="alerts">
211212
</deployment-metrics>
212213
</div>
213214
</div>
@@ -257,7 +258,8 @@ <h2>Deployments</h2>
257258
<div class="mar-top-lg" ng-if="metricsAvailable">
258259
<deployment-metrics
259260
pods="podsByOwnerUID[replicaSet.metadata.uid]"
260-
containers="replicaSet.spec.template.spec.containers">
261+
containers="replicaSet.spec.template.spec.containers"
262+
alerts="alerts">
261263
</deployment-metrics>
262264
</div>
263265
</div>
@@ -351,7 +353,7 @@ <h2>Pods</h2>
351353
</log-viewer>
352354
<!-- Until the metrics directive pulls the metrics from the time range of the life of the pod, hide the metrics for old stuff -->
353355
<div class="mar-top-lg" ng-if="metricsAvailable">
354-
<pod-metrics pod="pod"></pod-metrics>
356+
<pod-metrics pod="pod" alerts="alerts"></pod-metrics>
355357
</div>
356358
</div>
357359
</div>

0 commit comments

Comments
 (0)