Skip to content

Commit 0190c16

Browse files
committed
Allow partial metric updates
1 parent 12bcc9a commit 0190c16

File tree

2 files changed

+15
-9
lines changed

2 files changed

+15
-9
lines changed

pkg/epp/backend/metrics/metrics.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ type PodMetricsClientImpl struct {
3939
MetricMapping *MetricMapping
4040
}
4141

42-
// FetchMetrics fetches metrics from a given pod.
42+
// FetchMetrics fetches metrics from a given pod, clones the existing metrics object and returns an
43+
// updated one.
4344
func (p *PodMetricsClientImpl) FetchMetrics(
4445
ctx context.Context,
4546
pod *Pod,

pkg/epp/backend/metrics/pod_metrics.go

+13-8
Original file line numberDiff line numberDiff line change
@@ -116,16 +116,21 @@ func (pm *podMetrics) refreshMetrics() error {
116116
updated, err := pm.pmc.FetchMetrics(ctx, pm.GetPod(), pm.GetMetrics(), pool.Spec.TargetPortNumber)
117117
if err != nil {
118118
pm.logger.V(logutil.TRACE).Info("Failed to refreshed metrics:", "err", err)
119-
// As refresher is running in the background, it's possible that the pod is deleted but
120-
// the refresh goroutine doesn't read the done channel yet. In this case, we just return nil.
121-
// The refresher will be stopped after this interval.
122-
return nil
123119
}
124-
updated.UpdateTime = time.Now()
125-
126-
pm.logger.V(logutil.TRACE).Info("Refreshed metrics", "updated", updated)
120+
// Optimistically update metrics even if there was an error.
121+
// FetchMetrics can return an error for the following reasons:
122+
// 1. As the refresher runs in the background, it's possible that the pod has been deleted but
123+
// the refresh goroutine hasn't read the done channel yet. In this case, the updated
124+
// metrics object will be nil, and the refresher will soon be stopped.
125+
// 2. The FetchMetrics call can partially fail, for example because one metric is missing. In
126+
// this case, the updated metrics object will have partial updates. A partial update is
127+
// considered better than no updates.
128+
if updated != nil {
129+
updated.UpdateTime = time.Now()
130+
pm.logger.V(logutil.TRACE).Info("Refreshed metrics", "updated", updated)
131+
atomic.StorePointer(&pm.metrics, unsafe.Pointer(updated))
132+
}
127133

128-
atomic.StorePointer(&pm.metrics, unsafe.Pointer(updated))
129134
return nil
130135
}
131136

0 commit comments

Comments
 (0)