Skip to content

Commit 8f2307c

Browse files
UPSTREAM: <drop>: leaderelection settings
Extensive e2e tests revealed that our component might run into leader election timeouts during cluster bootstrap, causing sporadic alerts to be generated. This commit uses the recommended settings for leader election: LeaseDuration: 15s -> 137s; RenewDeadline: 10s -> 107s; RetryPeriod: 2s -> 26s. Warning: This will increase the potential downtime of components to 163s in the worst case (LeaseDuration + RetryPeriod), up from 17s.
1 parent 637b45c commit 8f2307c

File tree

2 files changed

+17
-3
lines changed

2 files changed

+17
-3
lines changed

Diff for: catalogd/cmd/catalogd/main.go

+9-2
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ import (
4242
_ "k8s.io/client-go/plugin/pkg/client/auth"
4343
"k8s.io/klog/v2"
4444
"k8s.io/klog/v2/textlogger"
45+
"k8s.io/utils/ptr"
4546
ctrl "sigs.k8s.io/controller-runtime"
4647
crcache "sigs.k8s.io/controller-runtime/pkg/cache"
4748
"sigs.k8s.io/controller-runtime/pkg/certwatcher"
@@ -231,8 +232,14 @@ func main() {
231232
HealthProbeBindAddress: probeAddr,
232233
LeaderElection: enableLeaderElection,
233234
LeaderElectionID: "catalogd-operator-lock",
234-
WebhookServer: webhookServer,
235-
Cache: cacheOptions,
235+
// Recommended Leader Election values
236+
// https://github.com/openshift/enhancements/blob/61581dcd985130357d6e4b0e72b87ee35394bf6e/CONVENTIONS.md#handling-kube-apiserver-disruption
237+
LeaseDuration: ptr.To(137 * time.Second),
238+
RenewDeadline: ptr.To(107 * time.Second),
239+
RetryPeriod: ptr.To(26 * time.Second),
240+
241+
WebhookServer: webhookServer,
242+
Cache: cacheOptions,
236243
})
237244
if err != nil {
238245
setupLog.Error(err, "unable to create manager")

Diff for: cmd/operator-controller/main.go

+8-1
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ import (
4040
_ "k8s.io/client-go/plugin/pkg/client/auth"
4141
"k8s.io/klog/v2"
4242
"k8s.io/klog/v2/textlogger"
43+
"k8s.io/utils/ptr"
4344
ctrl "sigs.k8s.io/controller-runtime"
4445
crcache "sigs.k8s.io/controller-runtime/pkg/cache"
4546
"sigs.k8s.io/controller-runtime/pkg/certwatcher"
@@ -229,7 +230,13 @@ func main() {
229230
HealthProbeBindAddress: probeAddr,
230231
LeaderElection: enableLeaderElection,
231232
LeaderElectionID: "9c4404e7.operatorframework.io",
232-
Cache: cacheOptions,
233+
// Recommended Leader Election values
234+
// https://github.com/openshift/enhancements/blob/61581dcd985130357d6e4b0e72b87ee35394bf6e/CONVENTIONS.md#handling-kube-apiserver-disruption
235+
LeaseDuration: ptr.To(137 * time.Second),
236+
RenewDeadline: ptr.To(107 * time.Second),
237+
RetryPeriod: ptr.To(26 * time.Second),
238+
239+
Cache: cacheOptions,
233240
// LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily
234241
// when the Manager ends. This requires the binary to immediately end when the
235242
// Manager is stopped, otherwise, this setting is unsafe. Setting this significantly

0 commit comments

Comments
 (0)