Skip to content

Commit 72514e8

Browse files
Make the master endpoint lease ttl configurable
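In very large clusters, load on etcd can keep a master from refreshing its endpoint lease before the TTL expires, so the TTL (and the reconcile interval derived from it) is now configurable.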
1 parent 05b9ea8 commit 72514e8

File tree

7 files changed

+41
-23
lines changed

7 files changed

+41
-23
lines changed

pkg/cmd/server/api/types.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1151,6 +1151,11 @@ type KubernetesMasterConfig struct {
11511151
// MasterCount is the number of expected masters that should be running. This value defaults to 1 and may be set to a positive integer,
11521152
// or if set to -1, indicates this is part of a cluster.
11531153
MasterCount int
1154+
// MasterEndpointReconcileTTL sets the time to live in seconds of the endpoint record written by each master. The endpoints are checked
1155+
// at an interval that is 2/3 of this value (rounded down to a whole second) and this value defaults to 15s if unset. In very large clusters, this value may be increased to
1156+
// reduce the possibility that the master endpoint record expires (due to other load on the etcd server) and causes masters to drop in and
1157+
// out of the kubernetes service record. It is not recommended to set this value below 15s.
1158+
MasterEndpointReconcileTTL int
11541159
// ServicesSubnet is the subnet to use for assigning service IPs
11551160
ServicesSubnet string
11561161
// ServicesNodePortRange is the range to use for assigning service public ports on a host.

pkg/cmd/server/api/v1/conversions.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,9 @@ func addDefaultingFuncs(scheme *runtime.Scheme) error {
112112
if obj.MasterCount == 0 {
113113
obj.MasterCount = 1
114114
}
115+
if obj.MasterEndpointReconcileTTL == 0 {
116+
obj.MasterEndpointReconcileTTL = 15
117+
}
115118
if len(obj.APILevels) == 0 {
116119
obj.APILevels = internal.DefaultKubernetesAPILevels
117120
}

pkg/cmd/server/api/v1/swagger_doc.go

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -378,21 +378,22 @@ func (KubeletConnectionInfo) SwaggerDoc() map[string]string {
378378
}
379379

380380
var map_KubernetesMasterConfig = map[string]string{
381-
"": "KubernetesMasterConfig holds the necessary configuration options for the Kubernetes master",
382-
"apiLevels": "APILevels is a list of API levels that should be enabled on startup: v1 as examples",
383-
"disabledAPIGroupVersions": "DisabledAPIGroupVersions is a map of groups to the versions (or *) that should be disabled.",
384-
"masterIP": "MasterIP is the public IP address of kubernetes stuff. If empty, the first result from net.InterfaceAddrs will be used.",
385-
"masterCount": "MasterCount is the number of expected masters that should be running. This value defaults to 1 and may be set to a positive integer, or if set to -1, indicates this is part of a cluster.",
386-
"servicesSubnet": "ServicesSubnet is the subnet to use for assigning service IPs",
387-
"servicesNodePortRange": "ServicesNodePortRange is the range to use for assigning service public ports on a host.",
388-
"staticNodeNames": "StaticNodeNames is the list of nodes that are statically known",
389-
"schedulerConfigFile": "SchedulerConfigFile points to a file that describes how to set up the scheduler. If empty, you get the default scheduling rules.",
390-
"podEvictionTimeout": "PodEvictionTimeout controls grace period for deleting pods on failed nodes. It takes valid time duration string. If empty, you get the default pod eviction timeout.",
391-
"proxyClientInfo": "ProxyClientInfo specifies the client cert/key to use when proxying to pods",
392-
"admissionConfig": "AdmissionConfig contains admission control plugin configuration.",
393-
"apiServerArguments": "APIServerArguments are key value pairs that will be passed directly to the Kube apiserver that match the apiservers's command line arguments. These are not migrated, but if you reference a value that does not exist the server will not start. These values may override other settings in KubernetesMasterConfig which may cause invalid configurations.",
394-
"controllerArguments": "ControllerArguments are key value pairs that will be passed directly to the Kube controller manager that match the controller manager's command line arguments. These are not migrated, but if you reference a value that does not exist the server will not start. These values may override other settings in KubernetesMasterConfig which may cause invalid configurations.",
395-
"schedulerArguments": "SchedulerArguments are key value pairs that will be passed directly to the Kube scheduler that match the scheduler's command line arguments. These are not migrated, but if you reference a value that does not exist the server will not start. These values may override other settings in KubernetesMasterConfig which may cause invalid configurations.",
381+
"": "KubernetesMasterConfig holds the necessary configuration options for the Kubernetes master",
382+
"apiLevels": "APILevels is a list of API levels that should be enabled on startup: v1 as examples",
383+
"disabledAPIGroupVersions": "DisabledAPIGroupVersions is a map of groups to the versions (or *) that should be disabled.",
384+
"masterIP": "MasterIP is the public IP address of kubernetes stuff. If empty, the first result from net.InterfaceAddrs will be used.",
385+
"masterCount": "MasterCount is the number of expected masters that should be running. This value defaults to 1 and may be set to a positive integer, or if set to -1, indicates this is part of a cluster.",
386+
"masterEndpointReconcileTTL": "MasterEndpointReconcileTTL sets the time to live in seconds of an endpoint record recorded by each master. The endpoints are checked at an interval that is 2/3 of this value and this value defaults to 15s if unset. In very large clusters, this value may be increased to reduce the possibility that the master endpoint record expires (due to other load on the etcd server) and causes masters to drop in and out of the kubernetes service record. It is not recommended to set this value below 15s.",
387+
"servicesSubnet": "ServicesSubnet is the subnet to use for assigning service IPs",
388+
"servicesNodePortRange": "ServicesNodePortRange is the range to use for assigning service public ports on a host.",
389+
"staticNodeNames": "StaticNodeNames is the list of nodes that are statically known",
390+
"schedulerConfigFile": "SchedulerConfigFile points to a file that describes how to set up the scheduler. If empty, you get the default scheduling rules.",
391+
"podEvictionTimeout": "PodEvictionTimeout controls grace period for deleting pods on failed nodes. It takes valid time duration string. If empty, you get the default pod eviction timeout.",
392+
"proxyClientInfo": "ProxyClientInfo specifies the client cert/key to use when proxying to pods",
393+
"admissionConfig": "AdmissionConfig contains admission control plugin configuration.",
394+
"apiServerArguments": "APIServerArguments are key value pairs that will be passed directly to the Kube apiserver that match the apiservers's command line arguments. These are not migrated, but if you reference a value that does not exist the server will not start. These values may override other settings in KubernetesMasterConfig which may cause invalid configurations.",
395+
"controllerArguments": "ControllerArguments are key value pairs that will be passed directly to the Kube controller manager that match the controller manager's command line arguments. These are not migrated, but if you reference a value that does not exist the server will not start. These values may override other settings in KubernetesMasterConfig which may cause invalid configurations.",
396+
"schedulerArguments": "SchedulerArguments are key value pairs that will be passed directly to the Kube scheduler that match the scheduler's command line arguments. These are not migrated, but if you reference a value that does not exist the server will not start. These values may override other settings in KubernetesMasterConfig which may cause invalid configurations.",
396397
}
397398

398399
func (KubernetesMasterConfig) SwaggerDoc() map[string]string {

pkg/cmd/server/api/v1/types.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1082,6 +1082,11 @@ type KubernetesMasterConfig struct {
10821082
// MasterCount is the number of expected masters that should be running. This value defaults to 1 and may be set to a positive integer,
10831083
// or if set to -1, indicates this is part of a cluster.
10841084
MasterCount int `json:"masterCount"`
1085+
// MasterEndpointReconcileTTL sets the time to live in seconds of an endpoint record recorded by each master. The endpoints are checked
1086+
// at an interval that is 2/3 of this value and this value defaults to 15s if unset. In very large clusters, this value may be increased to
1087+
// reduce the possibility that the master endpoint record expires (due to other load on the etcd server) and causes masters to drop in and
1088+
// out of the kubernetes service record. It is not recommended to set this value below 15s.
1089+
MasterEndpointReconcileTTL int `json:"masterEndpointReconcileTTL"`
10851090
// ServicesSubnet is the subnet to use for assigning service IPs
10861091
ServicesSubnet string `json:"servicesSubnet"`
10871092
// ServicesNodePortRange is the range to use for assigning service public ports on a host.

pkg/cmd/server/api/v1/types_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ kubernetesMasterConfig:
198198
controllerArguments: null
199199
disabledAPIGroupVersions: null
200200
masterCount: 0
201+
masterEndpointReconcileTTL: 0
201202
masterIP: ""
202203
podEvictionTimeout: ""
203204
proxyClientInfo:

pkg/cmd/server/kubernetes/master/master_config.go

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -537,7 +537,11 @@ func buildKubeApiserverConfig(
537537
}
538538

539539
if kubeApiserverConfig.EnableCoreControllers {
540-
glog.V(2).Info("Using the lease endpoint reconciler")
540+
ttl := masterConfig.KubernetesMasterConfig.MasterEndpointReconcileTTL
541+
interval := ttl * 2 / 3
542+
543+
glog.V(2).Infof("Using the lease endpoint reconciler with TTL=%ds and interval=%ds", ttl, interval)
544+
541545
config, err := kubeApiserverConfig.StorageFactory.NewConfig(kapi.Resource("apiServerIPInfo"))
542546
if err != nil {
543547
return nil, err
@@ -546,7 +550,8 @@ func buildKubeApiserverConfig(
546550
if err != nil {
547551
return nil, err
548552
}
549-
masterLeases := newMasterLeases(leaseStorage)
553+
554+
masterLeases := newMasterLeases(leaseStorage, ttl)
550555

551556
endpointConfig, err := kubeApiserverConfig.StorageFactory.NewConfig(kapi.Resource("endpoints"))
552557
if err != nil {
@@ -563,7 +568,7 @@ func buildKubeApiserverConfig(
563568

564569
kubeApiserverConfig.EndpointReconcilerConfig = master.EndpointReconcilerConfig{
565570
Reconciler: election.NewLeaseEndpointReconciler(endpointRegistry, masterLeases),
566-
Interval: master.DefaultEndpointReconcilerInterval,
571+
Interval: time.Duration(interval) * time.Second,
567572
}
568573
}
569574

@@ -795,8 +800,6 @@ func readCAorNil(file string) ([]byte, error) {
795800
return ioutil.ReadFile(file)
796801
}
797802

798-
func newMasterLeases(storage storage.Interface) election.Leases {
799-
// leaseTTL is in seconds, i.e. 15 means 15 seconds; do NOT do 15*time.Second!
800-
leaseTTL := uint64((master.DefaultEndpointReconcilerInterval + 5*time.Second) / time.Second) // add 5 seconds for wiggle room
801-
return election.NewLeases(storage, "/masterleases/", leaseTTL)
803+
func newMasterLeases(storage storage.Interface, masterEndpointReconcileTTL int) election.Leases {
804+
return election.NewLeases(storage, "/masterleases/", uint64(masterEndpointReconcileTTL))
802805
}

pkg/cmd/server/kubernetes/master/master_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ func TestNewMasterLeasesHasCorrectTTL(t *testing.T) {
2929
storageInterface, _ := restOptions.Decorator(kapi.Scheme, restOptions.StorageConfig, &watchCacheDisabled, nil, "masterleases", nil, nil, nil, nil)
3030
defer server.Terminate(t)
3131

32-
masterLeases := newMasterLeases(storageInterface)
32+
masterLeases := newMasterLeases(storageInterface, 15)
3333
if err := masterLeases.UpdateLease("1.2.3.4"); err != nil {
3434
t.Fatalf("error updating lease: %v", err)
3535
}

0 commit comments

Comments
 (0)