diff --git a/pkg/cmd/server/api/types.go b/pkg/cmd/server/api/types.go index fd12890c0284..ee0066570993 100644 --- a/pkg/cmd/server/api/types.go +++ b/pkg/cmd/server/api/types.go @@ -1151,6 +1151,11 @@ type KubernetesMasterConfig struct { // MasterCount is the number of expected masters that should be running. This value defaults to 1 and may be set to a positive integer, // or if set to -1, indicates this is part of a cluster. MasterCount int + // MasterEndpointReconcileTTL sets the time to live in seconds of an endpoint record recorded by each master. The endpoints are checked + // at an interval that is 2/3 of this value and this value defaults to 15s if unset. In very large clusters, this value may be increased to + // reduce the possibility that the master endpoint record expires (due to other load on the etcd server) and causes masters to drop in and + // out of the kubernetes service record. It is not recommended to set this value below 15s. + MasterEndpointReconcileTTL int // ServicesSubnet is the subnet to use for assigning service IPs ServicesSubnet string // ServicesNodePortRange is the range to use for assigning service public ports on a host. 
diff --git a/pkg/cmd/server/api/v1/conversions.go b/pkg/cmd/server/api/v1/conversions.go index 29466832a8c6..5b09a5ef2e92 100644 --- a/pkg/cmd/server/api/v1/conversions.go +++ b/pkg/cmd/server/api/v1/conversions.go @@ -112,6 +112,9 @@ func addDefaultingFuncs(scheme *runtime.Scheme) error { if obj.MasterCount == 0 { obj.MasterCount = 1 } + if obj.MasterEndpointReconcileTTL == 0 { + obj.MasterEndpointReconcileTTL = 15 + } if len(obj.APILevels) == 0 { obj.APILevels = internal.DefaultKubernetesAPILevels } diff --git a/pkg/cmd/server/api/v1/swagger_doc.go b/pkg/cmd/server/api/v1/swagger_doc.go index 38500a3c133e..15041ad50c81 100644 --- a/pkg/cmd/server/api/v1/swagger_doc.go +++ b/pkg/cmd/server/api/v1/swagger_doc.go @@ -378,21 +378,22 @@ func (KubeletConnectionInfo) SwaggerDoc() map[string]string { } var map_KubernetesMasterConfig = map[string]string{ - "": "KubernetesMasterConfig holds the necessary configuration options for the Kubernetes master", - "apiLevels": "APILevels is a list of API levels that should be enabled on startup: v1 as examples", - "disabledAPIGroupVersions": "DisabledAPIGroupVersions is a map of groups to the versions (or *) that should be disabled.", - "masterIP": "MasterIP is the public IP address of kubernetes stuff. If empty, the first result from net.InterfaceAddrs will be used.", - "masterCount": "MasterCount is the number of expected masters that should be running. This value defaults to 1 and may be set to a positive integer, or if set to -1, indicates this is part of a cluster.", - "servicesSubnet": "ServicesSubnet is the subnet to use for assigning service IPs", - "servicesNodePortRange": "ServicesNodePortRange is the range to use for assigning service public ports on a host.", - "staticNodeNames": "StaticNodeNames is the list of nodes that are statically known", - "schedulerConfigFile": "SchedulerConfigFile points to a file that describes how to set up the scheduler. 
If empty, you get the default scheduling rules.", - "podEvictionTimeout": "PodEvictionTimeout controls grace period for deleting pods on failed nodes. It takes valid time duration string. If empty, you get the default pod eviction timeout.", - "proxyClientInfo": "ProxyClientInfo specifies the client cert/key to use when proxying to pods", - "admissionConfig": "AdmissionConfig contains admission control plugin configuration.", - "apiServerArguments": "APIServerArguments are key value pairs that will be passed directly to the Kube apiserver that match the apiservers's command line arguments. These are not migrated, but if you reference a value that does not exist the server will not start. These values may override other settings in KubernetesMasterConfig which may cause invalid configurations.", - "controllerArguments": "ControllerArguments are key value pairs that will be passed directly to the Kube controller manager that match the controller manager's command line arguments. These are not migrated, but if you reference a value that does not exist the server will not start. These values may override other settings in KubernetesMasterConfig which may cause invalid configurations.", - "schedulerArguments": "SchedulerArguments are key value pairs that will be passed directly to the Kube scheduler that match the scheduler's command line arguments. These are not migrated, but if you reference a value that does not exist the server will not start. These values may override other settings in KubernetesMasterConfig which may cause invalid configurations.", + "": "KubernetesMasterConfig holds the necessary configuration options for the Kubernetes master", + "apiLevels": "APILevels is a list of API levels that should be enabled on startup: v1 as examples", + "disabledAPIGroupVersions": "DisabledAPIGroupVersions is a map of groups to the versions (or *) that should be disabled.", + "masterIP": "MasterIP is the public IP address of kubernetes stuff. 
If empty, the first result from net.InterfaceAddrs will be used.", + "masterCount": "MasterCount is the number of expected masters that should be running. This value defaults to 1 and may be set to a positive integer, or if set to -1, indicates this is part of a cluster.", + "masterEndpointReconcileTTL": "MasterEndpointReconcileTTL sets the time to live in seconds of an endpoint record recorded by each master. The endpoints are checked at an interval that is 2/3 of this value and this value defaults to 15s if unset. In very large clusters, this value may be increased to reduce the possibility that the master endpoint record expires (due to other load on the etcd server) and causes masters to drop in and out of the kubernetes service record. It is not recommended to set this value below 15s.", + "servicesSubnet": "ServicesSubnet is the subnet to use for assigning service IPs", + "servicesNodePortRange": "ServicesNodePortRange is the range to use for assigning service public ports on a host.", + "staticNodeNames": "StaticNodeNames is the list of nodes that are statically known", + "schedulerConfigFile": "SchedulerConfigFile points to a file that describes how to set up the scheduler. If empty, you get the default scheduling rules.", + "podEvictionTimeout": "PodEvictionTimeout controls grace period for deleting pods on failed nodes. It takes valid time duration string. If empty, you get the default pod eviction timeout.", + "proxyClientInfo": "ProxyClientInfo specifies the client cert/key to use when proxying to pods", + "admissionConfig": "AdmissionConfig contains admission control plugin configuration.", + "apiServerArguments": "APIServerArguments are key value pairs that will be passed directly to the Kube apiserver that match the apiservers's command line arguments. These are not migrated, but if you reference a value that does not exist the server will not start. 
These values may override other settings in KubernetesMasterConfig which may cause invalid configurations.", + "controllerArguments": "ControllerArguments are key value pairs that will be passed directly to the Kube controller manager that match the controller manager's command line arguments. These are not migrated, but if you reference a value that does not exist the server will not start. These values may override other settings in KubernetesMasterConfig which may cause invalid configurations.", + "schedulerArguments": "SchedulerArguments are key value pairs that will be passed directly to the Kube scheduler that match the scheduler's command line arguments. These are not migrated, but if you reference a value that does not exist the server will not start. These values may override other settings in KubernetesMasterConfig which may cause invalid configurations.", } func (KubernetesMasterConfig) SwaggerDoc() map[string]string { diff --git a/pkg/cmd/server/api/v1/types.go b/pkg/cmd/server/api/v1/types.go index f43bec3c6ea3..62419003d38a 100644 --- a/pkg/cmd/server/api/v1/types.go +++ b/pkg/cmd/server/api/v1/types.go @@ -1082,6 +1082,11 @@ type KubernetesMasterConfig struct { // MasterCount is the number of expected masters that should be running. This value defaults to 1 and may be set to a positive integer, // or if set to -1, indicates this is part of a cluster. MasterCount int `json:"masterCount"` + // MasterEndpointReconcileTTL sets the time to live in seconds of an endpoint record recorded by each master. The endpoints are checked + // at an interval that is 2/3 of this value and this value defaults to 15s if unset. In very large clusters, this value may be increased to + // reduce the possibility that the master endpoint record expires (due to other load on the etcd server) and causes masters to drop in and + // out of the kubernetes service record. It is not recommended to set this value below 15s. 
+ MasterEndpointReconcileTTL int `json:"masterEndpointReconcileTTL"` // ServicesSubnet is the subnet to use for assigning service IPs ServicesSubnet string `json:"servicesSubnet"` // ServicesNodePortRange is the range to use for assigning service public ports on a host. diff --git a/pkg/cmd/server/api/v1/types_test.go b/pkg/cmd/server/api/v1/types_test.go index b2359a945628..6e4a9b1c7f28 100644 --- a/pkg/cmd/server/api/v1/types_test.go +++ b/pkg/cmd/server/api/v1/types_test.go @@ -198,6 +198,7 @@ kubernetesMasterConfig: controllerArguments: null disabledAPIGroupVersions: null masterCount: 0 + masterEndpointReconcileTTL: 0 masterIP: "" podEvictionTimeout: "" proxyClientInfo: diff --git a/pkg/cmd/server/kubernetes/master/master_config.go b/pkg/cmd/server/kubernetes/master/master_config.go index db0d7b747975..acf8b135a9fe 100644 --- a/pkg/cmd/server/kubernetes/master/master_config.go +++ b/pkg/cmd/server/kubernetes/master/master_config.go @@ -537,7 +537,11 @@ func buildKubeApiserverConfig( } if kubeApiserverConfig.EnableCoreControllers { - glog.V(2).Info("Using the lease endpoint reconciler") + ttl := masterConfig.KubernetesMasterConfig.MasterEndpointReconcileTTL + interval := ttl * 2 / 3 + + glog.V(2).Infof("Using the lease endpoint reconciler with TTL=%ds and interval=%ds", ttl, interval) + config, err := kubeApiserverConfig.StorageFactory.NewConfig(kapi.Resource("apiServerIPInfo")) if err != nil { return nil, err @@ -546,7 +550,8 @@ func buildKubeApiserverConfig( if err != nil { return nil, err } - masterLeases := newMasterLeases(leaseStorage) + + masterLeases := newMasterLeases(leaseStorage, ttl) endpointConfig, err := kubeApiserverConfig.StorageFactory.NewConfig(kapi.Resource("endpoints")) if err != nil { @@ -563,7 +568,7 @@ func buildKubeApiserverConfig( kubeApiserverConfig.EndpointReconcilerConfig = master.EndpointReconcilerConfig{ Reconciler: election.NewLeaseEndpointReconciler(endpointRegistry, masterLeases), - Interval: 
master.DefaultEndpointReconcilerInterval,
+			Interval:   time.Duration(interval) * time.Second,
 		}
 	}
 
@@ -795,8 +800,14 @@ func readCAorNil(file string) ([]byte, error) {
 	return ioutil.ReadFile(file)
 }
 
-func newMasterLeases(storage storage.Interface) election.Leases {
-	// leaseTTL is in seconds, i.e. 15 means 15 seconds; do NOT do 15*time.Second!
-	leaseTTL := uint64((master.DefaultEndpointReconcilerInterval + 5*time.Second) / time.Second) // add 5 seconds for wiggle room
-	return election.NewLeases(storage, "/masterleases/", leaseTTL)
+// newMasterLeases builds the election.Leases store rooted at "/masterleases/".
+// masterEndpointReconcileTTL is the lease lifetime in whole seconds, i.e. 15
+// means 15 seconds; do NOT pass 15*time.Second.
+func newMasterLeases(storage storage.Interface, masterEndpointReconcileTTL int) election.Leases {
+	// A negative TTL would wrap to an enormous value in the uint64 conversion
+	// below, and zero is the "unset" value, so use the documented 15s default.
+	if masterEndpointReconcileTTL <= 0 {
+		masterEndpointReconcileTTL = 15
+	}
+	return election.NewLeases(storage, "/masterleases/", uint64(masterEndpointReconcileTTL))
 }
diff --git a/pkg/cmd/server/kubernetes/master/master_test.go b/pkg/cmd/server/kubernetes/master/master_test.go
index e79bada15a2b..994904fae990 100644
--- a/pkg/cmd/server/kubernetes/master/master_test.go
+++ b/pkg/cmd/server/kubernetes/master/master_test.go
@@ -29,7 +29,7 @@ func TestNewMasterLeasesHasCorrectTTL(t *testing.T) {
 	storageInterface, _ := restOptions.Decorator(kapi.Scheme, restOptions.StorageConfig, &watchCacheDisabled, nil, "masterleases", nil, nil, nil, nil)
 	defer server.Terminate(t)
 
-	masterLeases := newMasterLeases(storageInterface)
+	masterLeases := newMasterLeases(storageInterface, 15)
 	if err := masterLeases.UpdateLease("1.2.3.4"); err != nil {
 		t.Fatalf("error updating lease: %v", err)
 	}