Skip to content

Commit d44aa4c

Browse files
authored
Merge pull request #5894 from tstromberg/wait-nada
Make --wait=false non-blocking, --wait=true blocks on system pods
2 parents 484fdcc + e338ac0 commit d44aa4c

File tree

11 files changed

+118
-200
lines changed

11 files changed

+118
-200
lines changed

Diff for: cmd/minikube/cmd/start.go

+8-14
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ func initMinikubeFlags() {
170170
startCmd.Flags().String(criSocket, "", "The cri socket path to be used.")
171171
startCmd.Flags().String(networkPlugin, "", "The name of the network plugin.")
172172
startCmd.Flags().Bool(enableDefaultCNI, false, "Enable the default CNI plugin (/etc/cni/net.d/k8s.conf). Used in conjunction with \"--network-plugin=cni\".")
173-
startCmd.Flags().Bool(waitUntilHealthy, false, "Wait until Kubernetes core services are healthy before exiting.")
173+
startCmd.Flags().Bool(waitUntilHealthy, true, "Block until the apiserver is servicing API requests")
174174
startCmd.Flags().Duration(waitTimeout, 6*time.Minute, "max time to wait per Kubernetes core services to be healthy.")
175175
startCmd.Flags().Bool(nativeSSH, true, "Use native Golang SSH client (default true). Set to 'false' to use the command line 'ssh' command when accessing the docker machine. Useful for the machine drivers when they will not start with 'Waiting for SSH'.")
176176
startCmd.Flags().Bool(autoUpdate, true, "If set, automatically updates drivers to the latest version. Defaults to true.")
@@ -365,7 +365,13 @@ func runStart(cmd *cobra.Command, args []string) {
365365
if driverName == driver.None {
366366
prepareNone()
367367
}
368-
waitCluster(bs, config)
368+
369+
// Skip pre-existing, because we already waited for health
370+
if viper.GetBool(waitUntilHealthy) && !preExists {
371+
if err := bs.WaitForCluster(config.KubernetesConfig, viper.GetDuration(waitTimeout)); err != nil {
372+
exit.WithError("Wait failed", err)
373+
}
374+
}
369375
if err := showKubectlInfo(kubeconfig, k8sVersion, config.Name); err != nil {
370376
glog.Errorf("kubectl info: %v", err)
371377
}
@@ -389,18 +395,6 @@ func enableAddons() {
389395
}
390396
}
391397

392-
func waitCluster(bs bootstrapper.Bootstrapper, config cfg.MachineConfig) {
393-
var podsToWaitFor []string
394-
395-
if !viper.GetBool(waitUntilHealthy) {
396-
// only wait for apiserver if wait=false
397-
podsToWaitFor = []string{"apiserver"}
398-
}
399-
if err := bs.WaitForPods(config.KubernetesConfig, viper.GetDuration(waitTimeout), podsToWaitFor); err != nil {
400-
exit.WithError("Wait failed", err)
401-
}
402-
}
403-
404398
func displayVersion(version string) {
405399
prefix := ""
406400
if viper.GetString(cfg.MachineProfile) != constants.DefaultMachineName {

Diff for: pkg/minikube/bootstrapper/bootstrapper.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ type Bootstrapper interface {
4141
UpdateCluster(config.KubernetesConfig) error
4242
RestartCluster(config.KubernetesConfig) error
4343
DeleteCluster(config.KubernetesConfig) error
44-
WaitForPods(config.KubernetesConfig, time.Duration, []string) error
44+
WaitForCluster(config.KubernetesConfig, time.Duration) error
4545
// LogCommands returns a map of log type to a command which will display that log.
4646
LogCommands(LogOptions) map[string]string
4747
SetupCerts(cfg config.KubernetesConfig) error

Diff for: pkg/minikube/bootstrapper/kubeadm/kubeadm.go

+92-126
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,7 @@ import (
3838
"github.com/pkg/errors"
3939
"github.com/spf13/viper"
4040
"golang.org/x/sync/errgroup"
41-
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
42-
"k8s.io/apimachinery/pkg/labels"
41+
meta "k8s.io/apimachinery/pkg/apis/meta/v1"
4342
"k8s.io/apimachinery/pkg/util/wait"
4443
"k8s.io/client-go/kubernetes"
4544
kconst "k8s.io/kubernetes/cmd/kubeadm/app/constants"
@@ -66,7 +65,6 @@ const (
6665
defaultCNIConfigPath = "/etc/cni/net.d/k8s.conf"
6766
kubeletServiceFile = "/lib/systemd/system/kubelet.service"
6867
kubeletSystemdConfFile = "/etc/systemd/system/kubelet.service.d/10-kubeadm.conf"
69-
AllPods = "ALL_PODS"
7068
)
7169

7270
const (
@@ -96,22 +94,6 @@ var KubeadmExtraArgsWhitelist = map[int][]string{
9694
},
9795
}
9896

99-
type pod struct {
100-
// Human friendly name
101-
name string
102-
key string
103-
value string
104-
}
105-
106-
// PodsByLayer are queries we run when health checking, sorted roughly by dependency layer
107-
var PodsByLayer = []pod{
108-
{"proxy", "k8s-app", "kube-proxy"},
109-
{"etcd", "component", "etcd"},
110-
{"scheduler", "component", "kube-scheduler"},
111-
{"controller", "component", "kube-controller-manager"},
112-
{"dns", "k8s-app", "kube-dns"},
113-
}
114-
11597
// yamlConfigPath is the path to the kubeadm configuration
11698
var yamlConfigPath = path.Join(vmpath.GuestEphemeralDir, "kubeadm.yaml")
11799

@@ -166,12 +148,13 @@ func (k *Bootstrapper) GetAPIServerStatus(ip net.IP, apiserverPort int) (string,
166148
}
167149
client := &http.Client{Transport: tr}
168150
resp, err := client.Get(url)
169-
glog.Infof("%s response: %v %+v", url, err, resp)
170151
// Connection refused, usually.
171152
if err != nil {
153+
glog.Warningf("%s response: %v %+v", url, err, resp)
172154
return state.Stopped.String(), nil
173155
}
174156
if resp.StatusCode != http.StatusOK {
157+
glog.Warningf("%s response: %v %+v", url, err, resp)
175158
return state.Error.String(), nil
176159
}
177160
return state.Running.String(), nil
@@ -354,7 +337,6 @@ func addAddons(files *[]assets.CopyableFile, data interface{}) error {
354337

355338
// client returns a Kubernetes client to use to speak to a kubeadm launched apiserver
356339
func (k *Bootstrapper) client(k8s config.KubernetesConfig) (*kubernetes.Clientset, error) {
357-
// Catch case if WaitForPods was called with a stale ~/.kube/config
358340
config, err := kapi.ClientConfig(k.contextName)
359341
if err != nil {
360342
return nil, errors.Wrap(err, "client config")
@@ -369,67 +351,104 @@ func (k *Bootstrapper) client(k8s config.KubernetesConfig) (*kubernetes.Clientse
369351
return kubernetes.NewForConfig(config)
370352
}
371353

372-
// WaitForPods blocks until pods specified in podsToWaitFor appear to be healthy.
373-
func (k *Bootstrapper) WaitForPods(k8s config.KubernetesConfig, timeout time.Duration, podsToWaitFor []string) error {
374-
// Do not wait for "k8s-app" pods in the case of CNI, as they are managed
375-
// by a CNI plugin which is usually started after minikube has been brought
376-
// up. Otherwise, minikube won't start, as "k8s-app" pods are not ready.
377-
componentsOnly := k8s.NetworkPlugin == "cni"
378-
out.T(out.WaitingPods, "Waiting for:")
379-
380-
// Wait until the apiserver can answer queries properly. We don't care if the apiserver
381-
// pod shows up as registered, but need the webserver for all subsequent queries.
382-
383-
if shouldWaitForPod("apiserver", podsToWaitFor) {
384-
out.String(" apiserver")
385-
if err := k.waitForAPIServer(k8s); err != nil {
386-
return errors.Wrap(err, "waiting for apiserver")
354+
func (k *Bootstrapper) waitForAPIServerProcess(start time.Time, timeout time.Duration) error {
355+
glog.Infof("waiting for apiserver process to appear ...")
356+
err := wait.PollImmediate(time.Second*1, timeout, func() (bool, error) {
357+
if time.Since(start) > timeout {
358+
return false, fmt.Errorf("cluster wait timed out during process check")
387359
}
360+
rr, ierr := k.c.RunCmd(exec.Command("sudo", "pgrep", "kube-apiserver"))
361+
if ierr != nil {
362+
glog.Warningf("pgrep apiserver: %v cmd: %s", ierr, rr.Command())
363+
return false, nil
364+
}
365+
return true, nil
366+
})
367+
if err != nil {
368+
return fmt.Errorf("apiserver process never appeared")
388369
}
370+
glog.Infof("duration metric: took %s to wait for apiserver process to appear ...", time.Since(start))
371+
return nil
372+
}
373+
374+
func (k *Bootstrapper) waitForAPIServerHealthz(start time.Time, k8s config.KubernetesConfig, timeout time.Duration) error {
375+
glog.Infof("waiting for apiserver healthz status ...")
376+
hStart := time.Now()
377+
healthz := func() (bool, error) {
378+
if time.Since(start) > timeout {
379+
return false, fmt.Errorf("cluster wait timed out during healthz check")
380+
}
389381

382+
status, err := k.GetAPIServerStatus(net.ParseIP(k8s.NodeIP), k8s.NodePort)
383+
if err != nil {
384+
glog.Warningf("status: %v", err)
385+
return false, nil
386+
}
387+
if status != "Running" {
388+
return false, nil
389+
}
390+
return true, nil
391+
}
392+
393+
if err := wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, healthz); err != nil {
394+
return fmt.Errorf("apiserver healthz never reported healthy")
395+
}
396+
glog.Infof("duration metric: took %s to wait for apiserver healthz status ...", time.Since(hStart))
397+
return nil
398+
}
399+
400+
func (k *Bootstrapper) waitForSystemPods(start time.Time, k8s config.KubernetesConfig, timeout time.Duration) error {
401+
glog.Infof("waiting for kube-system pods to appear ...")
402+
pStart := time.Now()
390403
client, err := k.client(k8s)
391404
if err != nil {
392405
return errors.Wrap(err, "client")
393406
}
394407

395-
for _, p := range PodsByLayer {
396-
if componentsOnly && p.key != "component" { // skip component check if network plugin is cni
397-
continue
408+
podStart := time.Time{}
409+
podList := func() (bool, error) {
410+
if time.Since(start) > timeout {
411+
return false, fmt.Errorf("cluster wait timed out during pod check")
412+
}
413+
// Wait for any system pod, as waiting for apiserver may block until etcd
414+
pods, err := client.CoreV1().Pods("kube-system").List(meta.ListOptions{})
415+
if len(pods.Items) < 2 {
416+
podStart = time.Time{}
417+
return false, nil
418+
}
419+
if err != nil {
420+
podStart = time.Time{}
421+
return false, nil
398422
}
399-
if !shouldWaitForPod(p.name, podsToWaitFor) {
400-
continue
423+
if podStart.IsZero() {
424+
podStart = time.Now()
401425
}
402-
out.String(" %s", p.name)
403-
selector := labels.SelectorFromSet(labels.Set(map[string]string{p.key: p.value}))
404-
if err := kapi.WaitForPodsWithLabelRunning(client, "kube-system", selector, timeout); err != nil {
405-
return errors.Wrap(err, fmt.Sprintf("waiting for %s=%s", p.key, p.value))
426+
427+
glog.Infof("%d kube-system pods found since %s", len(pods.Items), podStart)
428+
if time.Since(podStart) > 2*kconst.APICallRetryInterval {
429+
glog.Infof("stability requirement met, returning")
430+
return true, nil
406431
}
432+
return false, nil
433+
}
434+
if err = wait.PollImmediate(kconst.APICallRetryInterval, kconst.DefaultControlPlaneTimeout, podList); err != nil {
435+
return fmt.Errorf("apiserver never returned a pod list")
407436
}
408-
out.Ln("")
437+
glog.Infof("duration metric: took %s to wait for pod list to return data ...", time.Since(pStart))
409438
return nil
410439
}
411440

412-
// shouldWaitForPod returns true if:
413-
// 1. podsToWaitFor is nil
414-
// 2. name is in podsToWaitFor
415-
// 3. ALL_PODS is in podsToWaitFor
416-
// else, return false
417-
func shouldWaitForPod(name string, podsToWaitFor []string) bool {
418-
if podsToWaitFor == nil {
419-
return true
420-
}
421-
if len(podsToWaitFor) == 0 {
422-
return false
423-
}
424-
for _, p := range podsToWaitFor {
425-
if p == AllPods {
426-
return true
427-
}
428-
if p == name {
429-
return true
430-
}
441+
// WaitForCluster blocks until the cluster appears to be healthy
442+
func (k *Bootstrapper) WaitForCluster(k8s config.KubernetesConfig, timeout time.Duration) error {
443+
start := time.Now()
444+
out.T(out.Waiting, "Waiting for cluster to come online ...")
445+
if err := k.waitForAPIServerProcess(start, timeout); err != nil {
446+
return err
431447
}
432-
return false
448+
if err := k.waitForAPIServerHealthz(start, k8s, timeout); err != nil {
449+
return err
450+
}
451+
return k.waitForSystemPods(start, k8s, timeout)
433452
}
434453

435454
// RestartCluster restarts the Kubernetes cluster configured by kubeadm
@@ -472,11 +491,15 @@ func (k *Bootstrapper) RestartCluster(k8s config.KubernetesConfig) error {
472491
}
473492
}
474493

475-
if err := k.waitForAPIServer(k8s); err != nil {
476-
return errors.Wrap(err, "waiting for apiserver")
494+
// We must ensure that the apiserver is healthy before proceeding
495+
if err := k.waitForAPIServerHealthz(time.Now(), k8s, kconst.DefaultControlPlaneTimeout); err != nil {
496+
return errors.Wrap(err, "apiserver healthz")
497+
}
498+
if err := k.waitForSystemPods(time.Now(), k8s, kconst.DefaultControlPlaneTimeout); err != nil {
499+
return errors.Wrap(err, "system pods")
477500
}
478501

479-
// restart the proxy and coredns
502+
// Explicitly re-enable kubeadm addons (proxy, coredns) so that they will check for IP or configuration changes.
480503
if rr, err := k.c.RunCmd(exec.Command("/bin/bash", "-c", fmt.Sprintf("%s phase addon all --config %s", baseCmd, yamlConfigPath))); err != nil {
481504
return errors.Wrapf(err, fmt.Sprintf("addon phase cmd:%q", rr.Command()))
482505
}
@@ -487,63 +510,6 @@ func (k *Bootstrapper) RestartCluster(k8s config.KubernetesConfig) error {
487510
return nil
488511
}
489512

490-
// waitForAPIServer waits for the apiserver to start up
491-
func (k *Bootstrapper) waitForAPIServer(k8s config.KubernetesConfig) error {
492-
start := time.Now()
493-
defer func() {
494-
glog.Infof("duration metric: took %s to wait for apiserver status ...", time.Since(start))
495-
}()
496-
497-
glog.Infof("Waiting for apiserver process ...")
498-
// To give a better error message, first check for process existence via ssh
499-
// Needs minutes in case the image isn't cached (such as with v1.10.x)
500-
err := wait.PollImmediate(time.Millisecond*300, time.Minute*3, func() (bool, error) {
501-
rr, ierr := k.c.RunCmd(exec.Command("sudo", "pgrep", "kube-apiserver"))
502-
if ierr != nil {
503-
glog.Warningf("pgrep apiserver: %v cmd: %s", ierr, rr.Command())
504-
return false, nil
505-
}
506-
return true, nil
507-
})
508-
if err != nil {
509-
return fmt.Errorf("apiserver process never appeared")
510-
}
511-
512-
glog.Infof("Waiting for apiserver to port healthy status ...")
513-
var client *kubernetes.Clientset
514-
f := func() (bool, error) {
515-
status, err := k.GetAPIServerStatus(net.ParseIP(k8s.NodeIP), k8s.NodePort)
516-
glog.Infof("apiserver status: %s, err: %v", status, err)
517-
if err != nil {
518-
glog.Warningf("status: %v", err)
519-
return false, nil
520-
}
521-
if status != "Running" {
522-
return false, nil
523-
}
524-
// Make sure apiserver pod is retrievable
525-
if client == nil {
526-
// We only want to get the clientset once, because this line takes ~1 second to complete
527-
client, err = k.client(k8s)
528-
if err != nil {
529-
glog.Warningf("get kubernetes client: %v", err)
530-
return false, nil
531-
}
532-
}
533-
534-
_, err = client.CoreV1().Pods("kube-system").Get("kube-apiserver-minikube", metav1.GetOptions{})
535-
if err != nil {
536-
return false, nil
537-
}
538-
539-
return true, nil
540-
// TODO: Check apiserver/kubelet logs for fatal errors so that users don't
541-
// need to wait minutes to find out their flag didn't work.
542-
}
543-
err = wait.PollImmediate(kconst.APICallRetryInterval, 2*kconst.DefaultControlPlaneTimeout, f)
544-
return err
545-
}
546-
547513
// DeleteCluster removes the components that were started earlier
548514
func (k *Bootstrapper) DeleteCluster(k8s config.KubernetesConfig) error {
549515
version, err := parseKubernetesVersion(k8s.KubernetesVersion)

Diff for: pkg/minikube/bootstrapper/kubeadm/kubeadm_test.go

-42
Original file line numberDiff line numberDiff line change
@@ -363,45 +363,3 @@ func TestGenerateConfig(t *testing.T) {
363363
}
364364
}
365365
}
366-
367-
func TestShouldWaitForPod(t *testing.T) {
368-
tests := []struct {
369-
description string
370-
pod string
371-
podsToWaitFor []string
372-
expected bool
373-
}{
374-
{
375-
description: "pods to wait for is nil",
376-
pod: "apiserver",
377-
expected: true,
378-
}, {
379-
description: "pods to wait for is empty",
380-
pod: "apiserver",
381-
podsToWaitFor: []string{},
382-
}, {
383-
description: "pod is in podsToWaitFor",
384-
pod: "apiserver",
385-
podsToWaitFor: []string{"etcd", "apiserver"},
386-
expected: true,
387-
}, {
388-
description: "pod is not in podsToWaitFor",
389-
pod: "apiserver",
390-
podsToWaitFor: []string{"etcd", "gvisor"},
391-
}, {
392-
description: "wait for all pods",
393-
pod: "apiserver",
394-
podsToWaitFor: []string{"ALL_PODS"},
395-
expected: true,
396-
},
397-
}
398-
399-
for _, test := range tests {
400-
t.Run(test.description, func(t *testing.T) {
401-
actual := shouldWaitForPod(test.pod, test.podsToWaitFor)
402-
if actual != test.expected {
403-
t.Fatalf("unexpected diff: got %t, expected %t", actual, test.expected)
404-
}
405-
})
406-
}
407-
}

0 commit comments

Comments
 (0)