Skip to content

Commit 42863bf

Browse files
authored
Merge pull request #3774 from tstromberg/wait-longer
Improve reliability of kube-proxy configmap updates (retry, block until pods are up)
2 parents dffdd82 + b9675da commit 42863bf

File tree

4 files changed

+119
-37
lines changed

4 files changed

+119
-37
lines changed

pkg/minikube/bootstrapper/kubeadm/kubeadm.go

+64-4
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import (
3535
download "github.com/jimmidyson/go-download"
3636
"github.com/pkg/errors"
3737
"golang.org/x/sync/errgroup"
38+
"k8s.io/apimachinery/pkg/labels"
3839
"k8s.io/minikube/pkg/minikube/assets"
3940
"k8s.io/minikube/pkg/minikube/bootstrapper"
4041
"k8s.io/minikube/pkg/minikube/config"
@@ -64,6 +65,24 @@ var SkipPreflights = []string{
6465
"CRI",
6566
}
6667

68+
type pod struct {
69+
// Human friendly name
70+
name string
71+
key string
72+
value string
73+
}
74+
75+
// PodsByLayer are queries we run when health checking, sorted roughly by dependency layer
76+
var PodsByLayer = []pod{
77+
{"apiserver", "component", "kube-apiserver"},
78+
{"proxy", "k8s-app", "kube-proxy"},
79+
{"etcd", "component", "etcd"},
80+
{"scheduler", "component", "kube-scheduler"},
81+
{"controller", "component", "kube-controller-manager"},
82+
{"addon-manager", "component", "kube-addon-manager"},
83+
{"dns", "k8s-app", "kube-dns"},
84+
}
85+
6786
// SkipAdditionalPreflights are additional preflights we skip depending on the runtime in use.
6887
var SkipAdditionalPreflights = map[string][]string{}
6988

@@ -175,12 +194,20 @@ func (k *KubeadmBootstrapper) StartCluster(k8s config.KubernetesConfig) error {
175194
}
176195
}
177196

178-
// NOTE: We have not yet asserted that we can access the apiserver. Now would be a great time to do so.
197+
if err := waitForPods(false); err != nil {
198+
return errors.Wrap(err, "wait")
199+
}
200+
179201
console.OutStyle("permissions", "Configuring cluster permissions ...")
180202
if err := util.RetryAfter(100, elevateKubeSystemPrivileges, time.Millisecond*500); err != nil {
181203
return errors.Wrap(err, "timed out waiting to elevate kube-system RBAC privileges")
182204
}
183205

206+
// Make sure elevating privileges didn't screw anything up
207+
if err := waitForPods(true); err != nil {
208+
return errors.Wrap(err, "wait")
209+
}
210+
184211
return nil
185212
}
186213

@@ -204,6 +231,31 @@ func addAddons(files *[]assets.CopyableFile) error {
204231
return nil
205232
}
206233

234+
// waitForPods waits until the important Kubernetes pods are in running state
235+
func waitForPods(quiet bool) error {
236+
if !quiet {
237+
console.OutStyle("waiting-pods", "Waiting for pods:")
238+
}
239+
client, err := util.GetClient()
240+
if err != nil {
241+
return errors.Wrap(err, "k8s client")
242+
}
243+
244+
for _, p := range PodsByLayer {
245+
if !quiet {
246+
console.Out(" %s", p.name)
247+
}
248+
selector := labels.SelectorFromSet(labels.Set(map[string]string{p.key: p.value}))
249+
if err := util.WaitForPodsWithLabelRunning(client, "kube-system", selector); err != nil {
250+
return errors.Wrap(err, fmt.Sprintf("waiting for %s=%s", p.key, p.value))
251+
}
252+
}
253+
if !quiet {
254+
console.OutLn("")
255+
}
256+
return nil
257+
}
258+
207259
// RestartCluster restarts the Kubernetes cluster configured by kubeadm
208260
func (k *KubeadmBootstrapper) RestartCluster(k8s config.KubernetesConfig) error {
209261
version, err := ParseKubernetesVersion(k8s.KubernetesVersion)
@@ -232,12 +284,20 @@ func (k *KubeadmBootstrapper) RestartCluster(k8s config.KubernetesConfig) error
232284
}
233285
}
234286

235-
// NOTE: Perhaps now would be a good time to check apiserver health?
236-
console.OutStyle("waiting", "Waiting for kube-proxy to come back up ...")
237-
if err := restartKubeProxy(k8s); err != nil {
287+
if err := waitForPods(false); err != nil {
288+
return errors.Wrap(err, "wait")
289+
}
290+
291+
console.OutStyle("reconfiguring", "Updating kube-proxy configuration ...")
292+
if err = util.RetryAfter(5, func() error { return updateKubeProxyConfigMap(k8s) }, 5*time.Second); err != nil {
238293
return errors.Wrap(err, "restarting kube-proxy")
239294
}
240295

296+
// Make sure the kube-proxy restart didn't screw anything up.
297+
if err := waitForPods(true); err != nil {
298+
return errors.Wrap(err, "wait")
299+
}
300+
241301
return nil
242302
}
243303

pkg/minikube/bootstrapper/kubeadm/util.go

+26-8
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ import (
2828
clientv1 "k8s.io/api/core/v1"
2929
rbacv1beta1 "k8s.io/api/rbac/v1beta1"
3030
apierrs "k8s.io/apimachinery/pkg/api/errors"
31-
"k8s.io/apimachinery/pkg/apis/meta/v1"
3231
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
32+
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3333
"k8s.io/apimachinery/pkg/labels"
3434
"k8s.io/apimachinery/pkg/types"
3535
"k8s.io/apimachinery/pkg/util/strategicpatch"
@@ -155,22 +155,23 @@ users:
155155
`
156156
)
157157

158-
func restartKubeProxy(k8s config.KubernetesConfig) error {
158+
// updateKubeProxyConfigMap updates the IP & port kube-proxy listens on, and restarts it.
159+
func updateKubeProxyConfigMap(k8s config.KubernetesConfig) error {
159160
client, err := util.GetClient()
160161
if err != nil {
161162
return errors.Wrap(err, "getting k8s client")
162163
}
163164

164165
selector := labels.SelectorFromSet(labels.Set(map[string]string{"k8s-app": "kube-proxy"}))
165166
if err := util.WaitForPodsWithLabelRunning(client, "kube-system", selector); err != nil {
166-
return errors.Wrap(err, "waiting for kube-proxy to be up for configmap update")
167+
return errors.Wrap(err, "kube-proxy not running")
167168
}
168169

169170
cfgMap, err := client.CoreV1().ConfigMaps("kube-system").Get("kube-proxy", metav1.GetOptions{})
170171
if err != nil {
171-
return errors.Wrap(err, "getting kube-proxy configmap")
172+
return &util.RetriableError{Err: errors.Wrap(err, "getting kube-proxy configmap")}
172173
}
173-
174+
glog.Infof("kube-proxy config: %v", cfgMap.Data[kubeconfigConf])
174175
t := template.Must(template.New("kubeProxyTmpl").Parse(kubeProxyConfigmapTmpl))
175176
opts := struct {
176177
AdvertiseAddress string
@@ -188,10 +189,21 @@ func restartKubeProxy(k8s config.KubernetesConfig) error {
188189
if cfgMap.Data == nil {
189190
cfgMap.Data = map[string]string{}
190191
}
191-
cfgMap.Data[kubeconfigConf] = strings.TrimSuffix(kubeconfig.String(), "\n")
192192

193+
updated := strings.TrimSuffix(kubeconfig.String(), "\n")
194+
glog.Infof("updated kube-proxy config: %s", updated)
195+
196+
// An optimization, but also one that's unlikely, as kubeadm writes the address as 'localhost'
197+
if cfgMap.Data[kubeconfigConf] == updated {
198+
glog.Infof("kube-proxy config appears to require no change, not restarting kube-proxy")
199+
return nil
200+
}
201+
cfgMap.Data[kubeconfigConf] = updated
202+
203+
// Make this step retriable, as it can fail with:
204+
// "Operation cannot be fulfilled on configmaps "kube-proxy": the object has been modified; please apply your changes to the latest version and try again"
193205
if _, err := client.CoreV1().ConfigMaps("kube-system").Update(cfgMap); err != nil {
194-
return errors.Wrap(err, "updating configmap")
206+
return &util.RetriableError{Err: errors.Wrap(err, "updating configmap")}
195207
}
196208

197209
pods, err := client.CoreV1().Pods("kube-system").List(metav1.ListOptions{
@@ -201,10 +213,16 @@ func restartKubeProxy(k8s config.KubernetesConfig) error {
201213
return errors.Wrap(err, "listing kube-proxy pods")
202214
}
203215
for _, pod := range pods.Items {
216+
// Retriable, as known to fail with: pods "<name>" not found
204217
if err := client.CoreV1().Pods(pod.Namespace).Delete(pod.Name, &metav1.DeleteOptions{}); err != nil {
205-
return errors.Wrapf(err, "deleting pod %+v", pod)
218+
return &util.RetriableError{Err: errors.Wrapf(err, "deleting pod %+v", pod)}
206219
}
207220
}
208221

222+
// Wait for the scheduler to restart kube-proxy
223+
if err := util.WaitForPodsWithLabelRunning(client, "kube-system", selector); err != nil {
224+
return errors.Wrap(err, "kube-proxy not running")
225+
}
226+
209227
return nil
210228
}

pkg/minikube/console/style.go

+25-23
Original file line numberDiff line numberDiff line change
@@ -41,29 +41,31 @@ type style struct {
4141
// styles is a map of style name to style struct
4242
// For consistency, ensure that emojis added render with the same width across platforms.
4343
var styles = map[string]style{
44-
"happy": {Prefix: "😄 ", LowPrefix: "o "},
45-
"success": {Prefix: "✅ "},
46-
"failure": {Prefix: "❌ ", LowPrefix: "X "},
47-
"conflict": {Prefix: "💥 ", LowPrefix: "x "},
48-
"fatal": {Prefix: "💣 ", LowPrefix: "! "},
49-
"notice": {Prefix: "📌 ", LowPrefix: "* "},
50-
"ready": {Prefix: "🏄 ", LowPrefix: "= "},
51-
"running": {Prefix: "🏃 ", LowPrefix: ": "},
52-
"provisioning": {Prefix: "🌱 ", LowPrefix: "> "},
53-
"restarting": {Prefix: "🔄 ", LowPrefix: ": "},
54-
"stopping": {Prefix: "✋ ", LowPrefix: ": "},
55-
"stopped": {Prefix: "🛑 "},
56-
"warning": {Prefix: "⚠️ ", LowPrefix: "! "},
57-
"waiting": {Prefix: "⌛ ", LowPrefix: ": "},
58-
"usage": {Prefix: "💡 "},
59-
"launch": {Prefix: "🚀 "},
60-
"sad": {Prefix: "😿 ", LowPrefix: "* "},
61-
"thumbs-up": {Prefix: "👍 "},
62-
"option": {Prefix: " ▪ "}, // Indented bullet
63-
"command": {Prefix: " ▪ "}, // Indented bullet
64-
"log-entry": {Prefix: " "}, // Indent
65-
"crushed": {Prefix: "💔 "},
66-
"url": {Prefix: "👉 "},
44+
"happy": {Prefix: "😄 ", LowPrefix: "o "},
45+
"success": {Prefix: "✅ "},
46+
"failure": {Prefix: "❌ ", LowPrefix: "X "},
47+
"conflict": {Prefix: "💥 ", LowPrefix: "x "},
48+
"fatal": {Prefix: "💣 ", LowPrefix: "! "},
49+
"notice": {Prefix: "📌 ", LowPrefix: "* "},
50+
"ready": {Prefix: "🏄 ", LowPrefix: "= "},
51+
"running": {Prefix: "🏃 ", LowPrefix: ": "},
52+
"provisioning": {Prefix: "🌱 ", LowPrefix: "> "},
53+
"restarting": {Prefix: "🔄 ", LowPrefix: ": "},
54+
"reconfiguring": {Prefix: "📯 ", LowPrefix: ": "},
55+
"stopping": {Prefix: "✋ ", LowPrefix: ": "},
56+
"stopped": {Prefix: "🛑 "},
57+
"warning": {Prefix: "⚠️ ", LowPrefix: "! "},
58+
"waiting": {Prefix: "⌛ ", LowPrefix: ": "},
59+
"waiting-pods": {Prefix: "⌛ ", LowPrefix: ": ", OmitNewline: true},
60+
"usage": {Prefix: "💡 "},
61+
"launch": {Prefix: "🚀 "},
62+
"sad": {Prefix: "😿 ", LowPrefix: "* "},
63+
"thumbs-up": {Prefix: "👍 "},
64+
"option": {Prefix: " ▪ "}, // Indented bullet
65+
"command": {Prefix: " ▪ "}, // Indented bullet
66+
"log-entry": {Prefix: " "}, // Indent
67+
"crushed": {Prefix: "💔 "},
68+
"url": {Prefix: "👉 "},
6769

6870
// Specialized purpose styles
6971
"iso-download": {Prefix: "💿 ", LowPrefix: "@ "},

pkg/util/kubernetes.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import (
2323
"github.com/golang/glog"
2424
"github.com/pkg/errors"
2525
appsv1 "k8s.io/api/apps/v1"
26-
"k8s.io/api/core/v1"
26+
v1 "k8s.io/api/core/v1"
2727
apierrs "k8s.io/apimachinery/pkg/api/errors"
2828
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2929
"k8s.io/apimachinery/pkg/fields"
@@ -39,8 +39,10 @@ import (
3939
)
4040

4141
var (
42+
// ReasonableMutateTime is how long to wait for basic object mutations, such as deletions, to show up
4243
ReasonableMutateTime = time.Minute * 1
43-
ReasonableStartTime = time.Minute * 5
44+
// ReasonableStartTime is how long to wait for pods to start, considering dependency chains & slow networks.
45+
ReasonableStartTime = time.Minute * 10
4446
)
4547

4648
type PodStore struct {

0 commit comments

Comments
 (0)