Skip to content

Commit d5281cb

Browse files
deads2k authored and bertinatto committed
UPSTREAM: <carry>: when only this kube-apiserver can fulfill the kubernetes.default.svc, don't wait for aggregated availability
1 parent ddde68f commit d5281cb

File tree

2 files changed

+41
-10
lines changed

2 files changed

+41
-10
lines changed

openshift-kube-apiserver/openshiftkubeapiserver/patch.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ func OpenShiftKubeAPIServerConfigPatch(genericConfig *genericapiserver.Config, k
108108
}
109109
// END HANDLER CHAIN
110110

111-
openshiftAPIServiceReachabilityCheck := newOpenshiftAPIServiceReachabilityCheck()
112-
oauthAPIServiceReachabilityCheck := newOAuthPIServiceReachabilityCheck()
111+
openshiftAPIServiceReachabilityCheck := newOpenshiftAPIServiceReachabilityCheck(genericConfig.PublicAddress)
112+
oauthAPIServiceReachabilityCheck := newOAuthPIServiceReachabilityCheck(genericConfig.PublicAddress)
113113
genericConfig.ReadyzChecks = append(genericConfig.ReadyzChecks, openshiftAPIServiceReachabilityCheck, oauthAPIServiceReachabilityCheck)
114114

115115
genericConfig.AddPostStartHookOrDie("openshift.io-startkubeinformers", func(context genericapiserver.PostStartHookContext) error {

openshift-kube-apiserver/openshiftkubeapiserver/sdn_readyz_wait.go

+39-8
Original file line numberDiff line numberDiff line change
@@ -18,29 +18,34 @@ import (
1818
"k8s.io/klog/v2"
1919
)
2020

21-
func newOpenshiftAPIServiceReachabilityCheck() *aggregatedAPIServiceAvailabilityCheck {
22-
return newAggregatedAPIServiceReachabilityCheck("openshift-apiserver", "api")
21+
func newOpenshiftAPIServiceReachabilityCheck(ipForKubernetesDefaultService net.IP) *aggregatedAPIServiceAvailabilityCheck {
22+
return newAggregatedAPIServiceReachabilityCheck(ipForKubernetesDefaultService, "openshift-apiserver", "api")
2323
}
2424

25-
func newOAuthPIServiceReachabilityCheck() *aggregatedAPIServiceAvailabilityCheck {
26-
return newAggregatedAPIServiceReachabilityCheck("openshift-oauth-apiserver", "api")
25+
func newOAuthPIServiceReachabilityCheck(ipForKubernetesDefaultService net.IP) *aggregatedAPIServiceAvailabilityCheck {
26+
return newAggregatedAPIServiceReachabilityCheck(ipForKubernetesDefaultService, "openshift-oauth-apiserver", "api")
2727
}
2828

2929
// if the API service is not found, then this check returns quickly.
3030
// if the endpoint is not accessible within 60 seconds, we report ready no matter what
3131
// otherwise, wait for up to 60 seconds to be able to reach the apiserver
32-
func newAggregatedAPIServiceReachabilityCheck(namespace, service string) *aggregatedAPIServiceAvailabilityCheck {
32+
func newAggregatedAPIServiceReachabilityCheck(ipForKubernetesDefaultService net.IP, namespace, service string) *aggregatedAPIServiceAvailabilityCheck {
3333
return &aggregatedAPIServiceAvailabilityCheck{
34-
done: make(chan struct{}),
35-
namespace: namespace,
36-
serviceName: service,
34+
done: make(chan struct{}),
35+
ipForKubernetesDefaultService: ipForKubernetesDefaultService,
36+
namespace: namespace,
37+
serviceName: service,
3738
}
3839
}
3940

4041
type aggregatedAPIServiceAvailabilityCheck struct {
4142
// done indicates that this check is complete (success or failure) and the check should return true
4243
done chan struct{}
4344

45+
// ipForKubernetesDefaultService is used to determine whether this endpoint is the only one for the kubernetes.default.svc
46+
// if so, it will report reachable immediately because honoring some requests is better than honoring no requests.
47+
ipForKubernetesDefaultService net.IP
48+
4449
// namespace is the namespace hosting the service for the aggregated api
4550
namespace string
4651
// serviceName is used to get a list of endpoints to directly dial
@@ -78,6 +83,32 @@ func (c *aggregatedAPIServiceAvailabilityCheck) checkForConnection(context gener
7883
panic(err)
7984
}
8085

86+
ctx, cancel := gocontext.WithTimeout(gocontext.TODO(), 30*time.Second)
87+
defer cancel()
88+
89+
// if the kubernetes.default.svc needs an endpoint and this is the only apiserver that can fulfill it, then we don't
90+
// wait for reachability. We wait for other conditions, but unreachable apiservers correctly 503 for clients.
91+
kubeEndpoints, err := kubeClient.CoreV1().Endpoints("default").Get(ctx, "kubernetes", metav1.GetOptions{})
92+
switch {
93+
case apierrors.IsNotFound(err):
94+
utilruntime.HandleError(fmt.Errorf("%s did not find a kubernetes.default.svc endpoint", c.Name()))
95+
return
96+
case err != nil:
97+
utilruntime.HandleError(fmt.Errorf("%s unable to read a kubernetes.default.svc endpoint: %w", c.Name(), err))
98+
return
99+
case len(kubeEndpoints.Subsets) == 0:
100+
utilruntime.HandleError(fmt.Errorf("%s did not find any IPs for kubernetes.default.svc endpoint", c.Name()))
101+
return
102+
case len(kubeEndpoints.Subsets[0].Addresses) == 0:
103+
utilruntime.HandleError(fmt.Errorf("%s did not find any IPs for kubernetes.default.svc endpoint", c.Name()))
104+
return
105+
case len(kubeEndpoints.Subsets[0].Addresses) == 1:
106+
if kubeEndpoints.Subsets[0].Addresses[0].IP == c.ipForKubernetesDefaultService.String() {
107+
utilruntime.HandleError(fmt.Errorf("%s only found this kube-apiserver's IP (%v) in kubernetes.default.svc endpoint", c.Name(), c.ipForKubernetesDefaultService))
108+
return
109+
}
110+
}
111+
81112
// Start a thread which repeatedly tries to connect to any aggregated apiserver endpoint.
82113
// 1. if the aggregated apiserver endpoint doesn't exist, logs a warning and reports ready
83114
// 2. if a connection cannot be made, after 60 seconds logs an error and reports ready -- this avoids a rebootstrapping cycle

0 commit comments

Comments
 (0)