Skip to content

Commit 8558e88

Browse files
p0lyn0mialsoltysh
authored andcommitted
UPSTREAM: <carry>: skip posting failures to aggregated APIs to avoid getting false positives until the server becomes ready
the availability checks depend on fully initialized SDN OpenShift carries a few reachability checks that affect /readyz protocol we skip posting failures to avoid getting false positives until the server becomes ready UPSTREAM: <carry>: skip posting failures to aggregated APIs to avoid getting false positives until the server becomes ready marks availability of the server before checking the aggregate APIs as it can change as we are running the checks. in that case, skip posting failures to avoid false positives. note on the next rebase please squash with the previous commit openshift-rebase(v1.24):source=30214940c21 UPSTREAM: <carry>: expose HasBeenReady lifecycle signal openshift-rebase(v1.24):source=110aaa7c54c openshift-rebase(v1.24):source=110aaa7c54c openshift-rebase(v1.24):source=110aaa7c54c
1 parent bf2b5fa commit 8558e88

File tree

4 files changed

+36
-2
lines changed

4 files changed

+36
-2
lines changed

staging/src/k8s.io/apiserver/pkg/server/config.go

+5
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,11 @@ func (c *Config) DrainedNotify() <-chan struct{} {
549549
return c.lifecycleSignals.InFlightRequestsDrained.Signaled()
550550
}
551551

552+
// HasBeenReadySignal exposes a server's lifecycle signal which is signaled when the readyz endpoint succeeds for the first time.
553+
func (c *Config) HasBeenReadySignal() <-chan struct{} {
554+
return c.lifecycleSignals.HasBeenReady.Signaled()
555+
}
556+
552557
// Complete fills in any fields not set that are required to have valid data and can be derived
553558
// from other fields. If you're going to `ApplyOptions`, do that first. It's mutating the receiver.
554559
func (c *Config) Complete(informers informers.SharedInformerFactory) CompletedConfig {

staging/src/k8s.io/kube-aggregator/pkg/apiserver/apiserver.go

+1
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@ func (c completedConfig) NewWithDelegate(delegationTarget genericapiserver.Deleg
289289
(func() ([]byte, []byte))(s.proxyCurrentCertKeyContent),
290290
s.serviceResolver,
291291
c.GenericConfig.EgressSelector,
292+
c.GenericConfig.HasBeenReadySignal(),
292293
)
293294
if err != nil {
294295
return nil, err

staging/src/k8s.io/kube-aggregator/pkg/controllers/status/available_controller.go

+22
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,9 @@ type AvailableConditionController struct {
100100

101101
// metrics registered into legacy registry
102102
metrics *availabilityMetrics
103+
104+
// hasBeenReady is signaled when the readyz endpoint succeeds for the first time.
105+
hasBeenReady <-chan struct{}
103106
}
104107

105108
type tlsTransportCache struct {
@@ -152,6 +155,7 @@ func NewAvailableConditionController(
152155
proxyCurrentCertKeyContent certKeyFunc,
153156
serviceResolver ServiceResolver,
154157
egressSelector *egressselector.EgressSelector,
158+
hasBeenReady <-chan struct{},
155159
) (*AvailableConditionController, error) {
156160
c := &AvailableConditionController{
157161
apiServiceClient: apiServiceClient,
@@ -171,6 +175,7 @@ func NewAvailableConditionController(
171175
proxyCurrentCertKeyContent: proxyCurrentCertKeyContent,
172176
tlsCache: &tlsTransportCache{transports: make(map[tlsCacheKey]http.RoundTripper)},
173177
metrics: newAvailabilityMetrics(),
178+
hasBeenReady: hasBeenReady,
174179
}
175180

176181
if egressSelector != nil {
@@ -233,6 +238,18 @@ func (c *AvailableConditionController) sync(key string) error {
233238
return err
234239
}
235240

241+
// the availability checks depend on fully initialized SDN
242+
// OpenShift carries a few reachability checks that affect /readyz protocol
243+
// record availability of the server so that we can
244+
// skip posting failures to avoid getting false positives until the server becomes ready
245+
hasBeenReady := false
246+
select {
247+
case <-c.hasBeenReady:
248+
hasBeenReady = true
249+
default:
250+
// continue, we will skip posting only potential failures
251+
}
252+
236253
// if a particular transport was specified, use that otherwise build one
237254
// construct an http client that will ignore TLS verification (if someone owns the network and messes with your status
238255
// that's not so bad) and sets a very short timeout. This is a best effort GET that provides no additional information
@@ -427,6 +444,11 @@ func (c *AvailableConditionController) sync(key string) error {
427444
}
428445

429446
if lastError != nil {
447+
if !hasBeenReady {
448+
// returning an error will requeue the item in an exponential fashion
449+
return fmt.Errorf("the server hasn't been ready yet, skipping updating availability of the aggreaged API until the server becomes ready to avoid false positives, lastError = %v", lastError)
450+
}
451+
430452
availableCondition.Status = apiregistrationv1.ConditionFalse
431453
availableCondition.Reason = "FailedDiscoveryCheck"
432454
availableCondition.Message = lastError.Error()

staging/src/k8s.io/kube-aggregator/pkg/controllers/status/available_controller_test.go

+8-2
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ func setupAPIServices(apiServices []*apiregistration.APIService) (*AvailableCond
121121
for _, o := range apiServices {
122122
apiServiceIndexer.Add(o)
123123
}
124+
alwaysReadyChan := make(chan struct{})
125+
close(alwaysReadyChan)
124126

125127
c := AvailableConditionController{
126128
apiServiceClient: fakeClient.ApiregistrationV1(),
@@ -134,8 +136,9 @@ func setupAPIServices(apiServices []*apiregistration.APIService) (*AvailableCond
134136
// the maximum disruption time to a minimum, but it does prevent hot loops.
135137
workqueue.NewItemExponentialFailureRateLimiter(5*time.Millisecond, 30*time.Second),
136138
"AvailableConditionController"),
137-
tlsCache: &tlsTransportCache{transports: make(map[tlsCacheKey]http.RoundTripper)},
138-
metrics: newAvailabilityMetrics(),
139+
tlsCache: &tlsTransportCache{transports: make(map[tlsCacheKey]http.RoundTripper)},
140+
metrics: newAvailabilityMetrics(),
141+
hasBeenReady: alwaysReadyChan,
139142
}
140143
for _, svc := range apiServices {
141144
c.addAPIService(svc)
@@ -400,6 +403,8 @@ func TestSync(t *testing.T) {
400403
w.WriteHeader(http.StatusForbidden)
401404
}))
402405
defer testServer.Close()
406+
alwaysReadyChan := make(chan struct{})
407+
close(alwaysReadyChan)
403408

404409
c := AvailableConditionController{
405410
apiServiceClient: fakeClient.ApiregistrationV1(),
@@ -410,6 +415,7 @@ func TestSync(t *testing.T) {
410415
proxyCurrentCertKeyContent: func() ([]byte, []byte) { return emptyCert(), emptyCert() },
411416
tlsCache: &tlsTransportCache{transports: make(map[tlsCacheKey]http.RoundTripper)},
412417
metrics: newAvailabilityMetrics(),
418+
hasBeenReady: alwaysReadyChan,
413419
}
414420
c.sync(tc.apiServiceName)
415421

0 commit comments

Comments
 (0)