Skip to content

Commit 4b2d6d8

Browse files
p0lyn0mialbertinatto
authored andcommitted
UPSTREAM: <carry>: skip posting failures to aggregated APIs to avoid getting false positives until the server becomes ready
the availability checks depend on fully initialized SDN OpenShift carries a few reachability checks that affect /readyz protocol we skip posting failures to avoid getting false positives until the server becomes ready UPSTREAM: <carry>: skip posting failures to aggregated APIs to avoid getting false positives until the server becomes ready marks availability of the server before checking the aggregate APIs as it can change as we are running the checks. in that case, skip posting failures to avoid false positives. note on the next rebase please squash with the previous commit UPSTREAM: <carry>: expose HasBeenReady lifecycle signal OpenShift-Rebase-Source: 8558e88
1 parent 598f46d commit 4b2d6d8

File tree

4 files changed

+36
-1
lines changed

4 files changed

+36
-1
lines changed

Diff for: staging/src/k8s.io/apiserver/pkg/server/config.go

+5
Original file line numberDiff line numberDiff line change
@@ -691,6 +691,11 @@ func (c *Config) ShutdownInitiatedNotify() <-chan struct{} {
691691
return c.lifecycleSignals.ShutdownInitiated.Signaled()
692692
}
693693

694+
// HasBeenReadySignal exposes a server's lifecycle signal which is signaled when the readyz endpoint succeeds for the first time.
695+
func (c *Config) HasBeenReadySignal() <-chan struct{} {
696+
return c.lifecycleSignals.HasBeenReady.Signaled()
697+
}
698+
694699
// Complete fills in any fields not set that are required to have valid data and can be derived
695700
// from other fields. If you're going to `ApplyOptions`, do that first. It's mutating the receiver.
696701
func (c *Config) Complete(informers informers.SharedInformerFactory) CompletedConfig {

Diff for: staging/src/k8s.io/kube-aggregator/pkg/apiserver/apiserver.go

+1
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ func (c completedConfig) NewWithDelegate(delegationTarget genericapiserver.Deleg
358358
(func() ([]byte, []byte))(s.proxyCurrentCertKeyContent),
359359
s.serviceResolver,
360360
metrics,
361+
c.GenericConfig.HasBeenReadySignal(),
361362
)
362363
if err != nil {
363364
return nil, err

Diff for: staging/src/k8s.io/kube-aggregator/pkg/controllers/status/remote/remote_available_controller.go

+22
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,9 @@ type AvailableConditionController struct {
8686

8787
// metrics registered into legacy registry
8888
metrics *availabilitymetrics.Metrics
89+
90+
// hasBeenReady is signaled when the readyz endpoint succeeds for the first time.
91+
hasBeenReady <-chan struct{}
8992
}
9093

9194
// New returns a new remote APIService AvailableConditionController.
@@ -98,6 +101,7 @@ func New(
98101
proxyCurrentCertKeyContent certKeyFunc,
99102
serviceResolver ServiceResolver,
100103
metrics *availabilitymetrics.Metrics,
104+
hasBeenReady <-chan struct{},
101105
) (*AvailableConditionController, error) {
102106
c := &AvailableConditionController{
103107
apiServiceClient: apiServiceClient,
@@ -115,6 +119,7 @@ func New(
115119
proxyTransportDial: proxyTransportDial,
116120
proxyCurrentCertKeyContent: proxyCurrentCertKeyContent,
117121
metrics: metrics,
122+
hasBeenReady: hasBeenReady,
118123
}
119124

120125
// resync on this one because it is low cardinality and rechecking the actual discovery
@@ -164,6 +169,18 @@ func (c *AvailableConditionController) sync(key string) error {
164169
return nil
165170
}
166171

172+
// the availability checks depend on fully initialized SDN
173+
// OpenShift carries a few reachability checks that affect /readyz protocol
174+
// record availability of the server so that we can
175+
// skip posting failures to avoid getting false positives until the server becomes ready
176+
hasBeenReady := false
177+
select {
178+
case <-c.hasBeenReady:
179+
hasBeenReady = true
180+
default:
181+
// continue, we will skip posting only potential failures
182+
}
183+
167184
apiService := originalAPIService.DeepCopy()
168185

169186
// if a particular transport was specified, use that otherwise build one
@@ -347,6 +364,11 @@ func (c *AvailableConditionController) sync(key string) error {
347364
}
348365

349366
if lastError != nil {
367+
if !hasBeenReady {
368+
// returning an error will requeue the item in an exponential fashion
369+
return fmt.Errorf("the server hasn't been ready yet, skipping updating availability of the aggreaged API until the server becomes ready to avoid false positives, lastError = %v", lastError)
370+
}
371+
350372
availableCondition.Status = apiregistrationv1.ConditionFalse
351373
availableCondition.Reason = "FailedDiscoveryCheck"
352374
availableCondition.Message = lastError.Error()

Diff for: staging/src/k8s.io/kube-aggregator/pkg/controllers/status/remote/remote_available_controller_test.go

+8-1
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,9 @@ func setupAPIServices(t T, apiServices []runtime.Object) (*AvailableConditionCon
128128
}
129129
}
130130

131+
alwaysReadyChan := make(chan struct{})
132+
close(alwaysReadyChan)
133+
131134
c := AvailableConditionController{
132135
apiServiceClient: fakeClient.ApiregistrationV1(),
133136
apiServiceLister: listers.NewAPIServiceLister(apiServiceIndexer),
@@ -141,7 +144,8 @@ func setupAPIServices(t T, apiServices []runtime.Object) (*AvailableConditionCon
141144
workqueue.NewTypedItemExponentialFailureRateLimiter[string](5*time.Millisecond, 30*time.Second),
142145
workqueue.TypedRateLimitingQueueConfig[string]{Name: "AvailableConditionController"},
143146
),
144-
metrics: availabilitymetrics.New(),
147+
metrics: availabilitymetrics.New(),
148+
hasBeenReady: alwaysReadyChan,
145149
}
146150
for _, svc := range apiServices {
147151
c.addAPIService(svc)
@@ -401,6 +405,8 @@ func TestSync(t *testing.T) {
401405
w.WriteHeader(tc.backendStatus)
402406
}))
403407
defer testServer.Close()
408+
alwaysReadyChan := make(chan struct{})
409+
close(alwaysReadyChan)
404410

405411
c := AvailableConditionController{
406412
apiServiceClient: fakeClient.ApiregistrationV1(),
@@ -410,6 +416,7 @@ func TestSync(t *testing.T) {
410416
serviceResolver: &fakeServiceResolver{url: testServer.URL},
411417
proxyCurrentCertKeyContent: func() ([]byte, []byte) { return emptyCert(), emptyCert() },
412418
metrics: availabilitymetrics.New(),
419+
hasBeenReady: alwaysReadyChan,
413420
}
414421
err := c.sync(tc.apiServiceName)
415422
if tc.expectedSyncError != "" {

0 commit comments

Comments
 (0)