Skip to content

Commit 9e8f924

Browse files
Merge pull request #955 from p0lyn0mial/release-4.8-gate-availability-controller-until-ready
Bug 1994655: openshift-apiserver should not set Available=False APIServicesAvailable on update
2 parents 93ecb46 + 8a9f865 commit 9e8f924

File tree

4 files changed

+36
-2
lines changed

4 files changed

+36
-2
lines changed

staging/src/k8s.io/apiserver/pkg/server/config.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,11 @@ func (c *Config) AddPostStartHookOrDie(name string, hook PostStartHookFunc) {
464464
}
465465
}
466466

467+
// HasBeenReadySignal exposes a server's lifecycle signal which is signaled when the readyz endpoint succeeds for the first time.
468+
func (c *Config) HasBeenReadySignal() <-chan struct{} {
469+
return c.hasBeenReadyCh
470+
}
471+
467472
// Complete fills in any fields not set that are required to have valid data and can be derived
468473
// from other fields. If you're going to `ApplyOptions`, do that first. It's mutating the receiver.
469474
func (c *Config) Complete(informers informers.SharedInformerFactory) CompletedConfig {

staging/src/k8s.io/kube-aggregator/pkg/apiserver/apiserver.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ func (c completedConfig) NewWithDelegate(delegationTarget genericapiserver.Deleg
257257
(func() ([]byte, []byte))(s.proxyCurrentCertKeyContent),
258258
s.serviceResolver,
259259
c.GenericConfig.EgressSelector,
260+
c.GenericConfig.HasBeenReadySignal(),
260261
)
261262
if err != nil {
262263
return nil, err

staging/src/k8s.io/kube-aggregator/pkg/controllers/status/available_controller.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,9 @@ type AvailableConditionController struct {
100100

101101
// metrics registered into legacy registry
102102
metrics *availabilityMetrics
103+
104+
// hasBeenReady is signaled when the readyz endpoint succeeds for the first time.
105+
hasBeenReady <-chan struct{}
103106
}
104107

105108
type tlsTransportCache struct {
@@ -152,6 +155,7 @@ func NewAvailableConditionController(
152155
proxyCurrentCertKeyContent certKeyFunc,
153156
serviceResolver ServiceResolver,
154157
egressSelector *egressselector.EgressSelector,
158+
hasBeenReady <-chan struct{},
155159
) (*AvailableConditionController, error) {
156160
c := &AvailableConditionController{
157161
apiServiceClient: apiServiceClient,
@@ -171,6 +175,7 @@ func NewAvailableConditionController(
171175
proxyCurrentCertKeyContent: proxyCurrentCertKeyContent,
172176
tlsCache: &tlsTransportCache{transports: make(map[tlsCacheKey]http.RoundTripper)},
173177
metrics: newAvailabilityMetrics(),
178+
hasBeenReady: hasBeenReady,
174179
}
175180

176181
if egressSelector != nil {
@@ -233,6 +238,18 @@ func (c *AvailableConditionController) sync(key string) error {
233238
return err
234239
}
235240

241+
// the availability checks depend on fully initialized SDN
242+
// OpenShift carries a few reachability checks that affect /readyz protocol
243+
// record availability of the server so that we can
244+
// skip posting failures to avoid getting false positives until the server becomes ready
245+
hasBeenReady := false
246+
select {
247+
case <-c.hasBeenReady:
248+
hasBeenReady = true
249+
default:
250+
// continue, we will skip posting only potential failures
251+
}
252+
236253
// if a particular transport was specified, use that otherwise build one
237254
// construct an http client that will ignore TLS verification (if someone owns the network and messes with your status
238255
// that's not so bad) and sets a very short timeout. This is a best effort GET that provides no additional information
@@ -427,6 +444,11 @@ func (c *AvailableConditionController) sync(key string) error {
427444
}
428445

429446
if lastError != nil {
447+
if !hasBeenReady {
448+
// returning an error will requeue the item in an exponential fashion
449+
return fmt.Errorf("the server hasn't been ready yet, skipping updating availability of the aggreaged API until the server becomes ready to avoid false positives, lastError = %v", lastError)
450+
}
451+
430452
availableCondition.Status = apiregistrationv1.ConditionFalse
431453
availableCondition.Reason = "FailedDiscoveryCheck"
432454
availableCondition.Message = lastError.Error()

staging/src/k8s.io/kube-aggregator/pkg/controllers/status/available_controller_test.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ func setupAPIServices(apiServices []*apiregistration.APIService) (*AvailableCond
121121
for _, o := range apiServices {
122122
apiServiceIndexer.Add(o)
123123
}
124+
alwaysReadyChan := make(chan struct{})
125+
close(alwaysReadyChan)
124126

125127
c := AvailableConditionController{
126128
apiServiceClient: fakeClient.ApiregistrationV1(),
@@ -134,8 +136,9 @@ func setupAPIServices(apiServices []*apiregistration.APIService) (*AvailableCond
134136
// the maximum disruption time to a minimum, but it does prevent hot loops.
135137
workqueue.NewItemExponentialFailureRateLimiter(5*time.Millisecond, 30*time.Second),
136138
"AvailableConditionController"),
137-
tlsCache: &tlsTransportCache{transports: make(map[tlsCacheKey]http.RoundTripper)},
138-
metrics: newAvailabilityMetrics(),
139+
tlsCache: &tlsTransportCache{transports: make(map[tlsCacheKey]http.RoundTripper)},
140+
metrics: newAvailabilityMetrics(),
141+
hasBeenReady: alwaysReadyChan,
139142
}
140143
for _, svc := range apiServices {
141144
c.addAPIService(svc)
@@ -400,6 +403,8 @@ func TestSync(t *testing.T) {
400403
w.WriteHeader(http.StatusForbidden)
401404
}))
402405
defer testServer.Close()
406+
alwaysReadyChan := make(chan struct{})
407+
close(alwaysReadyChan)
403408

404409
c := AvailableConditionController{
405410
apiServiceClient: fakeClient.ApiregistrationV1(),
@@ -410,6 +415,7 @@ func TestSync(t *testing.T) {
410415
proxyCurrentCertKeyContent: func() ([]byte, []byte) { return emptyCert(), emptyCert() },
411416
tlsCache: &tlsTransportCache{transports: make(map[tlsCacheKey]http.RoundTripper)},
412417
metrics: newAvailabilityMetrics(),
418+
hasBeenReady: alwaysReadyChan,
413419
}
414420
c.sync(tc.apiServiceName)
415421

0 commit comments

Comments
 (0)