
Commit bfb2a3b

Merge pull request #3129 from JoelSpeed/mhc-integrate-remote-cache
🌱 Integrate shared remote cluster watching into MHC
2 parents: 35fdce2 + f31f0ce

6 files changed: 138 additions and 96 deletions

controllers/machinehealthcheck_controller.go

Lines changed: 19 additions & 55 deletions
@@ -19,7 +19,6 @@ package controllers
 import (
 	"context"
 	"fmt"
-	"sync"
 	"time"
 
 	"github.com/go-logr/logr"
@@ -65,14 +64,13 @@ const (
 
 // MachineHealthCheckReconciler reconciles a MachineHealthCheck object
 type MachineHealthCheckReconciler struct {
-	Client client.Client
-	Log    logr.Logger
-
-	controller        controller.Controller
-	recorder          record.EventRecorder
-	scheme            *runtime.Scheme
-	clusterCaches     map[client.ObjectKey]cache.Cache
-	clusterCachesLock sync.RWMutex
+	Client  client.Client
+	Log     logr.Logger
+	Tracker *remote.ClusterCacheTracker
+
+	controller controller.Controller
+	recorder   record.EventRecorder
+	scheme     *runtime.Scheme
 }
 
 func (r *MachineHealthCheckReconciler) SetupWithManager(mgr ctrl.Manager, options controller.Options) error {
@@ -117,7 +115,6 @@ func (r *MachineHealthCheckReconciler) SetupWithManager(mgr ctrl.Manager, option
 	r.controller = controller
 	r.recorder = mgr.GetEventRecorderFor("machinehealthcheck-controller")
 	r.scheme = mgr.GetScheme()
-	r.clusterCaches = make(map[client.ObjectKey]cache.Cache)
 	return nil
 }
 
@@ -203,7 +200,7 @@ func (r *MachineHealthCheckReconciler) reconcile(ctx context.Context, cluster *c
 		return ctrl.Result{}, err
 	}
 
-	if err := r.watchClusterNodes(ctx, r.Client, cluster); err != nil {
+	if err := r.watchClusterNodes(ctx, cluster); err != nil {
 		logger.Error(err, "Error watching nodes on target cluster")
 		return ctrl.Result{}, err
 	}
@@ -376,54 +373,21 @@ func (r *MachineHealthCheckReconciler) getMachineFromNode(nodeName string) (*clu
 	return &machineList.Items[0], nil
 }
 
-func (r *MachineHealthCheckReconciler) watchClusterNodes(ctx context.Context, c client.Client, cluster *clusterv1.Cluster) error {
-	key := util.ObjectKey(cluster)
-	if _, ok := r.getClusterCache(key); ok {
-		// watch was already set up for this cluster
+func (r *MachineHealthCheckReconciler) watchClusterNodes(ctx context.Context, cluster *clusterv1.Cluster) error {
+	// If there is no tracker, don't watch remote nodes
+	if r.Tracker == nil {
 		return nil
 	}
 
-	return r.createClusterCache(ctx, c, key)
-}
-
-func (r *MachineHealthCheckReconciler) getClusterCache(key client.ObjectKey) (cache.Cache, bool) {
-	r.clusterCachesLock.RLock()
-	defer r.clusterCachesLock.RUnlock()
-
-	c, ok := r.clusterCaches[key]
-	return c, ok
-}
-
-func (r *MachineHealthCheckReconciler) createClusterCache(ctx context.Context, c client.Client, key client.ObjectKey) error {
-	r.clusterCachesLock.Lock()
-	defer r.clusterCachesLock.Unlock()
-
-	// Double check the key still doesn't exist under write lock
-	if _, ok := r.clusterCaches[key]; ok {
-		// An informer was created while waiting for the lock
-		return nil
+	if err := r.Tracker.Watch(ctx, remote.WatchInput{
+		Cluster:      util.ObjectKey(cluster),
+		Watcher:      r.controller,
+		Kind:         &corev1.Node{},
+		CacheOptions: cache.Options{},
+		EventHandler: &handler.EnqueueRequestsFromMapFunc{ToRequests: handler.ToRequestsFunc(r.nodeToMachineHealthCheck)},
+	}); err != nil {
+		return err
 	}
-
-	config, err := remote.RESTConfig(ctx, c, key)
-	if err != nil {
-		return errors.Wrap(err, "error fetching remote cluster config")
-	}
-
-	clusterCache, err := cache.New(config, cache.Options{})
-	if err != nil {
-		return errors.Wrap(err, "error creating cache for remote cluster")
-	}
-	go clusterCache.Start(ctx.Done())
-
-	err = r.controller.Watch(
-		source.NewKindWithCache(&corev1.Node{}, clusterCache),
-		&handler.EnqueueRequestsFromMapFunc{ToRequests: handler.ToRequestsFunc(r.nodeToMachineHealthCheck)},
-	)
-	if err != nil {
-		return errors.Wrap(err, "error watching nodes on target cluster")
-	}
-
-	r.clusterCaches[key] = clusterCache
 	return nil
 }

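Taken together, the MHC controller no longer builds and tracks its own per-cluster cache.Cache; remote-watch bookkeeping moves to the shared remote.ClusterCacheTracker, whose lifecycle is handled elsewhere (see the ClusterCacheReconciler below). Any reconciler holding a tracker pointer can follow the same pattern. A minimal sketch of such a consumer follows; ExampleReconciler, its nodeToRequests mapper, and the package wiring are hypothetical, while the remote.WatchInput fields mirror the ones used in the diff above.

package example

import (
	"context"

	corev1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/cache"
	"sigs.k8s.io/controller-runtime/pkg/controller"
	"sigs.k8s.io/controller-runtime/pkg/handler"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
	"sigs.k8s.io/cluster-api/controllers/remote"
	"sigs.k8s.io/cluster-api/util"
)

// ExampleReconciler is a hypothetical controller that wants events from a
// workload cluster without managing its own remote cache.
type ExampleReconciler struct {
	Tracker    *remote.ClusterCacheTracker
	controller controller.Controller
}

// watchClusterNodes registers a watch on remote Nodes through the shared
// tracker, mirroring the MHC change above; the tracker owns the underlying
// cache and its lifecycle.
func (r *ExampleReconciler) watchClusterNodes(ctx context.Context, cluster *clusterv1.Cluster) error {
	if r.Tracker == nil {
		// No tracker wired in (for example in lightweight tests): skip remote watches.
		return nil
	}
	return r.Tracker.Watch(ctx, remote.WatchInput{
		Cluster:      util.ObjectKey(cluster),
		Watcher:      r.controller,
		Kind:         &corev1.Node{},
		CacheOptions: cache.Options{},
		EventHandler: &handler.EnqueueRequestsFromMapFunc{ToRequests: handler.ToRequestsFunc(r.nodeToRequests)},
	})
}

// nodeToRequests maps a remote Node event to reconcile requests; the mapping
// logic here is a placeholder.
func (r *ExampleReconciler) nodeToRequests(o handler.MapObject) []reconcile.Request {
	_ = o
	return nil
}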
controllers/machinehealthcheck_controller_test.go

Lines changed: 62 additions & 7 deletions
@@ -50,6 +50,7 @@ var _ = Describe("MachineHealthCheck Reconciler", func() {
 
 	var clusterName = "test-cluster"
 	var clusterKubeconfigName = "test-cluster-kubeconfig"
+	var clusterUID types.UID
 	var namespaceName string
 
 	BeforeEach(func() {
@@ -63,6 +64,7 @@ var _ = Describe("MachineHealthCheck Reconciler", func() {
 		By("Creating the Cluster")
 		testCluster.Namespace = namespaceName
 		Expect(testEnv.Create(ctx, testCluster)).To(Succeed())
+		clusterUID = testCluster.UID
 
 		By("Creating the remote Cluster kubeconfig")
 		Expect(testEnv.CreateKubeconfigSecret(testCluster)).To(Succeed())
@@ -216,6 +218,17 @@ var _ = Describe("MachineHealthCheck Reconciler", func() {
 		}, timeout).Should(Succeed())
 	}
 
+	// getMHCStatus is a function to be used in Eventually() matchers to check the MHC status
+	getMHCStatus := func(namespace, name string) func() clusterv1.MachineHealthCheckStatus {
+		return func() clusterv1.MachineHealthCheckStatus {
+			mhc := &clusterv1.MachineHealthCheck{}
+			if err := testEnv.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, mhc); err != nil {
+				return clusterv1.MachineHealthCheckStatus{}
+			}
+			return mhc.Status
+		}
+	}
+
 	type reconcileTestCase struct {
 		mhc   func() *clusterv1.MachineHealthCheck
 		nodes func() []*corev1.Node
@@ -316,13 +329,7 @@ var _ = Describe("MachineHealthCheck Reconciler", func() {
 			Expect(testEnv.Create(ctx, mhc)).To(Succeed())
 
 			By("Verifying the status has been updated")
-			Eventually(func() clusterv1.MachineHealthCheckStatus {
-				mhc := &clusterv1.MachineHealthCheck{}
-				if err := testEnv.Get(ctx, types.NamespacedName{Namespace: namespaceName, Name: rtc.mhc().Name}, mhc); err != nil {
-					return clusterv1.MachineHealthCheckStatus{}
-				}
-				return mhc.Status
-			}, timeout).Should(Equal(rtc.expectedStatus))
+			Eventually(getMHCStatus(namespaceName, rtc.mhc().Name), timeout).Should(Equal(rtc.expectedStatus))
 
 			// Status has been updated, a reconcile has occurred, should no longer need async assertions
 
@@ -442,6 +449,54 @@ var _ = Describe("MachineHealthCheck Reconciler", func() {
 			expectedStatus: clusterv1.MachineHealthCheckStatus{ExpectedMachines: 0, CurrentHealthy: 0},
 		}),
 	)
+
+	Context("when a remote Node is modified", func() {
+		It("should react to the updated Node", func() {
+			By("Creating a Node")
+			remoteNode := newTestNode("remote-node-1")
+			remoteNode.Status.Conditions = []corev1.NodeCondition{healthyNodeCondition}
+			Expect(testEnv.Create(ctx, remoteNode)).To(Succeed())
+
+			By("Creating a Machine")
+			// Set up the Machine to reduce events triggered by other controllers updating the Machine
+			clusterOR := metav1.OwnerReference{APIVersion: clusterv1.GroupVersion.String(), Kind: "Cluster", Name: clusterName, UID: clusterUID}
+			remoteMachine := newTestMachine("remote-machine-1", namespaceName, clusterName, remoteNode.Name, labels)
+			remoteMachine.SetOwnerReferences([]metav1.OwnerReference{machineSetOR, clusterOR})
+			now := metav1.NewTime(time.Now())
+			remoteMachine.SetFinalizers([]string{"machine.cluster.x-k8s.io"})
+			remoteMachine.Status.LastUpdated = &now
+			remoteMachine.Status.Phase = "Provisioned"
+			createMachine(remoteMachine)
+
+			By("Creating a MachineHealthCheck")
+			mhc := newTestMachineHealthCheck("remote-test-mhc", namespaceName, clusterName, labels)
+			maxUnhealthy := intstr.Parse("1")
+			mhc.Spec.MaxUnhealthy = &maxUnhealthy
+			mhc.Default()
+			Expect(testEnv.Create(ctx, mhc)).To(Succeed())
+
+			By("Verifying the status has been updated, and the machine is currently healthy")
+			Eventually(getMHCStatus(namespaceName, mhc.Name), timeout).Should(Equal(clusterv1.MachineHealthCheckStatus{ExpectedMachines: 1, CurrentHealthy: 1}))
+			// Make sure the status is stable before making any changes, this allows in-flight reconciles to finish
+			Consistently(getMHCStatus(namespaceName, mhc.Name), 100*time.Millisecond).Should(Equal(clusterv1.MachineHealthCheckStatus{ExpectedMachines: 1, CurrentHealthy: 1}))
+
+			By("Updating the node to make it unhealthy")
+			Eventually(func() error {
+				node := &corev1.Node{}
+				if err := testEnv.Get(ctx, util.ObjectKey(remoteNode), node); err != nil {
+					return err
+				}
+				node.Status.Conditions = []corev1.NodeCondition{unhealthyNodeCondition}
+				if err := testEnv.Status().Update(ctx, node); err != nil {
+					return err
+				}
+				return nil
+			}, timeout).Should(Succeed())
+
+			By("Verifying the status has been updated, and the machine is now unhealthy")
+			Eventually(getMHCStatus(namespaceName, mhc.Name), timeout).Should(Equal(clusterv1.MachineHealthCheckStatus{ExpectedMachines: 1, CurrentHealthy: 0}))
+		})
+	})
 	})
 })

controllers/remote/cluster_cache.go

Lines changed: 12 additions & 25 deletions
@@ -366,47 +366,34 @@ func healthCheckPath(sourceCfg *rest.Config, requestTimeout time.Duration, path
 // ClusterCacheReconciler is responsible for stopping remote cluster caches when
 // the cluster for the remote cache is being deleted.
 type ClusterCacheReconciler struct {
-	log     logr.Logger
-	client  client.Client
-	tracker *ClusterCacheTracker
+	Log     logr.Logger
+	Client  client.Client
+	Tracker *ClusterCacheTracker
 }
 
-func NewClusterCacheReconciler(
-	log logr.Logger,
-	mgr ctrl.Manager,
-	controllerOptions controller.Options,
-	cct *ClusterCacheTracker,
-) (*ClusterCacheReconciler, error) {
-	r := &ClusterCacheReconciler{
-		log:     log,
-		client:  mgr.GetClient(),
-		tracker: cct,
-	}
-
-	// Watch Clusters so we can stop and remove caches when Clusters are deleted.
+func (r *ClusterCacheReconciler) SetupWithManager(mgr ctrl.Manager, options controller.Options) error {
 	_, err := ctrl.NewControllerManagedBy(mgr).
 		For(&clusterv1.Cluster{}).
-		WithOptions(controllerOptions).
+		WithOptions(options).
 		Build(r)
 
 	if err != nil {
-		return nil, errors.Wrap(err, "failed to create cluster cache manager controller")
+		return errors.Wrap(err, "failed setting up with a controller manager")
 	}
-
-	return r, nil
+	return nil
 }
 
 // Reconcile reconciles Clusters and removes ClusterCaches for any Cluster that cannot be retrieved from the
 // management cluster.
 func (r *ClusterCacheReconciler) Reconcile(req reconcile.Request) (reconcile.Result, error) {
 	ctx := context.Background()
 
-	log := r.log.WithValues("namespace", req.Namespace, "name", req.Name)
+	log := r.Log.WithValues("namespace", req.Namespace, "name", req.Name)
 	log.V(4).Info("Reconciling")
 
 	var cluster clusterv1.Cluster
 
-	err := r.client.Get(ctx, req.NamespacedName, &cluster)
+	err := r.Client.Get(ctx, req.NamespacedName, &cluster)
 	if err == nil {
 		log.V(4).Info("Cluster still exists")
 		return reconcile.Result{}, nil
@@ -417,7 +404,7 @@ func (r *ClusterCacheReconciler) Reconcile(req reconcile.Request) (reconcile.Res
 
 	log.V(4).Info("Cluster no longer exists")
 
-	c := r.tracker.getClusterCache(req.NamespacedName)
+	c := r.Tracker.getClusterCache(req.NamespacedName)
 	if c == nil {
 		log.V(4).Info("No current cluster cache exists - nothing to do")
 		return reconcile.Result{}, nil
@@ -426,10 +413,10 @@ func (r *ClusterCacheReconciler) Reconcile(req reconcile.Request) (reconcile.Res
 	log.V(4).Info("Stopping cluster cache")
 	c.Stop()
 
-	r.tracker.deleteClusterCache(req.NamespacedName)
+	r.Tracker.deleteClusterCache(req.NamespacedName)
 
 	log.V(4).Info("Deleting watches for cluster cache")
-	r.tracker.deleteWatchesForCluster(req.NamespacedName)
+	r.Tracker.deleteWatchesForCluster(req.NamespacedName)
 
 	return reconcile.Result{}, nil
 }

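Replacing the NewClusterCacheReconciler constructor with exported fields plus SetupWithManager matches the convention the other reconcilers in this repository follow: callers build the struct with a literal and register it against an existing manager, as the test and main.go changes further down do. A minimal sketch of that call-site shape, assuming mgr and tracker come from the surrounding setup:

import (
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/controller"

	"sigs.k8s.io/cluster-api/controllers/remote"
)

// setupClusterCacheReconciler is an illustrative helper; mgr and tracker are
// assumed to come from the surrounding manager setup (compare the main.go
// change below).
func setupClusterCacheReconciler(mgr ctrl.Manager, tracker *remote.ClusterCacheTracker) error {
	r := &remote.ClusterCacheReconciler{
		Client:  mgr.GetClient(),
		Log:     ctrl.Log.WithName("remote").WithName("ClusterCacheReconciler"),
		Tracker: tracker,
	}
	return r.SetupWithManager(mgr, controller.Options{})
}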
controllers/remote/cluster_cache_reconciler_test.go

Lines changed: 6 additions & 7 deletions
@@ -129,13 +129,12 @@ var _ = Describe("ClusterCache Reconciler suite", func() {
 			Expect(k8sClient.Create(ctx, testNamespace)).To(Succeed())
 
 			By("Starting the ClusterCacheReconciler")
-			r, err := NewClusterCacheReconciler(
-				&log.NullLogger{},
-				mgr,
-				controller.Options{},
-				cct,
-			)
-			Expect(err).ToNot(HaveOccurred())
+			r := &ClusterCacheReconciler{
+				Log:     &log.NullLogger{},
+				Client:  mgr.GetClient(),
+				Tracker: cct,
+			}
+			Expect(r.SetupWithManager(mgr, controller.Options{})).To(Succeed())
 
 			By("Creating clusters to test with")
 			clusterRequest1, clusterCache1 = createAndWatchCluster("cluster-1")

controllers/suite_test.go

Lines changed: 16 additions & 0 deletions
@@ -30,6 +30,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/envtest/printer"
 
 	"sigs.k8s.io/cluster-api/cmd/clusterctl/log"
+	"sigs.k8s.io/cluster-api/controllers/remote"
 	"sigs.k8s.io/cluster-api/test/helpers"
 	// +kubebuilder:scaffold:imports
 )
@@ -66,6 +67,20 @@ var _ = BeforeSuite(func(done Done) {
 	testEnv, err = helpers.NewTestEnvironment()
 	Expect(err).NotTo(HaveOccurred())
 
+	// Set up a ClusterCacheTracker and ClusterCacheReconciler to provide to controllers
+	// requiring a connection to a remote cluster
+	tracker, err := remote.NewClusterCacheTracker(
+		log.Log,
+		testEnv.Manager,
+	)
+	Expect(err).ToNot(HaveOccurred())
+
+	Expect((&remote.ClusterCacheReconciler{
+		Client:  testEnv,
+		Log:     log.Log,
+		Tracker: tracker,
+	}).SetupWithManager(testEnv.Manager, controller.Options{MaxConcurrentReconciles: 1})).To(Succeed())
+
 	clusterReconciler = &ClusterReconciler{
 		Client: testEnv,
 		Log:    log.Log,
@@ -90,6 +105,7 @@ var _ = BeforeSuite(func(done Done) {
 	Expect((&MachineHealthCheckReconciler{
 		Client:   testEnv,
 		Log:      log.Log,
+		Tracker:  tracker,
 		recorder: testEnv.GetEventRecorderFor("machinehealthcheck-controller"),
 	}).SetupWithManager(testEnv.Manager, controller.Options{MaxConcurrentReconciles: 1})).To(Succeed())
 
main.go

Lines changed: 23 additions & 2 deletions
@@ -33,6 +33,7 @@ import (
 	clusterv1alpha3 "sigs.k8s.io/cluster-api/api/v1alpha3"
 	"sigs.k8s.io/cluster-api/cmd/version"
 	"sigs.k8s.io/cluster-api/controllers"
+	"sigs.k8s.io/cluster-api/controllers/remote"
 	expv1alpha3 "sigs.k8s.io/cluster-api/exp/api/v1alpha3"
 	expcontrollers "sigs.k8s.io/cluster-api/exp/controllers"
 	"sigs.k8s.io/cluster-api/feature"
@@ -194,6 +195,25 @@ func setupReconcilers(mgr ctrl.Manager) {
 		return
 	}
 
+	// Set up a ClusterCacheTracker and ClusterCacheReconciler to provide to controllers
+	// requiring a connection to a remote cluster
+	tracker, err := remote.NewClusterCacheTracker(
+		ctrl.Log.WithName("remote").WithName("ClusterCacheTracker"),
+		mgr,
+	)
+	if err != nil {
+		setupLog.Error(err, "unable to create cluster cache tracker")
+		os.Exit(1)
+	}
+	if err := (&remote.ClusterCacheReconciler{
+		Client:  mgr.GetClient(),
+		Log:     ctrl.Log.WithName("remote").WithName("ClusterCacheReconciler"),
+		Tracker: tracker,
+	}).SetupWithManager(mgr, concurrency(clusterConcurrency)); err != nil {
+		setupLog.Error(err, "unable to create controller", "controller", "ClusterCacheReconciler")
+		os.Exit(1)
+	}
+
 	if err := (&controllers.ClusterReconciler{
 		Client: mgr.GetClient(),
 		Log:    ctrl.Log.WithName("controllers").WithName("Cluster"),
@@ -233,8 +253,9 @@ func setupReconcilers(mgr ctrl.Manager) {
 		}
 	}
 	if err := (&controllers.MachineHealthCheckReconciler{
-		Client: mgr.GetClient(),
-		Log:    ctrl.Log.WithName("controllers").WithName("MachineHealthCheck"),
+		Client:  mgr.GetClient(),
+		Log:     ctrl.Log.WithName("controllers").WithName("MachineHealthCheck"),
+		Tracker: tracker,
 	}).SetupWithManager(mgr, concurrency(machineHealthCheckConcurrency)); err != nil {
 		setupLog.Error(err, "unable to create controller", "controller", "MachineHealthCheck")
 		os.Exit(1)
