review fixes

sbueringer · sbueringer · commit 45f007c66121 · 2022-11-14T16:11:45.000+01:00
diff --git a/controllers/remote/cluster_cache_healthcheck_test.go b/controllers/remote/cluster_cache_healthcheck_test.go
@@ -140,7 +140,10 @@ func TestClusterCacheHealthCheck(t *testing.T) {
 			})
 
 			// Make sure this passes for at least for some seconds, to give the health check goroutine time to run.
-			g.Consistently(func() bool { return cct.clusterAccessorExists(testClusterKey) }, 5*time.Second, 1*time.Second).Should(BeTrue())
+			g.Consistently(func() bool {
+				_, ok := cct.loadAccessor(testClusterKey)
+				return ok
+			}, 5*time.Second, 1*time.Second).Should(BeTrue())
 		})
 
 		t.Run("with an invalid path", func(t *testing.T) {
@@ -162,7 +165,10 @@ func TestClusterCacheHealthCheck(t *testing.T) {
 				})
 
 			// This should succeed after N consecutive failed requests.
-			g.Eventually(func() bool { return cct.clusterAccessorExists(testClusterKey) }, 5*time.Second, 1*time.Second).Should(BeFalse())
+			g.Eventually(func() bool {
+				_, ok := cct.loadAccessor(testClusterKey)
+				return ok
+			}, 5*time.Second, 1*time.Second).Should(BeFalse())
 		})
 
 		t.Run("with an invalid config", func(t *testing.T) {
@@ -193,7 +199,10 @@ func TestClusterCacheHealthCheck(t *testing.T) {
 			})
 
 			// This should succeed after N consecutive failed requests.
-			g.Eventually(func() bool { return cct.clusterAccessorExists(testClusterKey) }, 5*time.Second, 1*time.Second).Should(BeFalse())
+			g.Eventually(func() bool {
+				_, ok := cct.loadAccessor(testClusterKey)
+				return ok
+			}, 5*time.Second, 1*time.Second).Should(BeFalse())
 		})
 	})
 }
diff --git a/controllers/remote/cluster_cache_tracker.go b/controllers/remote/cluster_cache_tracker.go
@@ -58,6 +58,10 @@ const (
 	clusterCacheControllerName    = "cluster-cache-tracker"
 )
 
+// ErrClusterLocked is returned in methods that require cluster-level locking
+// if the cluster is already locked by another concurrent call.
+var ErrClusterLocked = errors.New("cluster is locked already")
+
 // ClusterCacheTracker manages client caches for workload clusters.
 type ClusterCacheTracker struct {
 	log                   logr.Logger
@@ -67,10 +71,10 @@ type ClusterCacheTracker struct {
 
 	// clusterAccessorsLock is used to lock the access to the clusterAccessors map.
 	clusterAccessorsLock sync.RWMutex
-	// clusterAccessors is the map of clusterAccessor by cluster.
+	// clusterAccessors is the map of clusterAccessors by cluster.
 	clusterAccessors map[client.ObjectKey]*clusterAccessor
-	// clusterLock is a per-cluster lock used whenever we lock per-cluster actions
-	// like creating a client or adding watches.
+	// clusterLock is a per-cluster lock used whenever we're locking for a specific cluster.
+	// E.g. for actions like creating a client or adding watches.
 	clusterLock *keyedMutex
 
 	indexes []Index
@@ -178,6 +182,23 @@ func (t *ClusterCacheTracker) clusterAccessorExists(cluster client.ObjectKey) bo
 	return exists
 }
 
+// loadAccessor returns the clusterAccessor stored for the cluster and whether one exists.
+func (t *ClusterCacheTracker) loadAccessor(cluster client.ObjectKey) (*clusterAccessor, bool) {
+	t.clusterAccessorsLock.RLock()
+	defer t.clusterAccessorsLock.RUnlock()
+
+	accessor, ok := t.clusterAccessors[cluster]
+	return accessor, ok
+}
+
+// storeAccessor stores the clusterAccessor for the cluster, replacing any existing entry.
+func (t *ClusterCacheTracker) storeAccessor(cluster client.ObjectKey, accessor *clusterAccessor) {
+	t.clusterAccessorsLock.Lock()
+	defer t.clusterAccessorsLock.Unlock()
+
+	t.clusterAccessors[cluster] = accessor
+}
+
 // getClusterAccessor returns a clusterAccessor for cluster.
 // It first tries to return an already-created clusterAccessor.
 // It then falls back to create a new clusterAccessor if needed.
@@ -186,49 +207,36 @@ func (t *ClusterCacheTracker) clusterAccessorExists(cluster client.ObjectKey) bo
 func (t *ClusterCacheTracker) getClusterAccessor(ctx context.Context, cluster client.ObjectKey, indexes ...Index) (*clusterAccessor, error) {
 	log := ctrl.LoggerFrom(ctx, "cluster", klog.KRef(cluster.Namespace, cluster.Name))
 
-	loadExistingAccessor := func() *clusterAccessor {
-		t.clusterAccessorsLock.RLock()
-		defer t.clusterAccessorsLock.RUnlock()
-		return t.clusterAccessors[cluster]
-	}
-	storeAccessor := func(a *clusterAccessor) {
-		t.clusterAccessorsLock.Lock()
-		defer t.clusterAccessorsLock.Unlock()
-		t.clusterAccessors[cluster] = a
-	}
-
 	// If the clusterAccessor already exists, return early.
-	a := loadExistingAccessor()
-	if a != nil {
-		return a, nil
+	if accessor, ok := t.loadAccessor(cluster); ok {
+		return accessor, nil
 	}
 
 	// clusterAccessor doesn't exist yet, we might have to initialize one.
 	// Lock on the cluster to ensure only one clusterAccessor is initialized
 	// for the cluster at the same time.
 	// Return an error if another go routine already tries to create a clusterAccessor.
-	unlockCluster, ok := t.clusterLock.TryLock(cluster)
-	if !ok {
-		return nil, errors.Errorf("error creating new cluster accessor: another go routine is already trying to create the cluster accessor for this cluster")
+	if ok := t.clusterLock.TryLock(cluster); !ok {
+		return nil, errors.Wrapf(ErrClusterLocked, "failed to create cluster accessor: failed to get lock for cluster")
 	}
-	defer unlockCluster()
+	defer t.clusterLock.Unlock(cluster)
 
 	// Until we got the cluster lock a different goroutine might have initialized the clusterAccessor
 	// for this cluster successfully already. If this is the case we return it.
-	a = loadExistingAccessor()
-	if a != nil {
-		return a, nil
+	if accessor, ok := t.loadAccessor(cluster); ok {
+		return accessor, nil
 	}
 
 	// We are the go routine who has to initialize the clusterAccessor.
 	log.V(4).Info("Creating new cluster accessor")
-	a, err := t.newClusterAccessor(ctx, cluster, indexes...)
+	accessor, err := t.newClusterAccessor(ctx, cluster, indexes...)
 	if err != nil {
-		return nil, errors.Wrap(err, "error creating new cluster accessor")
+		return nil, errors.Wrap(err, "failed to create cluster accessor")
 	}
+
 	log.V(4).Info("Storing new cluster accessor")
-	storeAccessor(a)
-	return a, nil
+	t.storeAccessor(cluster, accessor)
+	return accessor, nil
 }
 
 // newClusterAccessor creates a new clusterAccessor.
@@ -435,11 +443,11 @@ func (t *ClusterCacheTracker) Watch(ctx context.Context, input WatchInput) error
 	}
 
 	// We have to lock the cluster, so that the watch is not created multiple times in parallel.
-	unlock, ok := t.clusterLock.TryLock(input.Cluster)
+	ok := t.clusterLock.TryLock(input.Cluster)
 	if !ok {
-		return errors.Errorf("failed to add watch: another go routine is already trying to create the cluster accessor")
+		return errors.Wrapf(ErrClusterLocked, "failed to add watch: error getting lock for cluster")
 	}
-	defer unlock()
+	defer t.clusterLock.Unlock(input.Cluster)
 
 	if a.watches.Has(input.Name) {
 		t.log.V(6).Info("Watch already exists", "Cluster", klog.KRef(input.Cluster.Namespace, input.Cluster.Name), "name", input.Name)
@@ -518,7 +526,7 @@ func (t *ClusterCacheTracker) healthCheckCluster(ctx context.Context, in *health
 			return false, nil
 		}
 
-		if !t.clusterAccessorExists(in.cluster) {
+		if _, ok := t.loadAccessor(in.cluster); !ok {
 			// Cache for this cluster has already been cleaned up.
 			// Nothing to do, so return true.
 			return true, nil
diff --git a/controllers/remote/keyedmutex.go b/controllers/remote/keyedmutex.go
@@ -16,63 +16,55 @@ limitations under the License.
 
 package remote
 
-import "sync"
+import (
+	"sync"
+
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
 
 // keyedMutex is a mutex locking on the key provided to the Lock function.
 // Only one caller can hold the lock for a specific key at a time.
 // A second Lock call if the lock is already held for a key returns false.
 type keyedMutex struct {
 	locksMtx sync.Mutex
-	locks    map[interface{}]*sync.Mutex
+	locks    map[client.ObjectKey]*sync.Mutex
 }
 
 // newKeyedMutex creates a new keyed mutex ready for use.
 func newKeyedMutex() *keyedMutex {
 	return &keyedMutex{
-		locks: make(map[interface{}]*sync.Mutex),
+		locks: make(map[client.ObjectKey]*sync.Mutex),
 	}
 }
 
-// unlock unlocks a currently locked key.
-type unlock func()
-
 // TryLock locks the passed in key if it's not already locked.
-// Returns the unlock function to release the lock on the key.
 // A second Lock call if the lock is already held for a key returns false.
 // In the ClusterCacheTracker case the key is the ObjectKey for a cluster.
-func (k *keyedMutex) TryLock(key interface{}) (unlock, bool) {
-	// Get the lock if it doesn't exist already.
-	// If it does exist, return false.
-	l, ok := func() (*sync.Mutex, bool) {
-		k.locksMtx.Lock()
-		defer k.locksMtx.Unlock()
-
-		_, ok := k.locks[key]
-		if !ok {
-			// Lock doesn't exist yet, create one and return it.
-			l := &sync.Mutex{}
-			k.locks[key] = l
-			return l, true
-		}
-
-		// Lock already exists, return false.
-		return nil, false
-	}()
+func (k *keyedMutex) TryLock(key client.ObjectKey) bool {
+	k.locksMtx.Lock()
+	defer k.locksMtx.Unlock()
 
-	// Return false if another go routine already holds the lock for this key (e.g. Cluster).
-	if !ok {
-		return nil, false
+	// Check if there is already a lock for this key (e.g. Cluster).
+	if _, ok := k.locks[key]; ok {
+		// There is already a lock, return false.
+		return false
 	}
 
-	// Lock for the current key (e.g. Cluster).
+	// No lock exists for this key yet: create one, register it in the map and acquire it.
+	l := &sync.Mutex{}
+	k.locks[key] = l
 	l.Lock()
 
-	// Unlock the key (e.g. Cluster) and remove it from the lock map.
-	return func() {
-		k.locksMtx.Lock()
-		defer k.locksMtx.Unlock()
+	return true
+}
+
+// Unlock releases the lock for the key and removes it from the lock map; it is a no-op if the key is not locked.
+func (k *keyedMutex) Unlock(key client.ObjectKey) {
+	k.locksMtx.Lock()
+	defer k.locksMtx.Unlock()
 
+	if l, ok := k.locks[key]; ok {
 		l.Unlock()
 		delete(k.locks, key)
-	}, true
+	}
 }
diff --git a/controllers/remote/keyedmutex_test.go b/controllers/remote/keyedmutex_test.go
@@ -34,16 +34,14 @@ func TestKeyedMutex(t *testing.T) {
 
 		// Try to lock cluster1.
 		// Should work as nobody currently holds the lock for cluster1.
-		unlock, ok := km.TryLock(cluster1)
-		g.Expect(ok).To(BeTrue())
+		g.Expect(km.TryLock(cluster1)).To(BeTrue())
 
 		// Try to lock cluster1 again.
 		// Shouldn't work as cluster1 is already locked.
-		_, ok = km.TryLock(cluster1)
-		g.Expect(ok).To(BeFalse())
+		g.Expect(km.TryLock(cluster1)).To(BeFalse())
 
 		// Unlock cluster1.
-		unlock()
+		km.Unlock(cluster1)
 
 		// Ensure that the lock was cleaned up from the internal map.
 		g.Expect(km.locks).To(HaveLen(0))
@@ -62,24 +60,19 @@ func TestKeyedMutex(t *testing.T) {
 		// Run this twice to ensure Clusters can be locked again
 		// after they have been unlocked.
 		for i := 0; i < 2; i++ {
-			unlocks := make([]unlock, 0, len(clusters))
-
 			// Lock all Clusters (should work).
 			for _, key := range clusters {
-				unlock, ok := km.TryLock(key)
-				g.Expect(ok).To(BeTrue())
-				unlocks = append(unlocks, unlock)
+				g.Expect(km.TryLock(key)).To(BeTrue())
 			}
 
 			// Ensure Clusters can't be locked again.
 			for _, key := range clusters {
-				_, ok := km.TryLock(key)
-				g.Expect(ok).To(BeFalse())
+				g.Expect(km.TryLock(key)).To(BeFalse())
 			}
 
 			// Unlock all Clusters.
-			for _, unlock := range unlocks {
-				unlock()
+			for _, key := range clusters {
+				km.Unlock(key)
 			}
 		}