
Commit 151fd64

Machine: ignore attached Volumes referred by pods ignored during drain
1 parent 274d7e2 commit 151fd64

4 files changed (+204 -5 lines)

internal/controllers/machine/drain/filters.go (+11)

@@ -62,6 +62,17 @@ func (l *PodDeleteList) Pods() []*corev1.Pod {
 	return pods
 }
 
+// IgnoredPods returns a list of Pods that have to be ignored before the Node can be considered completely drained.
+func (l *PodDeleteList) IgnoredPods() []*corev1.Pod {
+	pods := []*corev1.Pod{}
+	for _, i := range l.items {
+		if !i.Status.Delete {
+			pods = append(pods, i.Pod)
+		}
+	}
+	return pods
+}
+
 func (l *PodDeleteList) errors() []error {
 	failedPods := make(map[string][]string)
 	for _, i := range l.items {
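For orientation (not part of the commit): the new IgnoredPods accessor is the counterpart of the existing Pods() helper, returning the entries that a drain filter decided not to delete. The sketch below only illustrates that selection logic with simplified stand-in types, since PodDeleteList and its items are unexported; podDeleteList, podDelete, and the sample pod names are hypothetical.

package main

import "fmt"

// Simplified stand-ins for the drain package's unexported types.
type podDeleteStatus struct{ Delete bool }

type podDelete struct {
	Name   string
	Status podDeleteStatus
}

type podDeleteList struct{ items []podDelete }

// ignoredPods mirrors PodDeleteList.IgnoredPods: everything not marked for deletion.
func (l *podDeleteList) ignoredPods() []string {
	names := []string{}
	for _, i := range l.items {
		if !i.Status.Delete {
			names = append(names, i.Name)
		}
	}
	return names
}

func main() {
	l := &podDeleteList{items: []podDelete{
		{Name: "app-1", Status: podDeleteStatus{Delete: true}},         // would be evicted during drain
		{Name: "daemonset-pod", Status: podDeleteStatus{Delete: false}}, // skipped by a drain filter
	}}
	fmt.Println(l.ignoredPods()) // [daemonset-pod]
}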

internal/controllers/machine/machine_controller.go (+179 -3)

@@ -24,11 +24,13 @@ import (
 
 	"github.com/pkg/errors"
 	corev1 "k8s.io/api/core/v1"
+	storagev1 "k8s.io/api/storage/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/types"
 	kerrors "k8s.io/apimachinery/pkg/util/errors"
+	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/client-go/tools/record"
 	"k8s.io/klog/v2"
@@ -808,12 +810,52 @@ func (r *Reconciler) shouldWaitForNodeVolumes(ctx context.Context, cluster *clus
 		return false, nil
 	}
 
-	if len(node.Status.VolumesAttached) != 0 {
-		log.Info("Waiting for Node volumes to be detached")
+	waiter, err := newAttachedVolumeHelper(ctx, remoteClient, node)
+	if err != nil {
+		return true, err
+	}
+
+	if !waiter.hasAttachedVolumes() {
+		// No attached volumes to wait for.
+		return false, nil
+	}
+
+	// Get all PVCs which are okay to ignore because they got skipped during drain.
+	pvcsToIgnoreFromPods, err := getPersistentVolumeClaimsFromIgnoredPods(ctx, remoteClient, nodeName)
+	if err != nil {
+		return true, err
+	}
+
+	if len(pvcsToIgnoreFromPods) == 0 {
+		// There are attached volumes to wait for and none to ignore.
 		return true, nil
 	}
 
-	return false, nil
+	// Get all PersistentVolumes which are referred by Pods ignored during drain via PersistentVolumeClaims.
+	pvs, err := getPersistentVolumes(ctx, remoteClient, func(pv *corev1.PersistentVolume) bool {
+		if pv.Spec.ClaimRef != nil && pv.Spec.ClaimRef.Kind == "PersistentVolumeClaim" {
+			if pvcsToIgnoreFromPods.Has(types.NamespacedName{Namespace: pv.Spec.ClaimRef.Namespace, Name: pv.Spec.ClaimRef.Name}) {
+				return true
+			}
+		}
+		return false
+	})
+	if err != nil {
+		return true, err
+	}
+
+	// Add all of these PVs to the ignore list.
+	for _, pv := range pvs {
+		waiter.ignore(pv)
+	}
+
+	if !waiter.hasAttachedVolumes() {
+		// No attached volumes to wait for.
+		return false, nil
+	}
+
+	log.Info("Waiting for Node volumes to be detached")
+	return true, nil
 }
 
 func (r *Reconciler) deleteNode(ctx context.Context, cluster *clusterv1.Cluster, name string) error {
@@ -1016,3 +1058,137 @@ func (r *Reconciler) nodeToMachine(ctx context.Context, o client.Object) []recon
 
 	return nil
 }
+
+type attachedVolumeHelper struct {
+	// attachedVolumeHandles contains VolumeHandles extracted from Node.Status.VolumesAttached.
+	attachedVolumeHandles sets.Set[string]
+	// attachedPVNames contains names of PersistentVolumes mapped from VolumeAttachments.
+	attachedPVNames sets.Set[string]
+
+	ignoredVolumeHandles sets.Set[string]
+	ignoredPVNames       sets.Set[string]
+}
+
+func newAttachedVolumeHelper(ctx context.Context, remoteClient client.Client, node *corev1.Node) (*attachedVolumeHelper, error) {
+	w := &attachedVolumeHelper{
+		attachedVolumeHandles: sets.Set[string]{},
+		attachedPVNames:       sets.Set[string]{},
+		ignoredVolumeHandles:  sets.Set[string]{},
+		ignoredPVNames:        sets.Set[string]{},
+	}
+
+	for _, attachedVolume := range node.Status.VolumesAttached {
+		// PVs from CSI have a string like `kubernetes.io/csi/<driver>^<volumeHandle>`.
+		splitted := strings.Split(string(attachedVolume.Name), "^")
+		if len(splitted) != 2 {
+			return nil, errors.Errorf("unable to extract VolumeHandle from node's VolumesAttached name %q", attachedVolume.Name)
+		}
+
+		w.attachedVolumeHandles.Insert(splitted[1])
+	}
+
+	volumeAttachments, err := getVolumeAttachments(ctx, remoteClient, func(va *storagev1.VolumeAttachment) bool {
+		return va.Status.Attached && va.Spec.NodeName == node.GetName()
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	for _, va := range volumeAttachments {
+		if va.Spec.Source.PersistentVolumeName == nil {
+			return nil, errors.Errorf("PersistentVolumeName for VolumeAttachment %s is nil", va.GetName())
+		}
+		w.attachedPVNames.Insert(*va.Spec.Source.PersistentVolumeName)
+	}
+
+	return w, nil
+}
+
+func (w *attachedVolumeHelper) hasAttachedVolumes() bool {
+	return len(w.attachedVolumeHandles.Difference(w.ignoredVolumeHandles)) != 0 || len(w.attachedPVNames.Difference(w.ignoredPVNames)) != 0
+}
+
+func (w *attachedVolumeHelper) ignore(pv *corev1.PersistentVolume) {
+	w.ignoredPVNames.Insert(pv.GetName())
+	w.ignoredVolumeHandles.Insert(pv.Spec.CSI.VolumeHandle)
+}
+
+func getPersistentVolumeClaimsFromIgnoredPods(ctx context.Context, remoteClient client.Client, nodeName string) (sets.Set[types.NamespacedName], error) {
+	drainHelper := drain.Helper{
+		Client: remoteClient,
+	}
+
+	pods, err := drainHelper.GetPodsForEviction(ctx, nodeName)
+	if err != nil {
+		return nil, err
+	}
+
+	ignoredPods := pods.IgnoredPods()
+
+	pvcsToIgnore := sets.Set[types.NamespacedName]{}
+
+	for _, pod := range ignoredPods {
+		for _, volume := range pod.Spec.Volumes {
+			if volume.PersistentVolumeClaim == nil {
+				continue
+			}
+
+			key := types.NamespacedName{Namespace: pod.GetNamespace(), Name: volume.PersistentVolumeClaim.ClaimName}
+			pvcsToIgnore.Insert(key)
+		}
+	}
+
+	return pvcsToIgnore, nil
+}
+
+func getVolumeAttachments(ctx context.Context, c client.Client, filter func(*storagev1.VolumeAttachment) bool) ([]*storagev1.VolumeAttachment, error) {
+	volumeAttachments := []*storagev1.VolumeAttachment{}
+	volumeAttachmentList := &storagev1.VolumeAttachmentList{}
+	for {
+		listOpts := []client.ListOption{
+			client.Continue(volumeAttachmentList.GetContinue()),
+			client.Limit(100),
+		}
+		if err := c.List(ctx, volumeAttachmentList, listOpts...); err != nil {
+			return nil, errors.Wrapf(err, "failed to list %T", volumeAttachmentList)
+		}
+
+		for _, volumeAttachment := range volumeAttachmentList.Items {
+			if filter != nil && !filter(&volumeAttachment) {
+				continue
+			}
+			volumeAttachments = append(volumeAttachments, &volumeAttachment)
+		}
+
+		if volumeAttachmentList.GetContinue() == "" {
+			break
+		}
+	}
+
+	return volumeAttachments, nil
+}
+
+func getPersistentVolumes(ctx context.Context, c client.Client, filter func(*corev1.PersistentVolume) bool) ([]*corev1.PersistentVolume, error) {
+	persistentVolumes := []*corev1.PersistentVolume{}
+	persistentVolumeList := &corev1.PersistentVolumeList{}
+	for {
+		listOpts := []client.ListOption{
+			client.Continue(persistentVolumeList.GetContinue()),
+			client.Limit(100),
+		}
+		if err := c.List(ctx, persistentVolumeList, listOpts...); err != nil {
+			return nil, errors.Wrapf(err, "failed to list %T", persistentVolumeList)
+		}
+
+		for _, persistentVolume := range persistentVolumeList.Items {
+			if filter != nil && !filter(&persistentVolume) {
+				continue
+			}
+			persistentVolumes = append(persistentVolumes, &persistentVolume)
+		}
+
+		if persistentVolumeList.GetContinue() == "" {
+			break
+		}
+	}
+
+	return persistentVolumes, nil
+}
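
A note on the VolumesAttached parsing above: for CSI volumes, the names in Node.Status.VolumesAttached follow the kubernetes.io/csi/<driver>^<volumeHandle> convention, and newAttachedVolumeHelper keeps only the part after the ^ separator. A minimal, standalone sketch of that extraction (the driver name and handle in the example are made up):

package main

import (
	"fmt"
	"strings"
)

// volumeHandleFromAttachedVolumeName extracts the CSI VolumeHandle from a
// Node.Status.VolumesAttached name of the form kubernetes.io/csi/<driver>^<volumeHandle>.
func volumeHandleFromAttachedVolumeName(name string) (string, error) {
	parts := strings.Split(name, "^")
	if len(parts) != 2 {
		return "", fmt.Errorf("unable to extract VolumeHandle from VolumesAttached name %q", name)
	}
	return parts[1], nil
}

func main() {
	// Hypothetical attachment name; the driver and handle are examples only.
	handle, err := volumeHandleFromAttachedVolumeName("kubernetes.io/csi/csi.example.com^pvc-8f1a2b3c")
	if err != nil {
		panic(err)
	}
	fmt.Println(handle) // pvc-8f1a2b3c
}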

internal/controllers/machine/machine_controller_test.go (+9 -2)

@@ -29,6 +29,7 @@ import (
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/client-go/kubernetes/scheme"
 	"k8s.io/client-go/tools/record"
 	"k8s.io/utils/ptr"
@@ -1985,14 +1986,15 @@ func TestShouldWaitForNodeVolumes(t *testing.T) {
 
 	attachedVolumes := []corev1.AttachedVolume{
 		{
-			Name:       "test-volume",
+			Name:       "test-volume^foo",
 			DevicePath: "test-path",
 		},
 	}
 
 	tests := []struct {
 		name     string
 		node     *corev1.Node
+		objs     []runtime.Object
 		expected bool
 	}{
 		{
@@ -2073,7 +2075,8 @@ func TestShouldWaitForNodeVolumes(t *testing.T) {
 			var objs []client.Object
 			objs = append(objs, testCluster, tt.node)
 
-			c := fake.NewClientBuilder().WithObjects(objs...).Build()
+			c := fake.NewClientBuilder().WithIndex(&corev1.Pod{}, "spec.nodeName", nodeNameIndex).
+				WithObjects(objs...).Build()
 			tracker := remote.NewTestClusterCacheTracker(ctrl.Log, c, c, fakeScheme, client.ObjectKeyFromObject(testCluster))
 			r := &Reconciler{
 				Client: c,
@@ -2087,6 +2090,10 @@ func TestShouldWaitForNodeVolumes(t *testing.T) {
 	}
 }
 
+func nodeNameIndex(o client.Object) []string {
+	return []string{o.(*corev1.Pod).Spec.NodeName}
+}
+
 func TestIsDeleteNodeAllowed(t *testing.T) {
 	deletionts := metav1.Now()
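
Background on why the test now needs this index (not part of the commit): the new code path lists Pods by the spec.nodeName field through the cluster cache tracker's client, and controller-runtime's fake client rejects field-selector List calls for fields it has no index for. A self-contained sketch of the same pattern, assuming controller-runtime v0.15+; the pod and node names are placeholders:

package main

import (
	"context"
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/client/fake"
)

func main() {
	pod := &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "pod-a", Namespace: "default"},
		Spec:       corev1.PodSpec{NodeName: "node-1"},
	}

	// Register a spec.nodeName index so filtered List calls work against the fake client.
	c := fake.NewClientBuilder().
		WithIndex(&corev1.Pod{}, "spec.nodeName", func(o client.Object) []string {
			return []string{o.(*corev1.Pod).Spec.NodeName}
		}).
		WithObjects(pod).
		Build()

	podList := &corev1.PodList{}
	if err := c.List(context.Background(), podList, client.MatchingFields{"spec.nodeName": "node-1"}); err != nil {
		panic(err)
	}
	fmt.Println(len(podList.Items)) // 1
}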

main.go (+5)

@@ -28,6 +28,7 @@ import (
 	"github.com/spf13/pflag"
 	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
+	storagev1 "k8s.io/api/storage/v1"
 	apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
@@ -122,6 +123,7 @@ var (
 func init() {
 	_ = clientgoscheme.AddToScheme(scheme)
 	_ = apiextensionsv1.AddToScheme(scheme)
+	_ = storagev1.AddToScheme(scheme)
 
 	_ = clusterv1alpha3.AddToScheme(scheme)
 	_ = clusterv1alpha4.AddToScheme(scheme)
@@ -412,6 +414,9 @@ func setupReconcilers(ctx context.Context, mgr ctrl.Manager, watchNamespaces map
 			// Don't cache Pods & DaemonSets (we get/list them e.g. during drain).
 			&corev1.Pod{},
 			&appsv1.DaemonSet{},
+			// Don't cache PersistentVolumes and VolumeAttachments (we get/list them e.g. during wait for volumes to detach).
+			&storagev1.VolumeAttachment{},
+			&corev1.PersistentVolume{},
 		},
 		Indexes:   []remote.Index{remote.NodeProviderIDIndex},
 		ClientQPS: clusterCacheTrackerClientQPS,
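
The ClientUncachedObjects list above controls which types the ClusterCacheTracker reads straight from the workload cluster's API server instead of starting informers for them, which is why the storage objects read during the detach wait are added here. As a point of comparison only (plain controller-runtime, not the CAPI tracker itself), the same idea can be expressed with client.CacheOptions.DisableFor; a minimal sketch, assuming controller-runtime v0.15+ and a reachable kubeconfig:

package main

import (
	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
	storagev1 "k8s.io/api/storage/v1"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

func main() {
	// Bypass the cache for objects that are only read occasionally, so no informer
	// (and no cluster-wide watch) is started for them.
	opts := ctrl.Options{
		Client: client.Options{
			Cache: &client.CacheOptions{
				DisableFor: []client.Object{
					&corev1.Pod{},
					&appsv1.DaemonSet{},
					&storagev1.VolumeAttachment{},
					&corev1.PersistentVolume{},
				},
			},
		},
	}

	mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), opts)
	if err != nil {
		panic(err)
	}
	_ = mgr
}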

0 commit comments
