Skip to content

Commit 00f7b95

Browse files
committed
fix: flaky TestPrepareCandidate
1 parent 210f129 commit 00f7b95

File tree

1 file changed

+81
-20
lines changed

1 file changed

+81
-20
lines changed

pkg/scheduler/framework/preemption/preemption_test.go

+81-20
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"fmt"
2323
"reflect"
2424
"sort"
25+
"strings"
2526
"sync"
2627
"testing"
2728
"time"
@@ -441,12 +442,16 @@ func TestPrepareCandidate(t *testing.T) {
441442
)
442443

443444
tests := []struct {
444-
name string
445-
nodeNames []string
446-
candidate *fakeCandidate
447-
preemptor *v1.Pod
448-
testPods []*v1.Pod
449-
expectedDeletedPods []string
445+
name string
446+
nodeNames []string
447+
candidate *fakeCandidate
448+
preemptor *v1.Pod
449+
testPods []*v1.Pod
450+
// expectedDeletedPod is the pod name that is expected to be deleted.
451+
//
452+
// You can set multiple pod names if there are multiple possible outcomes.
453+
// Both an empty slice and "" mean that no pod is expected to be deleted.
454+
expectedDeletedPod []string
450455
expectedDeletionError bool
451456
expectedPatchError bool
452457
// Only compared when async preemption is disabled.
@@ -457,7 +462,6 @@ func TestPrepareCandidate(t *testing.T) {
457462
}{
458463
{
459464
name: "no victims",
460-
461465
candidate: &fakeCandidate{
462466
victims: &extenderv1.Victims{},
463467
},
@@ -485,7 +489,7 @@ func TestPrepareCandidate(t *testing.T) {
485489
victim1,
486490
},
487491
nodeNames: []string{node1Name},
488-
expectedDeletedPods: []string{"victim1"},
492+
expectedDeletedPod: []string{"victim1"},
489493
expectedStatus: nil,
490494
expectedPreemptingMap: sets.New(types.UID("preemptor")),
491495
},
@@ -505,7 +509,7 @@ func TestPrepareCandidate(t *testing.T) {
505509
victim1WithMatchingCondition,
506510
},
507511
nodeNames: []string{node1Name},
508-
expectedDeletedPods: []string{"victim1"},
512+
expectedDeletedPod: []string{"victim1"},
509513
expectedStatus: nil,
510514
expectedPreemptingMap: sets.New(types.UID("preemptor")),
511515
},
@@ -523,7 +527,7 @@ func TestPrepareCandidate(t *testing.T) {
523527
preemptor: preemptor,
524528
testPods: []*v1.Pod{},
525529
nodeNames: []string{node1Name},
526-
expectedDeletedPods: []string{"victim1"},
530+
expectedDeletedPod: []string{"victim1"},
527531
expectedStatus: nil,
528532
expectedPreemptingMap: sets.New(types.UID("preemptor")),
529533
},
@@ -560,7 +564,7 @@ func TestPrepareCandidate(t *testing.T) {
560564
preemptor: preemptor,
561565
testPods: []*v1.Pod{},
562566
nodeNames: []string{node1Name},
563-
expectedDeletedPods: []string{"victim1"},
567+
expectedDeletedPod: []string{"victim1"},
564568
expectedStatus: nil,
565569
expectedPreemptingMap: sets.New(types.UID("preemptor")),
566570
},
@@ -599,9 +603,14 @@ func TestPrepareCandidate(t *testing.T) {
599603
testPods: []*v1.Pod{
600604
victim1,
601605
},
602-
nodeNames: []string{node1Name},
603-
expectedPatchError: true,
604-
expectedDeletedPods: []string{"victim2"},
606+
nodeNames: []string{node1Name},
607+
expectedPatchError: true,
608+
expectedDeletedPod: []string{
609+
"victim2",
610+
// The first victim could fail before the deletion of the second victim happens,
611+
// which results in the second victim not being deleted.
612+
"",
613+
},
605614
expectedStatus: framework.AsStatus(errors.New("patch pod status failed")),
606615
expectedPreemptingMap: sets.New(types.UID("preemptor")),
607616
expectedActivatedPods: map[string]*v1.Pod{preemptor.Name: preemptor},
@@ -629,15 +638,13 @@ func TestPrepareCandidate(t *testing.T) {
629638
objs = append(objs, pod)
630639
}
631640

632-
requestStopper := make(chan struct{})
633641
mu := &sync.RWMutex{}
634642
deletedPods := sets.New[string]()
635643
deletionFailure := false // whether any request to delete pod failed
636644
patchFailure := false // whether any request to patch pod status failed
637645

638646
cs := clientsetfake.NewClientset(objs...)
639647
cs.PrependReactor("delete", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
640-
<-requestStopper
641648
mu.Lock()
642649
defer mu.Unlock()
643650
name := action.(clienttesting.DeleteAction).GetName()
@@ -651,7 +658,6 @@ func TestPrepareCandidate(t *testing.T) {
651658
})
652659

653660
cs.PrependReactor("patch", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
654-
<-requestStopper
655661
mu.Lock()
656662
defer mu.Unlock()
657663
if action.(clienttesting.PatchAction).GetName() == "fail-victim" {
@@ -664,6 +670,15 @@ func TestPrepareCandidate(t *testing.T) {
664670
informerFactory := informers.NewSharedInformerFactory(cs, 0)
665671
eventBroadcaster := events.NewBroadcaster(&events.EventSinkImpl{Interface: cs.EventsV1()})
666672
fakeActivator := &fakePodActivator{activatedPods: make(map[string]*v1.Pod), mu: mu}
673+
674+
// Note: NominatedPodsForNode is called at the beginning of the goroutine in any case.
675+
// fakePodNominator can delay the response of NominatedPodsForNode until the channel is closed,
676+
// which allows us to test the preempting map before the goroutine has done anything.
677+
requestStopper := make(chan struct{})
678+
nominator := &fakePodNominator{
679+
SchedulingQueue: internalqueue.NewSchedulingQueue(nil, informerFactory),
680+
requestStopper: requestStopper,
681+
}
667682
fwk, err := tf.NewFramework(
668683
ctx,
669684
registeredPlugins, "",
@@ -672,7 +687,7 @@ func TestPrepareCandidate(t *testing.T) {
672687
frameworkruntime.WithInformerFactory(informerFactory),
673688
frameworkruntime.WithWaitingPods(frameworkruntime.NewWaitingPodsMap()),
674689
frameworkruntime.WithSnapshotSharedLister(internalcache.NewSnapshot(tt.testPods, nodes)),
675-
frameworkruntime.WithPodNominator(internalqueue.NewSchedulingQueue(nil, informerFactory)),
690+
frameworkruntime.WithPodNominator(nominator),
676691
frameworkruntime.WithEventRecorder(eventBroadcaster.NewRecorder(scheme.Scheme, "test-scheduler")),
677692
frameworkruntime.WithPodActivator(fakeActivator),
678693
)
@@ -720,10 +735,15 @@ func TestPrepareCandidate(t *testing.T) {
720735
if err := wait.PollUntilContextTimeout(ctx, time.Millisecond*200, wait.ForeverTestTimeout, false, func(ctx context.Context) (bool, error) {
721736
mu.RLock()
722737
defer mu.RUnlock()
723-
if !deletedPods.Equal(sets.New(tt.expectedDeletedPods...)) {
724-
lastErrMsg = fmt.Sprintf("expected deleted pods %v, got %v", tt.expectedDeletedPods, deletedPods.UnsortedList())
738+
739+
pe.mu.Lock()
740+
defer pe.mu.Unlock()
741+
if len(pe.preempting) != 0 {
742+
// The preempting map should be empty after the goroutine finishes, in all test cases.
743+
lastErrMsg = fmt.Sprintf("expected no preempting pods, got %v", pe.preempting)
725744
return false, nil
726745
}
746+
727747
if tt.expectedDeletionError != deletionFailure {
728748
lastErrMsg = fmt.Sprintf("expected deletion error %v, got %v", tt.expectedDeletionError, deletionFailure)
729749
return false, nil
@@ -744,6 +764,34 @@ func TestPrepareCandidate(t *testing.T) {
744764
}
745765
}
746766

767+
if deletedPods.Len() > 1 {
768+
// For now, we only expect at most one pod to be deleted in all test cases.
769+
// If we need to test the deletion of multiple pods, we need to update the test table definition.
770+
return false, fmt.Errorf("expected at most one pod to be deleted, got %v", deletedPods.UnsortedList())
771+
}
772+
773+
if len(tt.expectedDeletedPod) == 0 {
774+
if deletedPods.Len() != 0 {
775+
// When tt.expectedDeletedPod is empty, we expect no pod to be deleted.
776+
return false, fmt.Errorf("expected no pod to be deleted, got %v", deletedPods.UnsortedList())
777+
}
778+
// nothing further to check.
779+
return true, nil
780+
}
781+
782+
found := false
783+
for _, podName := range tt.expectedDeletedPod {
784+
if deletedPods.Has(podName) ||
785+
// If podName is empty, we expect no pod to be deleted.
786+
(deletedPods.Len() == 0 && podName == "") {
787+
found = true
788+
}
789+
}
790+
if !found {
791+
lastErrMsg = fmt.Sprintf("expected pod %v to be deleted, but %v is deleted", strings.Join(tt.expectedDeletedPod, " or "), deletedPods.UnsortedList())
792+
return false, nil
793+
}
794+
747795
return true, nil
748796
}); err != nil {
749797
t.Fatal(lastErrMsg)
@@ -753,6 +801,19 @@ func TestPrepareCandidate(t *testing.T) {
753801
}
754802
}
755803

804+
// fakePodNominator is a test double that embeds a SchedulingQueue and replaces
// only NominatedPodsForNode, making that call wait on requestStopper.
type fakePodNominator struct {
805+
// Embed SchedulingQueue so that only NominatedPodsForNode needs to be overridden.
806+
internalqueue.SchedulingQueue
807+
808+
// NominatedPodsForNode() does not return until a receive on this channel
// succeeds; the test is expected to close it to release the caller.
809+
requestStopper chan struct{}
810+
}
811+
812+
// NominatedPodsForNode waits for a receive on f.requestStopper and then returns nil.
// nodeName is ignored and the embedded SchedulingQueue is not consulted.
func (f *fakePodNominator) NominatedPodsForNode(nodeName string) []*framework.PodInfo {
813+
// Block here until the channel is released (e.g. closed by the test).
<-f.requestStopper
814+
return nil
815+
}
816+
756817
type fakeExtender struct {
757818
ignorable bool
758819
errProcessPreemption bool

0 commit comments

Comments
 (0)