Skip to content

Commit c8c2844

Browse files
committed
Fix race for sending errors in watch
1 parent 16abcd7 commit c8c2844

File tree

3 files changed

+112
-31
lines changed

3 files changed

+112
-31
lines changed

staging/src/k8s.io/apiserver/pkg/storage/etcd3/watcher.go

+40-31
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,12 @@ func (wc *watchChan) serialProcessEvents(wg *sync.WaitGroup) {
438438
for {
439439
select {
440440
case e := <-wc.incomingEventChan:
441-
res := wc.transform(e)
441+
res, err := wc.transform(e)
442+
if err != nil {
443+
wc.sendError(err)
444+
return
445+
}
446+
442447
if res == nil {
443448
continue
444449
}
@@ -461,10 +466,8 @@ func (wc *watchChan) serialProcessEvents(wg *sync.WaitGroup) {
461466

462467
func (wc *watchChan) concurrentProcessEvents(wg *sync.WaitGroup) {
463468
p := concurrentOrderedEventProcessing{
464-
input: wc.incomingEventChan,
465-
processFunc: wc.transform,
466-
output: wc.resultChan,
467-
processingQueue: make(chan chan *watch.Event, processEventConcurrency-1),
469+
wc: wc,
470+
processingQueue: make(chan chan *processingResult, processEventConcurrency-1),
468471

469472
objectType: wc.watcher.objectType,
470473
groupResource: wc.watcher.groupResource,
@@ -481,12 +484,15 @@ func (wc *watchChan) concurrentProcessEvents(wg *sync.WaitGroup) {
481484
}()
482485
}
483486

487+
type processingResult struct {
488+
event *watch.Event
489+
err error
490+
}
491+
484492
type concurrentOrderedEventProcessing struct {
485-
input chan *event
486-
processFunc func(*event) *watch.Event
487-
output chan watch.Event
493+
wc *watchChan
488494

489-
processingQueue chan chan *watch.Event
495+
processingQueue chan chan *processingResult
490496
// Metadata for logging
491497
objectType string
492498
groupResource schema.GroupResource
@@ -498,28 +504,29 @@ func (p *concurrentOrderedEventProcessing) scheduleEventProcessing(ctx context.C
498504
select {
499505
case <-ctx.Done():
500506
return
501-
case e = <-p.input:
507+
case e = <-p.wc.incomingEventChan:
502508
}
503-
processingResponse := make(chan *watch.Event, 1)
509+
processingResponse := make(chan *processingResult, 1)
504510
select {
505511
case <-ctx.Done():
506512
return
507513
case p.processingQueue <- processingResponse:
508514
}
509515
wg.Add(1)
510-
go func(e *event, response chan<- *watch.Event) {
516+
go func(e *event, response chan<- *processingResult) {
511517
defer wg.Done()
518+
responseEvent, err := p.wc.transform(e)
512519
select {
513520
case <-ctx.Done():
514-
case response <- p.processFunc(e):
521+
case response <- &processingResult{event: responseEvent, err: err}:
515522
}
516523
}(e, processingResponse)
517524
}
518525
}
519526

520527
func (p *concurrentOrderedEventProcessing) collectEventProcessing(ctx context.Context) {
521-
var processingResponse chan *watch.Event
522-
var e *watch.Event
528+
var processingResponse chan *processingResult
529+
var r *processingResult
523530
for {
524531
select {
525532
case <-ctx.Done():
@@ -529,21 +536,25 @@ func (p *concurrentOrderedEventProcessing) collectEventProcessing(ctx context.Co
529536
select {
530537
case <-ctx.Done():
531538
return
532-
case e = <-processingResponse:
539+
case r = <-processingResponse:
533540
}
534-
if e == nil {
541+
if r.err != nil {
542+
p.wc.sendError(r.err)
543+
return
544+
}
545+
if r.event == nil {
535546
continue
536547
}
537-
if len(p.output) == cap(p.output) {
538-
klog.V(3).InfoS("Fast watcher, slow processing. Probably caused by slow dispatching events to watchers", "outgoingEvents", outgoingBufSize, "objectType", p.objectType, "groupResource", p.groupResource)
548+
if len(p.wc.resultChan) == cap(p.wc.resultChan) {
549+
klog.V(3).InfoS("Fast watcher, slow processing. Probably caused by slow dispatching events to watchers", "outgoingEvents", outgoingBufSize, "objectType", p.wc.watcher.objectType, "groupResource", p.wc.watcher.groupResource)
539550
}
540551
// If user couldn't receive results fast enough, we also block incoming events from watcher.
541552
// Because storing events in local will cause more memory usage.
542553
// The worst case would be closing the fast watcher.
543554
select {
544-
case <-ctx.Done():
555+
case p.wc.resultChan <- *r.event:
556+
case <-p.wc.ctx.Done():
545557
return
546-
case p.output <- *e:
547558
}
548559
}
549560
}
@@ -561,25 +572,23 @@ func (wc *watchChan) acceptAll() bool {
561572
}
562573

563574
// transform transforms an event into a result for user if not filtered.
564-
func (wc *watchChan) transform(e *event) (res *watch.Event) {
575+
func (wc *watchChan) transform(e *event) (res *watch.Event, err error) {
565576
curObj, oldObj, err := wc.prepareObjs(e)
566577
if err != nil {
567578
klog.Errorf("failed to prepare current and previous objects: %v", err)
568-
wc.sendError(err)
569-
return nil
579+
return nil, err
570580
}
571581

572582
switch {
573583
case e.isProgressNotify:
574584
object := wc.watcher.newFunc()
575585
if err := wc.watcher.versioner.UpdateObject(object, uint64(e.rev)); err != nil {
576586
klog.Errorf("failed to propagate object version: %v", err)
577-
return nil
587+
return nil, fmt.Errorf("failed to propagate object resource version: %w", err)
578588
}
579589
if e.isInitialEventsEndBookmark {
580590
if err := storage.AnnotateInitialEventsEndBookmark(object); err != nil {
581-
wc.sendError(fmt.Errorf("error while accessing object's metadata gr: %v, type: %v, obj: %#v, err: %v", wc.watcher.groupResource, wc.watcher.objectType, object, err))
582-
return nil
591+
return nil, fmt.Errorf("error while accessing object's metadata gr: %v, type: %v, obj: %#v, err: %w", wc.watcher.groupResource, wc.watcher.objectType, object, err)
583592
}
584593
}
585594
res = &watch.Event{
@@ -588,15 +597,15 @@ func (wc *watchChan) transform(e *event) (res *watch.Event) {
588597
}
589598
case e.isDeleted:
590599
if !wc.filter(oldObj) {
591-
return nil
600+
return nil, nil
592601
}
593602
res = &watch.Event{
594603
Type: watch.Deleted,
595604
Object: oldObj,
596605
}
597606
case e.isCreated:
598607
if !wc.filter(curObj) {
599-
return nil
608+
return nil, nil
600609
}
601610
res = &watch.Event{
602611
Type: watch.Added,
@@ -608,7 +617,7 @@ func (wc *watchChan) transform(e *event) (res *watch.Event) {
608617
Type: watch.Modified,
609618
Object: curObj,
610619
}
611-
return res
620+
return res, nil
612621
}
613622
curObjPasses := wc.filter(curObj)
614623
oldObjPasses := wc.filter(oldObj)
@@ -630,7 +639,7 @@ func (wc *watchChan) transform(e *event) (res *watch.Event) {
630639
}
631640
}
632641
}
633-
return res
642+
return res, nil
634643
}
635644

636645
func transformErrorToEvent(err error) *watch.Event {

staging/src/k8s.io/apiserver/pkg/storage/etcd3/watcher_test.go

+5
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,11 @@ func TestWatchListMatchSingle(t *testing.T) {
155155
storagetesting.RunWatchListMatchSingle(ctx, t, store)
156156
}
157157

158+
func TestWatchErrorEventIsBlockingFurtherEvent(t *testing.T) {
159+
ctx, store, _ := testSetup(t)
160+
storagetesting.RunWatchErrorIsBlockingFurtherEvents(ctx, t, &storeWithPrefixTransformer{store})
161+
}
162+
158163
// =======================================================================
159164
// Implementation-specific tests are following.
160165
// The following tests are exercising the details of the implementation

staging/src/k8s.io/apiserver/pkg/storage/testing/watcher_tests.go

+67
Original file line numberDiff line numberDiff line change
@@ -1698,6 +1698,73 @@ func RunWatchListMatchSingle(ctx context.Context, t *testing.T, store storage.In
16981698
TestCheckNoMoreResultsWithIgnoreFunc(t, w, nil)
16991699
}
17001700

1701+
func RunWatchErrorIsBlockingFurtherEvents(ctx context.Context, t *testing.T, store InterfaceWithPrefixTransformer) {
1702+
foo := &example.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "test", Name: "foo"}}
1703+
fooKey := fmt.Sprintf("/pods/%s/%s", foo.Namespace, foo.Name)
1704+
fooCreated := &example.Pod{}
1705+
if err := store.Create(context.Background(), fooKey, foo, fooCreated, 0); err != nil {
1706+
t.Errorf("failed to create object: %v", err)
1707+
}
1708+
bar := &example.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "test", Name: "bar"}}
1709+
barKey := fmt.Sprintf("/pods/%s/%s", bar.Namespace, bar.Name)
1710+
barCreated := &example.Pod{}
1711+
if err := store.Create(context.Background(), barKey, bar, barCreated, 0); err != nil {
1712+
t.Errorf("failed to create object: %v", err)
1713+
}
1714+
1715+
// Update transformer to ensure that foo will become effectively corrupted.
1716+
revertTransformer := store.UpdatePrefixTransformer(
1717+
func(transformer *PrefixTransformer) value.Transformer {
1718+
transformer.prefix = []byte("other-prefix")
1719+
return transformer
1720+
})
1721+
defer revertTransformer()
1722+
1723+
baz := &example.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "test", Name: "baz"}}
1724+
bazKey := fmt.Sprintf("/pods/%s/%s", baz.Namespace, baz.Name)
1725+
bazCreated := &example.Pod{}
1726+
if err := store.Create(context.Background(), bazKey, baz, bazCreated, 0); err != nil {
1727+
t.Errorf("failed to create object: %v", err)
1728+
}
1729+
1730+
opts := storage.ListOptions{
1731+
ResourceVersion: fooCreated.ResourceVersion,
1732+
Predicate: storage.Everything,
1733+
Recursive: true,
1734+
}
1735+
1736+
// Run N concurrent watches. Given the asynchronous nature, we increase the
1737+
// probability of hitting the race in at least one of those watches.
1738+
concurrentWatches := 10
1739+
wg := sync.WaitGroup{}
1740+
for i := 0; i < concurrentWatches; i++ {
1741+
wg.Add(1)
1742+
go func() {
1743+
defer wg.Done()
1744+
w, err := store.Watch(ctx, "/pods", opts)
1745+
if err != nil {
1746+
t.Errorf("failed to create watch: %v", err)
1747+
return
1748+
}
1749+
1750+
// We issue the watch starting from object bar.
1751+
// The object fails TransformFromStorage and generates ERROR watch event.
1752+
// The further events (i.e. ADDED event for baz object) should not be
1753+
// emitted, so we verify no events other than ERROR type are emitted.
1754+
for {
1755+
event, ok := <-w.ResultChan()
1756+
if !ok {
1757+
break
1758+
}
1759+
if event.Type != watch.Error {
1760+
t.Errorf("unexpected event: %#v", event)
1761+
}
1762+
}
1763+
}()
1764+
}
1765+
wg.Wait()
1766+
}
1767+
17011768
func makePod(namePrefix string) *example.Pod {
17021769
return &example.Pod{
17031770
ObjectMeta: metav1.ObjectMeta{

0 commit comments

Comments
 (0)