Skip to content

Commit 27e49cd

Browse files
Properly flush unique queues on startup (#23154)
There have been a number of reports of PRs being blocked whilst being checked, which have been difficult to debug. In investigating #23050 I have realised that, whilst the Warn there is somewhat of a miscall, there was a real bug in the way that the LevelUniqueQueue was being restored on start-up of the PersistableChannelUniqueQueue. In addition, there was a conflict in the setting of the internal leveldb queue name: it wasn't being set, so it was being overridden by other unique queues. This PR fixes these bugs and adds a test case. Thanks to @brechtvl for noticing the second issue. Fix #23050 and others --------- Signed-off-by: Andrew Thornton <[email protected]> Co-authored-by: techknowlogick <[email protected]>
1 parent 04347eb commit 27e49cd

7 files changed

+332
-21
lines changed

Diff for: modules/queue/queue_channel.go

+4-1
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,10 @@ func (q *ChannelQueue) Shutdown() {
124124
log.Trace("ChannelQueue: %s Flushing", q.name)
125125
// We can't use Cleanup here because that will close the channel
126126
if err := q.FlushWithContext(q.terminateCtx); err != nil {
127-
log.Warn("ChannelQueue: %s Terminated before completed flushing", q.name)
127+
count := atomic.LoadInt64(&q.numInQueue)
128+
if count > 0 {
129+
log.Warn("ChannelQueue: %s Terminated before completed flushing", q.name)
130+
}
128131
return
129132
}
130133
log.Debug("ChannelQueue: %s Flushed", q.name)

Diff for: modules/queue/queue_disk_channel.go

+22-7
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,8 @@ func NewPersistableChannelQueue(handle HandlerFunc, cfg, exemplar interface{}) (
9494
},
9595
Workers: 0,
9696
},
97-
DataDir: config.DataDir,
97+
DataDir: config.DataDir,
98+
QueueName: config.Name + "-level",
9899
}
99100

100101
levelQueue, err := NewLevelQueue(wrappedHandle, levelCfg, exemplar)
@@ -172,16 +173,18 @@ func (q *PersistableChannelQueue) Run(atShutdown, atTerminate func(func())) {
172173
atShutdown(q.Shutdown)
173174
atTerminate(q.Terminate)
174175

175-
if lq, ok := q.internal.(*LevelQueue); ok && lq.byteFIFO.Len(lq.shutdownCtx) != 0 {
176+
if lq, ok := q.internal.(*LevelQueue); ok && lq.byteFIFO.Len(lq.terminateCtx) != 0 {
176177
// Just run the level queue - we shut it down once it's flushed
177178
go q.internal.Run(func(_ func()) {}, func(_ func()) {})
178179
go func() {
179-
for !q.IsEmpty() {
180-
_ = q.internal.Flush(0)
180+
for !lq.IsEmpty() {
181+
_ = lq.Flush(0)
181182
select {
182183
case <-time.After(100 * time.Millisecond):
183-
case <-q.internal.(*LevelQueue).shutdownCtx.Done():
184-
log.Warn("LevelQueue: %s shut down before completely flushed", q.internal.(*LevelQueue).Name())
184+
case <-lq.shutdownCtx.Done():
185+
if lq.byteFIFO.Len(lq.terminateCtx) > 0 {
186+
log.Warn("LevelQueue: %s shut down before completely flushed", q.internal.(*LevelQueue).Name())
187+
}
185188
return
186189
}
187190
}
@@ -316,10 +319,22 @@ func (q *PersistableChannelQueue) Shutdown() {
316319
// Redirect all remaining data in the chan to the internal channel
317320
log.Trace("PersistableChannelQueue: %s Redirecting remaining data", q.delayedStarter.name)
318321
close(q.channelQueue.dataChan)
322+
countOK, countLost := 0, 0
319323
for data := range q.channelQueue.dataChan {
320-
_ = q.internal.Push(data)
324+
err := q.internal.Push(data)
325+
if err != nil {
326+
log.Error("PersistableChannelQueue: %s Unable redirect %v due to: %v", q.delayedStarter.name, data, err)
327+
countLost++
328+
} else {
329+
countOK++
330+
}
321331
atomic.AddInt64(&q.channelQueue.numInQueue, -1)
322332
}
333+
if countLost > 0 {
334+
log.Warn("PersistableChannelQueue: %s %d will be restored on restart, %d lost", q.delayedStarter.name, countOK, countLost)
335+
} else if countOK > 0 {
336+
log.Warn("PersistableChannelQueue: %s %d will be restored on restart", q.delayedStarter.name, countOK)
337+
}
323338
log.Trace("PersistableChannelQueue: %s Done Redirecting remaining data", q.delayedStarter.name)
324339

325340
log.Debug("PersistableChannelQueue: %s Shutdown", q.delayedStarter.name)

Diff for: modules/queue/queue_disk_channel_test.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ func TestPersistableChannelQueue(t *testing.T) {
3939
Workers: 1,
4040
BoostWorkers: 0,
4141
MaxWorkers: 10,
42-
Name: "first",
42+
Name: "test-queue",
4343
}, &testData{})
4444
assert.NoError(t, err)
4545

@@ -135,7 +135,7 @@ func TestPersistableChannelQueue(t *testing.T) {
135135
Workers: 1,
136136
BoostWorkers: 0,
137137
MaxWorkers: 10,
138-
Name: "second",
138+
Name: "test-queue",
139139
}, &testData{})
140140
assert.NoError(t, err)
141141

@@ -227,7 +227,7 @@ func TestPersistableChannelQueue_Pause(t *testing.T) {
227227
Workers: 1,
228228
BoostWorkers: 0,
229229
MaxWorkers: 10,
230-
Name: "first",
230+
Name: "test-queue",
231231
}, &testData{})
232232
assert.NoError(t, err)
233233

@@ -433,7 +433,7 @@ func TestPersistableChannelQueue_Pause(t *testing.T) {
433433
Workers: 1,
434434
BoostWorkers: 0,
435435
MaxWorkers: 10,
436-
Name: "second",
436+
Name: "test-queue",
437437
}, &testData{})
438438
assert.NoError(t, err)
439439
pausable, ok = queue.(Pausable)

Diff for: modules/queue/unique_queue_channel.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,9 @@ func (q *ChannelUniqueQueue) Shutdown() {
177177
go func() {
178178
log.Trace("ChannelUniqueQueue: %s Flushing", q.name)
179179
if err := q.FlushWithContext(q.terminateCtx); err != nil {
180-
log.Warn("ChannelUniqueQueue: %s Terminated before completed flushing", q.name)
180+
if !q.IsEmpty() {
181+
log.Warn("ChannelUniqueQueue: %s Terminated before completed flushing", q.name)
182+
}
181183
return
182184
}
183185
log.Debug("ChannelUniqueQueue: %s Flushed", q.name)

Diff for: modules/queue/unique_queue_channel_test.go

+7
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,13 @@ import (
88
"testing"
99
"time"
1010

11+
"code.gitea.io/gitea/modules/log"
12+
1113
"github.com/stretchr/testify/assert"
1214
)
1315

1416
func TestChannelUniqueQueue(t *testing.T) {
17+
_ = log.NewLogger(1000, "console", "console", `{"level":"warn","stacktracelevel":"NONE","stderr":true}`)
1518
handleChan := make(chan *testData)
1619
handle := func(data ...Data) []Data {
1720
for _, datum := range data {
@@ -52,6 +55,8 @@ func TestChannelUniqueQueue(t *testing.T) {
5255
}
5356

5457
func TestChannelUniqueQueue_Batch(t *testing.T) {
58+
_ = log.NewLogger(1000, "console", "console", `{"level":"warn","stacktracelevel":"NONE","stderr":true}`)
59+
5560
handleChan := make(chan *testData)
5661
handle := func(data ...Data) []Data {
5762
for _, datum := range data {
@@ -98,6 +103,8 @@ func TestChannelUniqueQueue_Batch(t *testing.T) {
98103
}
99104

100105
func TestChannelUniqueQueue_Pause(t *testing.T) {
106+
_ = log.NewLogger(1000, "console", "console", `{"level":"warn","stacktracelevel":"NONE","stderr":true}`)
107+
101108
lock := sync.Mutex{}
102109
var queue Queue
103110
var err error

Diff for: modules/queue/unique_queue_disk_channel.go

+33-8
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,8 @@ func NewPersistableChannelUniqueQueue(handle HandlerFunc, cfg, exemplar interfac
9494
},
9595
Workers: 0,
9696
},
97-
DataDir: config.DataDir,
97+
DataDir: config.DataDir,
98+
QueueName: config.Name + "-level",
9899
}
99100

100101
queue.channelQueue = channelUniqueQueue.(*ChannelUniqueQueue)
@@ -209,17 +210,29 @@ func (q *PersistableChannelUniqueQueue) Run(atShutdown, atTerminate func(func())
209210
atTerminate(q.Terminate)
210211
_ = q.channelQueue.AddWorkers(q.channelQueue.workers, 0)
211212

212-
if luq, ok := q.internal.(*LevelUniqueQueue); ok && luq.ByteFIFOUniqueQueue.byteFIFO.Len(luq.shutdownCtx) != 0 {
213+
if luq, ok := q.internal.(*LevelUniqueQueue); ok && !luq.IsEmpty() {
213214
// Just run the level queue - we shut it down once it's flushed
214-
go q.internal.Run(func(_ func()) {}, func(_ func()) {})
215+
go luq.Run(func(_ func()) {}, func(_ func()) {})
215216
go func() {
216-
_ = q.internal.Flush(0)
217-
log.Debug("LevelUniqueQueue: %s flushed so shutting down", q.internal.(*LevelUniqueQueue).Name())
218-
q.internal.(*LevelUniqueQueue).Shutdown()
219-
GetManager().Remove(q.internal.(*LevelUniqueQueue).qid)
217+
_ = luq.Flush(0)
218+
for !luq.IsEmpty() {
219+
_ = luq.Flush(0)
220+
select {
221+
case <-time.After(100 * time.Millisecond):
222+
case <-luq.shutdownCtx.Done():
223+
if luq.byteFIFO.Len(luq.terminateCtx) > 0 {
224+
log.Warn("LevelUniqueQueue: %s shut down before completely flushed", luq.Name())
225+
}
226+
return
227+
}
228+
}
229+
log.Debug("LevelUniqueQueue: %s flushed so shutting down", luq.Name())
230+
luq.Shutdown()
231+
GetManager().Remove(luq.qid)
220232
}()
221233
} else {
222234
log.Debug("PersistableChannelUniqueQueue: %s Skipping running the empty level queue", q.delayedStarter.name)
235+
_ = q.internal.Flush(0)
223236
q.internal.(*LevelUniqueQueue).Shutdown()
224237
GetManager().Remove(q.internal.(*LevelUniqueQueue).qid)
225238
}
@@ -285,8 +298,20 @@ func (q *PersistableChannelUniqueQueue) Shutdown() {
285298
// Redirect all remaining data in the chan to the internal channel
286299
close(q.channelQueue.dataChan)
287300
log.Trace("PersistableChannelUniqueQueue: %s Redirecting remaining data", q.delayedStarter.name)
301+
countOK, countLost := 0, 0
288302
for data := range q.channelQueue.dataChan {
289-
_ = q.internal.Push(data)
303+
err := q.internal.(*LevelUniqueQueue).Push(data)
304+
if err != nil {
305+
log.Error("PersistableChannelUniqueQueue: %s Unable redirect %v due to: %v", q.delayedStarter.name, data, err)
306+
countLost++
307+
} else {
308+
countOK++
309+
}
310+
}
311+
if countLost > 0 {
312+
log.Warn("PersistableChannelUniqueQueue: %s %d will be restored on restart, %d lost", q.delayedStarter.name, countOK, countLost)
313+
} else if countOK > 0 {
314+
log.Warn("PersistableChannelUniqueQueue: %s %d will be restored on restart", q.delayedStarter.name, countOK)
290315
}
291316
log.Trace("PersistableChannelUniqueQueue: %s Done Redirecting remaining data", q.delayedStarter.name)
292317

0 commit comments

Comments
 (0)