Skip to content

Commit f7b139e

Browse files
committed
Fixed data race in schedule
1 parent ab48faa commit f7b139e

File tree

2 files changed

+70
-66
lines changed

2 files changed

+70
-66
lines changed

scheduler/scheduler.go

+43-27
Original file line numberDiff line numberDiff line change
@@ -111,18 +111,18 @@ func (s *Scheduler) work() {
111111
r := <-s.scheduledRuns
112112

113113
// Prepare execution and start it
114-
s.prepareAndExec(&r)
114+
s.prepareAndExec(r)
115115
}
116116
}
117117

118118
// prepareAndExec does the real preparation and start the execution.
119-
func (s *Scheduler) prepareAndExec(r *gaia.PipelineRun) {
119+
func (s *Scheduler) prepareAndExec(r gaia.PipelineRun) {
120120
// Mark the scheduled run as running
121121
r.Status = gaia.RunRunning
122122
r.StartDate = time.Now()
123123

124124
// Update entry in store
125-
err := s.storeService.PipelinePutRun(r)
125+
err := s.storeService.PipelinePutRun(&r)
126126
if err != nil {
127127
gaia.Cfg.Logger.Debug("could not put pipeline run into store during executing work", "error", err.Error())
128128
return
@@ -138,14 +138,14 @@ func (s *Scheduler) prepareAndExec(r *gaia.PipelineRun) {
138138

139139
// Update store
140140
r.Status = gaia.RunFailed
141-
s.storeService.PipelinePutRun(r)
141+
s.storeService.PipelinePutRun(&r)
142142
return
143143
}
144144

145145
// Check if this pipeline has jobs declared
146146
if len(r.Jobs) == 0 {
147147
// Finish pipeline run
148-
s.finishPipelineRun(r, gaia.RunSuccess)
148+
s.finishPipelineRun(&r, gaia.RunSuccess)
149149
return
150150
}
151151

@@ -160,7 +160,7 @@ func (s *Scheduler) prepareAndExec(r *gaia.PipelineRun) {
160160
c := createPipelineCmd(pipeline)
161161
if c == nil {
162162
gaia.Cfg.Logger.Debug("cannot create pipeline start command", "error", errCreateCMDForPipeline.Error())
163-
s.finishPipelineRun(r, gaia.RunFailed)
163+
s.finishPipelineRun(&r, gaia.RunFailed)
164164
return
165165
}
166166

@@ -171,14 +171,14 @@ func (s *Scheduler) prepareAndExec(r *gaia.PipelineRun) {
171171
path = filepath.Join(path, gaia.LogsFileName)
172172
if err := pS.Connect(c, &path); err != nil {
173173
gaia.Cfg.Logger.Debug("cannot connect to pipeline", "error", err.Error(), "pipeline", pipeline)
174-
s.finishPipelineRun(r, gaia.RunFailed)
174+
s.finishPipelineRun(&r, gaia.RunFailed)
175175
return
176176
}
177177
defer pS.Close()
178178

179179
// Schedule jobs and execute them.
180180
// Also update the run in the store.
181-
s.scheduleJobsByPriority(r, pipeline, pS)
181+
s.scheduleJobsByPriority(r, pS)
182182
}
183183

184184
// schedule looks in the store for new work and schedules it.
@@ -249,21 +249,22 @@ func (s *Scheduler) SchedulePipeline(p *gaia.Pipeline) (*gaia.PipelineRun, error
249249

250250
// executeJob executes a single job.
251251
// This method is blocking.
252-
func executeJob(job *gaia.Job, pS Plugin, wg *sync.WaitGroup, triggerSave chan bool) {
252+
func executeJob(job gaia.Job, pS Plugin, wg *sync.WaitGroup, triggerSave chan gaia.Job) {
253253
defer wg.Done()
254254
defer func() {
255-
triggerSave <- true
255+
triggerSave <- job
256256
}()
257257

258258
// Set Job to running and trigger save
259259
job.Status = gaia.JobRunning
260-
triggerSave <- true
260+
triggerSave <- job
261261

262262
// Execute job
263-
if err := pS.Execute(job); err != nil {
263+
if err := pS.Execute(&job); err != nil {
264264
// TODO: Show it to user
265265
gaia.Cfg.Logger.Debug("error during job execution", "error", err.Error(), "job", job)
266266
job.Status = gaia.JobFailed
267+
return
267268
}
268269

269270
// If we are here, the job execution was ok
@@ -273,7 +274,7 @@ func executeJob(job *gaia.Job, pS Plugin, wg *sync.WaitGroup, triggerSave chan b
273274
// scheduleJobsByPriority schedules the given jobs by their respective
274275
// priority. This method is designed to be recursive and blocking.
275276
// If jobs have the same priority, they will be executed in parallel.
276-
func (s *Scheduler) scheduleJobsByPriority(r *gaia.PipelineRun, p *gaia.Pipeline, pS Plugin) {
277+
func (s *Scheduler) scheduleJobsByPriority(r gaia.PipelineRun, pS Plugin) {
277278
// Do a prescheduling and set it to the first waiting job
278279
var lowestPrio int64
279280
for _, job := range r.Jobs {
@@ -293,31 +294,54 @@ func (s *Scheduler) scheduleJobsByPriority(r *gaia.PipelineRun, p *gaia.Pipeline
293294
// We might have multiple jobs with the same priority.
294295
// It means these jobs should be started in parallel.
295296
var wg sync.WaitGroup
296-
triggerSave := make(chan bool)
297+
triggerSave := make(chan gaia.Job)
298+
done := make(chan bool)
297299
for id, job := range r.Jobs {
298300
if job.Priority == lowestPrio && job.Status == gaia.JobWaitingExec {
299301
// Increase wait group by one
300302
wg.Add(1)
301303

302304
// Execute this job in a separate goroutine
303-
go executeJob(&r.Jobs[id], pS, &wg, triggerSave)
305+
go executeJob(r.Jobs[id], pS, &wg, triggerSave)
304306
}
305307
}
306308

307309
// Create channel for storing job run results and spawn results routine
308-
go s.getJobResultsAndStore(triggerSave, r)
310+
go func() {
311+
for {
312+
j, open := <-triggerSave
313+
314+
// Channel has been closed
315+
if !open {
316+
done <- true
317+
return
318+
}
319+
320+
// Filter out the job
321+
for id, job := range r.Jobs {
322+
if job.ID == j.ID {
323+
r.Jobs[id].Status = j.Status
324+
break
325+
}
326+
}
327+
328+
// Store update
329+
s.storeService.PipelinePutRun(&r)
330+
}
331+
}()
309332

310333
// Wait until all jobs have been finished and close results channel
311334
wg.Wait()
312335
close(triggerSave)
336+
<-done
313337

314338
// Check if a job has been failed. If so, stop execution.
315339
// We also check if all jobs has been executed.
316340
var notExecJob bool
317341
for _, job := range r.Jobs {
318342
switch job.Status {
319343
case gaia.JobFailed:
320-
s.finishPipelineRun(r, gaia.RunFailed)
344+
s.finishPipelineRun(&r, gaia.RunFailed)
321345
return
322346
case gaia.JobWaitingExec:
323347
notExecJob = true
@@ -326,20 +350,12 @@ func (s *Scheduler) scheduleJobsByPriority(r *gaia.PipelineRun, p *gaia.Pipeline
326350

327351
// All jobs have been executed
328352
if !notExecJob {
329-
s.finishPipelineRun(r, gaia.RunSuccess)
353+
s.finishPipelineRun(&r, gaia.RunSuccess)
330354
return
331355
}
332356

333357
// Run scheduleJobsByPriority again until all jobs have been executed
334-
s.scheduleJobsByPriority(r, p, pS)
335-
}
336-
337-
// getJobResultsAndStore
338-
func (s *Scheduler) getJobResultsAndStore(triggerSave chan bool, r *gaia.PipelineRun) {
339-
for range triggerSave {
340-
// Store update
341-
s.storeService.PipelinePutRun(r)
342-
}
358+
s.scheduleJobsByPriority(r, pS)
343359
}
344360

345361
// getPipelineJobs uses the plugin system to get all jobs from the given pipeline.

scheduler/scheduler_test.go

+27-39
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,12 @@ import (
1414
uuid "github.com/satori/go.uuid"
1515
)
1616

17-
type PluginFake struct {
18-
// Fake struct
19-
jobs []gaia.Job
20-
}
21-
22-
var pluginFake *PluginFake
17+
type PluginFake struct{}
2318

24-
func (p *PluginFake) NewPlugin() Plugin {
25-
return &PluginFake{}
26-
}
19+
func (p *PluginFake) NewPlugin() Plugin { return &PluginFake{} }
2720
func (p *PluginFake) Connect(cmd *exec.Cmd, logPath *string) error { return nil }
2821
func (p *PluginFake) Execute(j *gaia.Job) error { return nil }
29-
func (p *PluginFake) GetJobs() ([]gaia.Job, error) { return pluginFake.jobs, nil }
22+
func (p *PluginFake) GetJobs() ([]gaia.Job, error) { return prepareJobs(), nil }
3023
func (p *PluginFake) Close() {}
3124

3225
func TestInit(t *testing.T) {
@@ -44,8 +37,7 @@ func TestInit(t *testing.T) {
4437
if err := storeInstance.Init(); err != nil {
4538
t.Fatal(err)
4639
}
47-
pluginFake = &PluginFake{}
48-
s := NewScheduler(storeInstance, pluginFake)
40+
s := NewScheduler(storeInstance, &PluginFake{})
4941
err := s.Init()
5042
if err != nil {
5143
t.Fatal(err)
@@ -72,10 +64,8 @@ func TestPrepareAndExec(t *testing.T) {
7264
t.Fatal(err)
7365
}
7466
p, r := prepareTestData()
75-
storeInstance.PipelinePut(p)
76-
pluginFake = &PluginFake{}
77-
pluginFake.jobs = p.Jobs
78-
s := NewScheduler(storeInstance, pluginFake)
67+
storeInstance.PipelinePut(&p)
68+
s := NewScheduler(storeInstance, &PluginFake{})
7969
s.prepareAndExec(r)
8070

8171
// Iterate jobs
@@ -108,15 +98,13 @@ func TestSchedulePipeline(t *testing.T) {
10898
t.Fatal(err)
10999
}
110100
p, _ := prepareTestData()
111-
storeInstance.PipelinePut(p)
112-
pluginFake = &PluginFake{}
113-
pluginFake.jobs = p.Jobs
114-
s := NewScheduler(storeInstance, pluginFake)
101+
storeInstance.PipelinePut(&p)
102+
s := NewScheduler(storeInstance, &PluginFake{})
115103
err := s.Init()
116104
if err != nil {
117105
t.Fatal(err)
118106
}
119-
_, err = s.SchedulePipeline(p)
107+
_, err = s.SchedulePipeline(&p)
120108
if err != nil {
121109
t.Fatal(err)
122110
}
@@ -143,15 +131,13 @@ func TestSchedule(t *testing.T) {
143131
t.Fatal(err)
144132
}
145133
p, _ := prepareTestData()
146-
storeInstance.PipelinePut(p)
147-
pluginFake = &PluginFake{}
148-
pluginFake.jobs = p.Jobs
149-
s := NewScheduler(storeInstance, pluginFake)
134+
storeInstance.PipelinePut(&p)
135+
s := NewScheduler(storeInstance, &PluginFake{})
150136
err := s.Init()
151137
if err != nil {
152138
t.Fatal(err)
153139
}
154-
_, err = s.SchedulePipeline(p)
140+
_, err = s.SchedulePipeline(&p)
155141
if err != nil {
156142
t.Fatal(err)
157143
}
@@ -188,11 +174,9 @@ func TestSetPipelineJobs(t *testing.T) {
188174
t.Fatal(err)
189175
}
190176
p, _ := prepareTestData()
191-
pluginFake = &PluginFake{}
192-
pluginFake.jobs = p.Jobs
193177
p.Jobs = nil
194-
s := NewScheduler(storeInstance, pluginFake)
195-
err := s.SetPipelineJobs(p)
178+
s := NewScheduler(storeInstance, &PluginFake{})
179+
err := s.SetPipelineJobs(&p)
196180
if err != nil {
197181
t.Fatal(err)
198182
}
@@ -205,7 +189,7 @@ func TestSetPipelineJobs(t *testing.T) {
205189
}
206190
}
207191

208-
func prepareTestData() (pipeline *gaia.Pipeline, pipelineRun *gaia.PipelineRun) {
192+
func prepareJobs() []gaia.Job {
209193
job1 := gaia.Job{
210194
ID: hash("Job1"),
211195
Title: "Job1",
@@ -231,18 +215,22 @@ func prepareTestData() (pipeline *gaia.Pipeline, pipelineRun *gaia.PipelineRun)
231215
Status: gaia.JobWaitingExec,
232216
}
233217

234-
pipeline = &gaia.Pipeline{
218+
return []gaia.Job{
219+
job1,
220+
job2,
221+
job3,
222+
job4,
223+
}
224+
}
225+
226+
func prepareTestData() (pipeline gaia.Pipeline, pipelineRun gaia.PipelineRun) {
227+
pipeline = gaia.Pipeline{
235228
ID: 1,
236229
Name: "Test Pipeline",
237230
Type: gaia.PTypeGolang,
238-
Jobs: []gaia.Job{
239-
job1,
240-
job2,
241-
job3,
242-
job4,
243-
},
231+
Jobs: prepareJobs(),
244232
}
245-
pipelineRun = &gaia.PipelineRun{
233+
pipelineRun = gaia.PipelineRun{
246234
ID: 1,
247235
PipelineID: 1,
248236
Status: gaia.RunNotScheduled,

0 commit comments

Comments
 (0)