Skip to content

Commit 3ca9845

Browse files
committed
roachtest: use atomic pointer for logger
The test runner swaps out the test logger when running post test artifacts collection and checks. However, in the case of a timeout, the test goroutine may still be running and have access to the test logger. This leads to a race condition where the logger is replaced as it's being used by the test. This change switches the test logger to use an atomic pointer instead.
1 parent 0b98fd7 commit 3ca9845

File tree

5 files changed

+53
-15
lines changed

5 files changed

+53
-15
lines changed

pkg/cmd/roachtest/github_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,12 +139,12 @@ func TestCreatePostRequest(t *testing.T) {
139139

140140
ti := &testImpl{
141141
spec: testSpec,
142-
l: nilLogger(),
143142
start: time.Date(2023, time.July, 21, 16, 34, 3, 817, time.UTC),
144143
end: time.Date(2023, time.July, 21, 16, 42, 13, 137, time.UTC),
145144
cockroach: "cockroach",
146145
cockroachEA: "cockroach-ea",
147146
}
147+
ti.ReplaceL(nilLogger())
148148

149149
testClusterImpl := &clusterImpl{spec: clusterSpec, arch: vm.ArchAMD64, name: "foo"}
150150
vo := vm.DefaultCreateOpts()

pkg/cmd/roachtest/test_impl.go

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"regexp"
1515
"strings"
1616
"sync"
17+
"sync/atomic"
1718
"time"
1819

1920
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry"
@@ -77,7 +78,11 @@ type testImpl struct {
7778
buildVersion *version.Version
7879

7980
// l is the logger that the test will use for its output.
80-
l *logger.Logger
81+
//
82+
// N.B. We need to use an atomic pointer here since the test
83+
// runner can swap the logger out when running post test assertions
84+
// and artifacts collection.
85+
l atomic.Pointer[logger.Logger]
8186

8287
// taskManager manages tasks (goroutines) for tests.
8388
taskManager task.Manager
@@ -172,7 +177,7 @@ func (t *testImpl) Cockroach() string {
172177
// If the test is a benchmark test, we don't want to enable assertions
173178
// as it will slow down performance.
174179
if t.spec.Benchmark {
175-
t.l.Printf("Benchmark test, running with standard cockroach")
180+
t.L().Printf("Benchmark test, running with standard cockroach")
176181
t.randomizedCockroach = t.StandardCockroach()
177182
return
178183
}
@@ -181,20 +186,20 @@ func (t *testImpl) Cockroach() string {
181186
// The build with runtime assertions should exist in every nightly
182187
// CI build, but we can't assume it exists in every roachtest call.
183188
if path := t.RuntimeAssertionsCockroach(); path != "" {
184-
t.l.Printf("Runtime assertions enabled")
189+
t.L().Printf("Runtime assertions enabled")
185190
t.randomizedCockroach = path
186191
return
187192
} else {
188-
t.l.Printf("WARNING: running without runtime assertions since the corresponding binary was not specified")
193+
t.L().Printf("WARNING: running without runtime assertions since the corresponding binary was not specified")
189194
}
190195
}
191-
t.l.Printf("Runtime assertions disabled")
196+
t.L().Printf("Runtime assertions disabled")
192197
t.randomizedCockroach = t.StandardCockroach()
193198
case registry.StandardCockroach:
194-
t.l.Printf("Runtime assertions disabled: registry.StandardCockroach set")
199+
t.L().Printf("Runtime assertions disabled: registry.StandardCockroach set")
195200
t.randomizedCockroach = t.StandardCockroach()
196201
case registry.RuntimeAssertionsCockroach:
197-
t.l.Printf("Runtime assertions enabled: registry.RuntimeAssertionsCockroach set")
202+
t.L().Printf("Runtime assertions enabled: registry.RuntimeAssertionsCockroach set")
198203
t.randomizedCockroach = t.RuntimeAssertionsCockroach()
199204
default:
200205
t.Fatal("Specified cockroach binary does not exist.")
@@ -259,13 +264,12 @@ func (t *testImpl) SnapshotPrefix() string {
259264

260265
// L returns the test's logger.
261266
func (t *testImpl) L() *logger.Logger {
262-
return t.l
267+
return t.l.Load()
263268
}
264269

265270
// ReplaceL replaces the test's logger.
266271
func (t *testImpl) ReplaceL(l *logger.Logger) {
267-
// TODO(tbg): get rid of this, this is racy & hacky.
268-
t.l = l
272+
t.l.Store(l)
269273
}
270274

271275
func (t *testImpl) status(ctx context.Context, id int64, args ...interface{}) {

pkg/cmd/roachtest/test_impl_test.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -179,9 +179,8 @@ func Test_failuresMatchingError(t *testing.T) {
179179
}
180180

181181
func Test_failureSpecifyOwnerAndAddFailureCombination(t *testing.T) {
182-
ti := testImpl{
183-
l: nilLogger(),
184-
}
182+
ti := testImpl{}
183+
ti.ReplaceL(nilLogger())
185184
ti.addFailure(0, "", vmPreemptionError("my_VM"))
186185
errWithOwnership := failuresAsErrorWithOwnership(ti.failures())
187186

pkg/cmd/roachtest/test_runner.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -830,14 +830,14 @@ func (r *testRunner) runWorker(
830830
buildVersion: binaryVersion,
831831
artifactsDir: testArtifactsDir,
832832
artifactsSpec: artifactsSpec,
833-
l: testL,
834833
versionsBinaryOverride: topt.versionsBinaryOverride,
835834
skipInit: topt.skipInit,
836835
debug: clustersOpt.debugMode.IsDebug(),
837836
goCoverEnabled: topt.goCoverEnabled,
838837
exportOpenmetrics: topt.exportOpenMetrics,
839838
runID: generateRunID(clustersOpt),
840839
}
840+
t.ReplaceL(testL)
841841
github := newGithubIssues(r.config.disableIssue, c, vmCreateOpts)
842842

843843
// handleClusterCreationFailure can be called when the `err` given

pkg/cmd/roachtest/test_test.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -768,3 +768,38 @@ func TestVMPreemptionPolling(t *testing.T) {
768768
require.NoError(t, err)
769769
})
770770
}
771+
772+
// TestRunnerFailureAfterTimeout checks that a test has a failure added
773+
// after the test has timed out works as expected.
774+
//
775+
// Specifically, this is a regression test that replacing the test logger
776+
// for post test artifacts collection or assertion checks is atomic and
777+
// doesn't race with the logger potentially still being used by the test.
778+
func TestRunnerFailureAfterTimeout(t *testing.T) {
779+
ctx := context.Background()
780+
stopper := stop.NewStopper()
781+
defer stopper.Stop(ctx)
782+
cr := newClusterRegistry()
783+
runner := newUnitTestRunner(cr, stopper)
784+
785+
var buf syncedBuffer
786+
copt := defaultClusterOpt()
787+
lopt := defaultLoggingOpt(&buf)
788+
test := registry.TestSpec{
789+
Name: `timeout`,
790+
Owner: OwnerUnitTest,
791+
// Set the timeout very low so we can observe the timeout
792+
// and error racing.
793+
Timeout: 1 * time.Nanosecond,
794+
Cluster: spec.MakeClusterSpec(0),
795+
CompatibleClouds: registry.AllExceptAWS,
796+
Suites: registry.Suites(registry.Nightly),
797+
CockroachBinary: registry.StandardCockroach,
798+
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
799+
t.Error("test failed")
800+
},
801+
}
802+
err := runner.Run(ctx, []registry.TestSpec{test}, 1, /* count */
803+
defaultParallelism, copt, testOpts{}, lopt)
804+
require.Error(t, err)
805+
}

0 commit comments

Comments
 (0)