Skip to content

runtime: let the fault thread to crash the process #63666

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
216 changes: 180 additions & 36 deletions src/runtime/runtime-gdb_unix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,43 @@ import (
"testing"
)

func canGenerateCore(t *testing.T) bool {
// Ensure there is enough RLIMIT_CORE available to generate a full core.
var lim syscall.Rlimit
err := syscall.Getrlimit(syscall.RLIMIT_CORE, &lim)
if err != nil {
t.Fatalf("error getting rlimit: %v", err)
}
// Minimum RLIMIT_CORE max to allow. This is a conservative estimate.
// Most systems allow infinity.
const minRlimitCore = 100 << 20 // 100 MB
if lim.Max < minRlimitCore {
t.Skipf("RLIMIT_CORE max too low: %#+v", lim)
}

// Make sure core pattern will send core to the current directory.
b, err := os.ReadFile("/proc/sys/kernel/core_pattern")
if err != nil {
t.Fatalf("error reading core_pattern: %v", err)
}
if string(b) != "core\n" {
t.Skipf("Unexpected core pattern %q", string(b))
}

coreUsesPID := false
b, err = os.ReadFile("/proc/sys/kernel/core_uses_pid")
if err == nil {
switch string(bytes.TrimSpace(b)) {
case "0":
case "1":
coreUsesPID = true
default:
t.Skipf("unexpected core_uses_pid value %q", string(b))
}
}
return coreUsesPID
}

const coreSignalSource = `
package main

Expand Down Expand Up @@ -81,45 +118,12 @@ func TestGdbCoreSignalBacktrace(t *testing.T) {
t.Parallel()
checkGdbVersion(t)

// Ensure there is enough RLIMIT_CORE available to generate a full core.
var lim syscall.Rlimit
err := syscall.Getrlimit(syscall.RLIMIT_CORE, &lim)
if err != nil {
t.Fatalf("error getting rlimit: %v", err)
}
// Minimum RLIMIT_CORE max to allow. This is a conservative estimate.
// Most systems allow infinity.
const minRlimitCore = 100 << 20 // 100 MB
if lim.Max < minRlimitCore {
t.Skipf("RLIMIT_CORE max too low: %#+v", lim)
}

// Make sure core pattern will send core to the current directory.
b, err := os.ReadFile("/proc/sys/kernel/core_pattern")
if err != nil {
t.Fatalf("error reading core_pattern: %v", err)
}
if string(b) != "core\n" {
t.Skipf("Unexpected core pattern %q", string(b))
}

coreUsesPID := false
b, err = os.ReadFile("/proc/sys/kernel/core_uses_pid")
if err == nil {
switch string(bytes.TrimSpace(b)) {
case "0":
case "1":
coreUsesPID = true
default:
t.Skipf("unexpected core_uses_pid value %q", string(b))
}
}

dir := t.TempDir()
coreUsesPID := canGenerateCore(t)

// Build the source code.
dir := t.TempDir()
src := filepath.Join(dir, "main.go")
err = os.WriteFile(src, []byte(coreSignalSource), 0644)
err := os.WriteFile(src, []byte(coreSignalSource), 0644)
if err != nil {
t.Fatalf("failed to create file: %v", err)
}
Expand Down Expand Up @@ -230,3 +234,143 @@ func TestGdbCoreSignalBacktrace(t *testing.T) {
t.Fatalf("could not find runtime symbol in backtrace after signal handler:\n%s", rest)
}
}

const coreCrashThreadSource = `
package main

/*
#cgo CFLAGS: -g -O0
#include <stdio.h>
#include <stddef.h>
void trigger_crash()
{
int* ptr = NULL;
*ptr = 1024;
}
*/
import "C"
import (
"flag"
"fmt"
"os"
"runtime/debug"
"syscall"
)

func enableCore() {
debug.SetTraceback("crash")

var lim syscall.Rlimit
err := syscall.Getrlimit(syscall.RLIMIT_CORE, &lim)
if err != nil {
panic(fmt.Sprintf("error getting rlimit: %v", err))
}
lim.Cur = lim.Max
fmt.Fprintf(os.Stderr, "Setting RLIMIT_CORE = %+#v\n", lim)
err = syscall.Setrlimit(syscall.RLIMIT_CORE, &lim)
if err != nil {
panic(fmt.Sprintf("error setting rlimit: %v", err))
}
}

func main() {
flag.Parse()

enableCore()

C.trigger_crash()
}
`

// TestGdbCoreCrashThreadBacktrace tests that runtime could let the fault thread to crash process
// and make fault thread as number one thread while gdb in a core file
func TestGdbCoreCrashThreadBacktrace(t *testing.T) {
if runtime.GOOS != "linux" {
// N.B. This test isn't fundamentally Linux-only, but it needs
// to know how to enable/find core files on each OS.
t.Skip("Test only supported on Linux")
}
if runtime.GOARCH != "386" && runtime.GOARCH != "amd64" {
// TODO(go.dev/issue/25218): Other architectures use sigreturn
// via VDSO, which we somehow don't handle correctly.
t.Skip("Backtrace through signal handler only works on 386 and amd64")
}

checkGdbEnvironment(t)
t.Parallel()
checkGdbVersion(t)

coreUsesPID := canGenerateCore(t)

// Build the source code.
dir := t.TempDir()
src := filepath.Join(dir, "main.go")
err := os.WriteFile(src, []byte(coreCrashThreadSource), 0644)
if err != nil {
t.Fatalf("failed to create file: %v", err)
}
cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "a.exe", "main.go")
cmd.Dir = dir
out, err := testenv.CleanCmdEnv(cmd).CombinedOutput()
if err != nil {
t.Fatalf("building source %v\n%s", err, out)
}

// Start the test binary.
cmd = testenv.Command(t, "./a.exe")
cmd.Dir = dir
var output bytes.Buffer
cmd.Stdout = &output // for test logging
cmd.Stderr = &output

if err := cmd.Start(); err != nil {
t.Fatalf("error starting test binary: %v", err)
}

pid := cmd.Process.Pid

err = cmd.Wait()
t.Logf("child output:\n%s", output.String())
if err == nil {
t.Fatalf("Wait succeeded, want SIGABRT")
}
ee, ok := err.(*exec.ExitError)
if !ok {
t.Fatalf("Wait err got %T %v, want exec.ExitError", ee, ee)
}
ws, ok := ee.Sys().(syscall.WaitStatus)
if !ok {
t.Fatalf("Sys got %T %v, want syscall.WaitStatus", ee.Sys(), ee.Sys())
}
if ws.Signal() != syscall.SIGABRT {
t.Fatalf("Signal got %d want SIGABRT", ws.Signal())
}
if !ws.CoreDump() {
t.Fatalf("CoreDump got %v want true", ws.CoreDump())
}

coreFile := "core"
if coreUsesPID {
coreFile += fmt.Sprintf(".%d", pid)
}

// Execute gdb commands.
args := []string{"-nx", "-batch",
"-iex", "add-auto-load-safe-path " + filepath.Join(testenv.GOROOT(t), "src", "runtime"),
"-ex", "backtrace",
filepath.Join(dir, "a.exe"),
filepath.Join(dir, coreFile),
}
cmd = testenv.Command(t, "gdb", args...)

got, err := cmd.CombinedOutput()
t.Logf("gdb output:\n%s", got)
if err != nil {
t.Fatalf("gdb exited with error: %v", err)
}

re := regexp.MustCompile(`#.* trigger_crash`)
if found := re.Find(got) != nil; !found {
t.Fatalf("could not find trigger_crash in backtrace")
}
}
37 changes: 26 additions & 11 deletions src/runtime/signal_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -597,7 +597,7 @@ func adjustSignalStack(sig uint32, mp *m, gsigStack *gsignalStack) bool {

// crashing is the number of m's we have waited for when implementing
// GOTRACEBACK=crash when a signal is received.
var crashing int32
var crashing atomic.Int32

// testSigtrap and testSigusr1 are used by the runtime tests. If
// non-nil, it is called on SIGTRAP/SIGUSR1. If it returns true, the
Expand Down Expand Up @@ -730,7 +730,7 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
mp.throwing = throwTypeRuntime
mp.caughtsig.set(gp)

if crashing == 0 {
if crashing.Load() == 0 {
startpanic_m()
}

Expand All @@ -740,32 +740,47 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
if level > 0 {
goroutineheader(gp)
tracebacktrap(c.sigpc(), c.sigsp(), c.siglr(), gp)
if crashing > 0 && gp != mp.curg && mp.curg != nil && readgstatus(mp.curg)&^_Gscan == _Grunning {
if crashing.Load() > 0 && gp != mp.curg && mp.curg != nil && readgstatus(mp.curg)&^_Gscan == _Grunning {
// tracebackothers on original m skipped this one; trace it now.
goroutineheader(mp.curg)
traceback(^uintptr(0), ^uintptr(0), 0, mp.curg)
} else if crashing == 0 {
} else if crashing.Load() == 0 {
tracebackothers(gp)
print("\n")
}
dumpregs(c)
}

if docrash {
crashing++
if crashing < mcount()-int32(extraMLength.Load()) {
isCrashThread := false
if crashing.CompareAndSwap(0, 1) {
isCrashThread = true
} else {
crashing.Add(1)
}
if crashing.Load() < mcount()-int32(extraMLength.Load()) {
// There are other m's that need to dump their stacks.
// Relay SIGQUIT to the next m by sending it to the current process.
// All m's that have already received SIGQUIT have signal masks blocking
// receipt of any signals, so the SIGQUIT will go to an m that hasn't seen it yet.
// When the last m receives the SIGQUIT, it will fall through to the call to
// crash below. Just in case the relaying gets botched, each m involved in
// The first m will wait until all ms received the SIGQUIT, then crash/exit.
// Just in case the relaying gets botched, each m involved in
// the relay sleeps for 5 seconds and then does the crash/exit itself.
// In expected operation, the last m has received the SIGQUIT and run
// crash/exit and the process is gone, all long before any of the
// 5-second sleeps have finished.
// The faulting m is crashing first so it is the faulting thread in the core dump (see issue #63277):
// in expected operation, the first m will wait until the last m has received the SIGQUIT,
// and then run crash/exit and the process is gone.
// However, if it spends more than 5 seconds to send SIGQUIT to all ms,
// any of ms may crash/exit the process after waiting for 5 seconds.
print("\n-----\n\n")
raiseproc(_SIGQUIT)
}
if isCrashThread {
i := 0
for (crashing.Load() < mcount()-int32(extraMLength.Load())) && i < 10 {
i++
usleep(500 * 1000)
}
} else {
usleep(5 * 1000 * 1000)
}
printDebugLog()
Expand Down