Skip to content

Commit 67e5375

Browse files
committed
syscall: use CLONE_VFORK safely
Currently, CLONE_VFORK is used without much regard to the stack. This is dangerous, because anything the child does to the stack is visible to the parent. For example, if the compiler were to reuse named stack slots (which it currently doesn't do), it would be easy for the child running in the same stack frame as the parent to corrupt local variables that the parent then depended on. We're not sure of anything specific going wrong in this code right now, but it is at best a ticking time bomb.

CLONE_VFORK can only safely be used if we ensure the child does not execute in any of the active stack frames of the parent. This commit implements this by arranging for the parent to return immediately from the frame the child will operate in, and for the child to never return to the frame the parent will operate in.

Fixes #20732.

Change-Id: Iad5b4ddc2b994c082bd278bfd52ef53bd38c037f
Reviewed-on: https://go-review.googlesource.com/46173
Run-TryBot: Austin Clements <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Ian Lance Taylor <[email protected]>
1 parent 5a5ac34 commit 67e5375

File tree

1 file changed

+45
-25
lines changed

1 file changed

+45
-25
lines changed

src/syscall/exec_linux.go

+45-25
Original file line number | Diff line number | Diff line change
@@ -63,15 +63,46 @@ func runtime_AfterForkInChild()
6363
// functions that do not grow the stack.
6464
//go:norace
6565
func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (pid int, err Errno) {
66+
// Set up and fork. This returns immediately in the parent or
67+
// if there's an error.
68+
r1, err1, p, locked := forkAndExecInChild1(argv0, argv, envv, chroot, dir, attr, sys, pipe)
69+
if locked {
70+
runtime_AfterFork()
71+
}
72+
if err1 != 0 {
73+
return 0, err1
74+
}
75+
76+
// parent; return PID
77+
pid = int(r1)
78+
79+
if sys.UidMappings != nil || sys.GidMappings != nil {
80+
Close(p[0])
81+
err := writeUidGidMappings(pid, sys)
82+
var err2 Errno
83+
if err != nil {
84+
err2 = err.(Errno)
85+
}
86+
RawSyscall(SYS_WRITE, uintptr(p[1]), uintptr(unsafe.Pointer(&err2)), unsafe.Sizeof(err2))
87+
Close(p[1])
88+
}
89+
90+
return pid, 0
91+
}
92+
93+
//go:norace
94+
func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (r1 uintptr, err1 Errno, p [2]int, locked bool) {
95+
// vfork requires that the child not touch any of the parent's
96+
// active stack frames. Hence, the child does all post-fork
97+
// processing in this stack frame and never returns, while the
98+
// parent returns immediately from this frame and does all
99+
// post-fork processing in the outer frame.
66100
// Declare all variables at top in case any
67101
// declarations require heap allocation (e.g., err1).
68102
var (
69-
r1 uintptr
70-
err1 Errno
71103
err2 Errno
72104
nextfd int
73105
i int
74-
p [2]int
75106
)
76107

77108
// Record parent PID so child can test if it has died.
@@ -94,13 +125,15 @@ func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr
94125
// synchronizing writing of User ID/Group ID mappings.
95126
if sys.UidMappings != nil || sys.GidMappings != nil {
96127
if err := forkExecPipe(p[:]); err != nil {
97-
return 0, err.(Errno)
128+
err1 = err.(Errno)
129+
return
98130
}
99131
}
100132

101133
// About to call fork.
102134
// No more allocation or calls of non-assembly functions.
103135
runtime_BeforeFork()
136+
locked = true
104137
switch {
105138
case runtime.GOARCH == "amd64" && sys.Cloneflags&CLONE_NEWUSER == 0:
106139
r1, err1 = rawVforkSyscall(SYS_CLONE, uintptr(SIGCHLD|CLONE_VFORK|CLONE_VM)|sys.Cloneflags)
@@ -109,27 +142,14 @@ func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr
109142
default:
110143
r1, _, err1 = RawSyscall6(SYS_CLONE, uintptr(SIGCHLD)|sys.Cloneflags, 0, 0, 0, 0, 0)
111144
}
112-
if err1 != 0 {
113-
runtime_AfterFork()
114-
return 0, err1
115-
}
116-
117-
if r1 != 0 {
118-
// parent; return PID
119-
runtime_AfterFork()
120-
pid = int(r1)
121-
122-
if sys.UidMappings != nil || sys.GidMappings != nil {
123-
Close(p[0])
124-
err := writeUidGidMappings(pid, sys)
125-
if err != nil {
126-
err2 = err.(Errno)
127-
}
128-
RawSyscall(SYS_WRITE, uintptr(p[1]), uintptr(unsafe.Pointer(&err2)), unsafe.Sizeof(err2))
129-
Close(p[1])
130-
}
131-
132-
return pid, 0
145+
if err1 != 0 || r1 != 0 {
146+
// If we're in the parent, we must return immediately
147+
// so we're not in the same stack frame as the child.
148+
// This can at most use the return PC, which the child
149+
// will not modify, and the results of
150+
// rawVforkSyscall, which must have been written after
151+
// the child was replaced.
152+
return
133153
}
134154

135155
// Fork succeeded, now in child.

0 commit comments

Comments
 (0)