Skip to content

Commit f0b652e

Browse files
committed
[1.1] rootfs: try to scope MkdirAll to stay inside the rootfs
While we use SecureJoin to try to make all of our target paths inside the container safe, SecureJoin is not safe against an attacker that can change the path after we "resolve" it. os.MkdirAll can inadvertently follow symlinks and thus an attacker could end up tricking runc into creating empty directories on the host (note that the container doesn't get access to these directories, and the host just sees empty directories). However, this could potentially cause DoS issues by (for instance) creating a directory in a conf.d directory for a daemon that doesn't handle subdirectories properly. In addition, the handling for creating file bind-mounts did a plain open(O_CREAT) on the SecureJoin'd path, which is even more obviously unsafe (luckily we didn't use O_TRUNC, or this bug could've allowed an attacker to cause data loss...). Regardless of the symlink issue, opening an untrusted file could result in a DoS if the file is a hung tty or some other "nasty" file. We can use mknodat to safely create a regular file without opening anything anyway (O_CREAT|O_EXCL would also work but it makes the logic a bit more complicated, and we don't want to open the file for any particular reason anyway). libpathrs[1] is the long-term solution for these kinds of problems, but for now we can patch this particular issue by creating a more restricted MkdirAll that refuses to resolve symlinks and does the creation using file descriptors. This is loosely based on a more secure version that filepath-securejoin now has[2] and will be added to libpathrs soon[3]. [1]: https://github.com/openSUSE/libpathrs [2]: https://github.com/cyphar/filepath-securejoin/releases/tag/v0.3.0 [3]: openSUSE/libpathrs#10 Fixes: CVE-2024-45310 Signed-off-by: Aleksa Sarai <[email protected]>
1 parent 8781993 commit f0b652e

File tree

3 files changed

+176
-10
lines changed

3 files changed

+176
-10
lines changed

libcontainer/rootfs_linux.go

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ func mountCgroupV1(m *configs.Mount, c *mountConfig) error {
253253
if c.cgroupns {
254254
subsystemPath := filepath.Join(c.root, b.Destination)
255255
subsystemName := filepath.Base(b.Destination)
256-
if err := os.MkdirAll(subsystemPath, 0o755); err != nil {
256+
if err := utils.MkdirAllInRoot(c.root, subsystemPath, 0o755); err != nil {
257257
return err
258258
}
259259
if err := utils.WithProcfd(c.root, b.Destination, func(procfd string) error {
@@ -406,15 +406,26 @@ func createMountpoint(rootfs string, m *configs.Mount, mountFd *int, source stri
406406
return "", fmt.Errorf("%w: file bind mount over rootfs", errRootfsToFile)
407407
}
408408
// Make the parent directory.
409-
if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
409+
destDir, destBase := filepath.Split(dest)
410+
destDirFd, err := utils.MkdirAllInRootOpen(rootfs, destDir, 0o755)
411+
if err != nil {
410412
return "", fmt.Errorf("make parent dir of file bind-mount: %w", err)
411413
}
412-
// Make the target file.
413-
f, err := os.OpenFile(dest, os.O_CREATE, 0o755)
414-
if err != nil {
415-
return "", fmt.Errorf("create target of file bind-mount: %w", err)
414+
defer destDirFd.Close()
415+
// Make the target file. We want to avoid opening any file that is
416+
// already there because it could be a "bad" file like an invalid
417+
// device or hung tty that might cause a DoS, so we use mknodat.
418+
// destBase does not contain any "/" components, and mknodat does
419+
// not follow trailing symlinks, so we can safely just call mknodat
420+
// here.
421+
if err := unix.Mknodat(int(destDirFd.Fd()), destBase, unix.S_IFREG|0o644, 0); err != nil {
422+
// If we get EEXIST, there was already an inode there and
423+
// we can consider that a success.
424+
if !errors.Is(err, unix.EEXIST) {
425+
err = &os.PathError{Op: "mknod regular file", Path: dest, Err: err}
426+
return "", fmt.Errorf("create target of file bind-mount: %w", err)
427+
}
416428
}
417-
_ = f.Close()
418429
// Nothing left to do.
419430
return dest, nil
420431
}
@@ -433,7 +444,7 @@ func createMountpoint(rootfs string, m *configs.Mount, mountFd *int, source stri
433444
}
434445
}
435446

436-
if err := os.MkdirAll(dest, 0o755); err != nil {
447+
if err := utils.MkdirAllInRoot(rootfs, dest, 0o755); err != nil {
437448
return "", err
438449
}
439450
return dest, nil
@@ -463,7 +474,7 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error {
463474
} else if !fi.IsDir() {
464475
return fmt.Errorf("filesystem %q must be mounted on ordinary directory", m.Device)
465476
}
466-
if err := os.MkdirAll(dest, 0o755); err != nil {
477+
if err := utils.MkdirAllInRoot(rootfs, dest, 0o755); err != nil {
467478
return err
468479
}
469480
// Selinux kernels do not support labeling of /proc or /sys.
@@ -751,7 +762,7 @@ func createDeviceNode(rootfs string, node *devices.Device, bind bool) error {
751762
if dest == rootfs {
752763
return fmt.Errorf("%w: mknod over rootfs", errRootfsToFile)
753764
}
754-
if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
765+
if err := utils.MkdirAllInRoot(rootfs, filepath.Dir(dest), 0o755); err != nil {
755766
return err
756767
}
757768
if bind {

libcontainer/system/linux.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ package system
66
import (
77
"os"
88
"os/exec"
9+
"runtime"
10+
"strings"
911
"unsafe"
1012

1113
"golang.org/x/sys/unix"
@@ -102,3 +104,42 @@ func GetSubreaper() (int, error) {
102104

103105
return int(i), nil
104106
}
107+
108+
func prepareAt(dir *os.File, path string) (int, string) {
109+
if dir == nil {
110+
return unix.AT_FDCWD, path
111+
}
112+
113+
// Rather than just filepath.Join-ing path here, do it manually so the
114+
// error and handle correctly indicate cases like path=".." as being
115+
// relative to the correct directory. The handle.Name() might end up being
116+
// wrong but because this is (currently) only used in MkdirAllInRoot, that
117+
// isn't a problem.
118+
dirName := dir.Name()
119+
if !strings.HasSuffix(dirName, "/") {
120+
dirName += "/"
121+
}
122+
fullPath := dirName + path
123+
124+
return int(dir.Fd()), fullPath
125+
}
126+
127+
func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) {
128+
dirFd, fullPath := prepareAt(dir, path)
129+
fd, err := unix.Openat(dirFd, path, flags, mode)
130+
if err != nil {
131+
return nil, &os.PathError{Op: "openat", Path: fullPath, Err: err}
132+
}
133+
runtime.KeepAlive(dir)
134+
return os.NewFile(uintptr(fd), fullPath), nil
135+
}
136+
137+
func Mkdirat(dir *os.File, path string, mode uint32) error {
138+
dirFd, fullPath := prepareAt(dir, path)
139+
err := unix.Mkdirat(dirFd, path, mode)
140+
if err != nil {
141+
err = &os.PathError{Op: "mkdirat", Path: fullPath, Err: err}
142+
}
143+
runtime.KeepAlive(dir)
144+
return err
145+
}

libcontainer/utils/utils_unix.go

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,17 @@
44
package utils
55

66
import (
7+
"errors"
78
"fmt"
89
"os"
10+
"path/filepath"
911
"strconv"
1012
"strings"
1113
_ "unsafe" // for go:linkname
1214

15+
"github.com/opencontainers/runc/libcontainer/system"
16+
17+
securejoin "github.com/cyphar/filepath-securejoin"
1318
"golang.org/x/sys/unix"
1419
)
1520

@@ -130,3 +135,112 @@ func IsLexicallyInRoot(root, path string) bool {
130135
}
131136
return strings.HasPrefix(path, root)
132137
}
138+
139+
// MkdirAllInRootOpen attempts to make
140+
//
141+
// path, _ := securejoin.SecureJoin(root, unsafePath)
142+
// os.MkdirAll(path, mode)
143+
// os.Open(path)
144+
//
145+
// safer against attacks where components in the path are changed between
146+
// SecureJoin returning and MkdirAll (or Open) being called. In particular, we
147+
// try to detect any symlink components in the path while we are doing the
148+
// MkdirAll.
149+
//
150+
// NOTE: Unlike os.MkdirAll, mode is not Go's os.FileMode, it is the unix mode
151+
// (the suid/sgid/sticky bits are not the same as for os.FileMode).
152+
//
153+
// NOTE: If unsafePath is a subpath of root, we assume that you have already
154+
// called SecureJoin and so we use the provided path verbatim without resolving
155+
// any symlinks (this is done in a way that avoids symlink-exchange races).
156+
// This means that the path also must not contain ".." elements, otherwise an
157+
// error will occur.
158+
//
159+
// This is a somewhat less safe alternative to
160+
// <https://github.com/cyphar/filepath-securejoin/pull/13>, but it should
161+
// detect attempts to trick us into creating directories outside of the root.
162+
// We should migrate to securejoin.MkdirAll once it is merged.
163+
func MkdirAllInRootOpen(root, unsafePath string, mode uint32) (_ *os.File, Err error) {
164+
// If the path is already "within" the root, use it verbatim.
165+
fullPath := unsafePath
166+
if !IsLexicallyInRoot(root, unsafePath) {
167+
var err error
168+
fullPath, err = securejoin.SecureJoin(root, unsafePath)
169+
if err != nil {
170+
return nil, err
171+
}
172+
}
173+
subPath, err := filepath.Rel(root, fullPath)
174+
if err != nil {
175+
return nil, err
176+
}
177+
178+
// Check for any silly mode bits.
179+
if mode&^0o7777 != 0 {
180+
return nil, fmt.Errorf("tried to include non-mode bits in MkdirAll mode: 0o%.3o", mode)
181+
}
182+
183+
currentDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
184+
if err != nil {
185+
return nil, fmt.Errorf("open root handle: %w", err)
186+
}
187+
defer func() {
188+
if Err != nil {
189+
currentDir.Close()
190+
}
191+
}()
192+
193+
for _, part := range strings.Split(subPath, string(filepath.Separator)) {
194+
switch part {
195+
case "", ".":
196+
// Skip over no-op components.
197+
continue
198+
case "..":
199+
return nil, fmt.Errorf("possible breakout detected: found %q component in SecureJoin subpath %s", part, subPath)
200+
}
201+
202+
nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
203+
switch {
204+
case err == nil:
205+
// Update the currentDir.
206+
_ = currentDir.Close()
207+
currentDir = nextDir
208+
209+
case errors.Is(err, unix.ENOTDIR):
210+
// This might be a symlink or some other random file. Either way,
211+
// error out.
212+
return nil, fmt.Errorf("cannot mkdir in %s/%s: %w", currentDir.Name(), part, unix.ENOTDIR)
213+
214+
case errors.Is(err, os.ErrNotExist):
215+
// Luckily, mkdirat will not follow trailing symlinks, so this is
216+
// safe to do as-is.
217+
if err := system.Mkdirat(currentDir, part, mode); err != nil {
218+
return nil, err
219+
}
220+
// Open the new directory. There is a race here where an attacker
221+
// could swap the directory with a different directory, but
222+
// MkdirAll's fuzzy semantics mean we don't care about that.
223+
nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
224+
if err != nil {
225+
return nil, fmt.Errorf("open newly created directory: %w", err)
226+
}
227+
// Update the currentDir.
228+
_ = currentDir.Close()
229+
currentDir = nextDir
230+
231+
default:
232+
return nil, err
233+
}
234+
}
235+
return currentDir, nil
236+
}
237+
238+
// MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the
239+
// returned handle, for callers that don't need to use it.
240+
func MkdirAllInRoot(root, unsafePath string, mode uint32) error {
241+
f, err := MkdirAllInRootOpen(root, unsafePath, mode)
242+
if err == nil {
243+
_ = f.Close()
244+
}
245+
return err
246+
}

0 commit comments

Comments
 (0)