Skip to content

Commit 9c44407

Browse files
albanrata
andcommitted
Open bind mount sources from the host userns
The source of the bind mount might not be accessible in a different user namespace because a component of the source path might not be traversable under the users and groups mapped inside the user namespace. This caused errors such as the following: # time="2020-06-22T13:48:26Z" level=error msg="container_linux.go:367: starting container process caused: process_linux.go:459: container init caused: rootfs_linux.go:58: mounting \"/tmp/busyboxtest/source-inaccessible/dir\" to rootfs at \"/tmp/inaccessible\" caused: stat /tmp/busyboxtest/source-inaccessible/dir: permission denied" To solve this problem, this patch performs the following: 1. In nsexec.c, it opens the source path in the host userns (so we have the right permissions to open it) but in the container mntns (so the kernel cross mntns mount check lets us mount it later: https://github.com/torvalds/linux/blob/v5.8/fs/namespace.c#L2312). 2. In nsexec.c, it passes the file descriptors of the source to the child process with SCM_RIGHTS. 3. In runc-init in Golang, it finishes the mounts while inside the userns even without access to some components of the source paths. Passing the fds with SCM_RIGHTS is necessary because once the child process is in the container mntns, it is already in the container userns so it cannot temporarily join the host mntns. This patch uses the existing mechanism with _LIBCONTAINER_* environment variables to pass the file descriptors from runc to runc init. This patch uses the existing mechanism with the Netlink-style bootstrap to pass information about the list of source mounts to nsexec.c. Rootless containers don't use this bind mount sources fd-passing mechanism because we can't setns() to the target mntns in a rootless container (we don't have the privileges when we are in the host userns). This patch takes care of using O_CLOEXEC on mount fds, and closing them early. Fixes: #2484. 
Signed-off-by: Alban Crequy <[email protected]> Signed-off-by: Rodrigo Campos <[email protected]> Co-authored-by: Rodrigo Campos <[email protected]>
1 parent 2357eab commit 9c44407

File tree

8 files changed

+412
-25
lines changed

8 files changed

+412
-25
lines changed

libcontainer/configs/mount.go

+6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package configs
22

3+
import "golang.org/x/sys/unix"
4+
35
const (
46
// EXT_COPYUP is a directive to copy up the contents of a directory when
57
// a tmpfs is mounted over it.
@@ -37,3 +39,7 @@ type Mount struct {
3739
// Optional Command to be run after Source is mounted.
3840
PostmountCmds []Command `json:"postmount_cmds"`
3941
}
42+
43+
func (m *Mount) IsBind() bool {
44+
return m.Flags&unix.MS_BIND != 0
45+
}

libcontainer/container_linux.go

+79-4
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,33 @@ func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, chi
521521
return cmd
522522
}
523523

524+
// shouldSendMountSources says whether the child process must setup bind mounts with
525+
// the source pre-opened (O_PATH) in the host user namespace.
526+
// See https://github.com/opencontainers/runc/issues/2484
527+
func (c *linuxContainer) shouldSendMountSources() bool {
528+
// Passing the mount sources via SCM_RIGHTS is only necessary when
529+
// both userns and mntns are active.
530+
if !c.config.Namespaces.Contains(configs.NEWUSER) ||
531+
!c.config.Namespaces.Contains(configs.NEWNS) {
532+
return false
533+
}
534+
535+
// nsexec.c send_mountsources() requires setns(mntns) capabilities
536+
// CAP_SYS_CHROOT and CAP_SYS_ADMIN.
537+
if c.config.RootlessEUID {
538+
return false
539+
}
540+
541+
// We need to send sources if there are bind-mounts.
542+
for _, m := range c.config.Mounts {
543+
if m.IsBind() {
544+
return true
545+
}
546+
}
547+
548+
return false
549+
}
550+
524551
func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) {
525552
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
526553
nsMaps := make(map[configs.NamespaceType]string)
@@ -530,10 +557,40 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPa
530557
}
531558
}
532559
_, sharePidns := nsMaps[configs.NEWPID]
533-
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps)
560+
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps, initStandard)
534561
if err != nil {
535562
return nil, err
536563
}
564+
565+
if c.shouldSendMountSources() {
566+
// Elements on this slice will be paired with mounts (see StartInitialization() and
567+
// prepareRootfs()). This slice MUST have the same size as c.config.Mounts.
568+
mountFds := make([]int, len(c.config.Mounts))
569+
for i, m := range c.config.Mounts {
570+
if !m.IsBind() {
571+
// Non bind-mounts do not use an fd.
572+
mountFds[i] = -1
573+
continue
574+
}
575+
576+
// The fd passed here will not be used: nsexec.c will overwrite it with dup3(). We just need
577+
// to allocate a fd so that we know the number to pass in the environment variable. The fd
578+
// must not be closed before cmd.Start(), so we reuse messageSockPair.child because the
579+
// lifecycle of that fd is already taken care of.
580+
cmd.ExtraFiles = append(cmd.ExtraFiles, messageSockPair.child)
581+
mountFds[i] = stdioFdCount + len(cmd.ExtraFiles) - 1
582+
}
583+
584+
mountFdsJson, err := json.Marshal(mountFds)
585+
if err != nil {
586+
return nil, fmt.Errorf("Error creating _LIBCONTAINER_MOUNT_FDS: %w", err)
587+
}
588+
589+
cmd.Env = append(cmd.Env,
590+
"_LIBCONTAINER_MOUNT_FDS="+string(mountFdsJson),
591+
)
592+
}
593+
537594
init := &initProcess{
538595
cmd: cmd,
539596
messageSockPair: messageSockPair,
@@ -558,7 +615,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
558615
}
559616
// for setns process, we don't have to set cloneflags as the process namespaces
560617
// will only be set via setns syscall
561-
data, err := c.bootstrapData(0, state.NamespacePaths)
618+
data, err := c.bootstrapData(0, state.NamespacePaths, initSetns)
562619
if err != nil {
563620
return nil, err
564621
}
@@ -1213,7 +1270,9 @@ func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
12131270
case "bind":
12141271
// The prepareBindMount() function checks if source
12151272
// exists. So it cannot be used for other filesystem types.
1216-
if err := prepareBindMount(m, c.config.Rootfs); err != nil {
1273+
// TODO: pass something else than nil? Not sure if criu is
1274+
// impacted by issue #2484
1275+
if err := prepareBindMount(m, c.config.Rootfs, nil); err != nil {
12171276
return err
12181277
}
12191278
default:
@@ -2050,7 +2109,7 @@ func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) {
20502109
// such as one that uses nsenter package to bootstrap the container's
20512110
// init process correctly, i.e. with correct namespaces, uid/gid
20522111
// mapping etc.
2053-
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string) (io.Reader, error) {
2112+
func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string, it initType) (io.Reader, error) {
20542113
// create the netlink message
20552114
r := nl.NewNetlinkRequest(int(InitMsg), 0)
20562115

@@ -2132,6 +2191,22 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na
21322191
Value: c.config.RootlessEUID,
21332192
})
21342193

2194+
// Bind mount source to open.
2195+
if it == initStandard && c.shouldSendMountSources() {
2196+
var mounts []byte
2197+
for _, m := range c.config.Mounts {
2198+
if m.IsBind() {
2199+
mounts = append(mounts, []byte(m.Source)...)
2200+
}
2201+
mounts = append(mounts, byte(0))
2202+
}
2203+
2204+
r.AddData(&Bytemsg{
2205+
Type: MountSourcesAttr,
2206+
Value: mounts,
2207+
})
2208+
}
2209+
21352210
return bytes.NewReader(r.Serialize()), nil
21362211
}
21372212

libcontainer/factory_linux.go

+22-1
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,12 @@ func (l *LinuxFactory) StartInitialization() (err error) {
295295
return fmt.Errorf("unable to convert _LIBCONTAINER_LOGPIPE: %w", err)
296296
}
297297

298+
// Get mount files (O_PATH).
299+
mountFds, err := parseMountFds()
300+
if err != nil {
301+
return err
302+
}
303+
298304
// clear the current process's environment to clean any libcontainer
299305
// specific env vars.
300306
os.Clearenv()
@@ -305,7 +311,7 @@ func (l *LinuxFactory) StartInitialization() (err error) {
305311
}
306312
}()
307313

308-
i, err := newContainerInit(it, pipe, consoleSocket, fifofd, logPipeFd)
314+
i, err := newContainerInit(it, pipe, consoleSocket, fifofd, logPipeFd, mountFds)
309315
if err != nil {
310316
return err
311317
}
@@ -359,3 +365,18 @@ func NewgidmapPath(newgidmapPath string) func(*LinuxFactory) error {
359365
return nil
360366
}
361367
}
368+
369+
// parseMountFds decodes the JSON-encoded list of mount source file
// descriptors found in the _LIBCONTAINER_MOUNT_FDS environment variable.
//
// It returns a nil slice and a nil error when the variable is unset or empty.
// When the value cannot be unmarshalled into a []int, it returns a nil slice
// and an error wrapping the JSON failure.
func parseMountFds() ([]int, error) {
	const envKey = "_LIBCONTAINER_MOUNT_FDS"

	raw, ok := os.LookupEnv(envKey)
	if !ok || raw == "" {
		// No fds were passed: callers rely on getting the nil slice here.
		return nil, nil
	}

	fds := []int(nil)
	if err := json.Unmarshal([]byte(raw), &fds); err != nil {
		return nil, fmt.Errorf("Error unmarshalling _LIBCONTAINER_MOUNT_FDS: %w", err)
	}
	return fds, nil
}

libcontainer/init_linux.go

+7-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ type initer interface {
7676
Init() error
7777
}
7878

79-
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int) (initer, error) {
79+
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int, mountFds []int) (initer, error) {
8080
var config *initConfig
8181
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
8282
return nil, err
@@ -86,6 +86,11 @@ func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd,
8686
}
8787
switch t {
8888
case initSetns:
89+
// mountFds must be nil in this case. We don't mount while doing runc exec.
90+
if mountFds != nil {
91+
return nil, errors.New("mountFds must be nil. Can't mount while doing runc exec.")
92+
}
93+
8994
return &linuxSetnsInit{
9095
pipe: pipe,
9196
consoleSocket: consoleSocket,
@@ -100,6 +105,7 @@ func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd,
100105
config: config,
101106
fifoFd: fifoFd,
102107
logFd: logFd,
108+
mountFds: mountFds,
103109
}, nil
104110
}
105111
return nil, fmt.Errorf("unknown init type %q", t)

libcontainer/message_linux.go

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ const (
1818
RootlessEUIDAttr uint16 = 27287
1919
UidmapPathAttr uint16 = 27288
2020
GidmapPathAttr uint16 = 27289
21+
MountSourcesAttr uint16 = 27290
2122
)
2223

2324
type Int32msg struct {

0 commit comments

Comments
 (0)