Skip to content

Commit 0b55757

Browse files
committed
Support Rootless Docker
Requirements: - Install rootless Docker 20.10 or later, see https://rootlesscontaine.rs/getting-started/docker/ - Enable cgroup v2 delegation, see https://rootlesscontaine.rs/getting-started/common/cgroup2/ Usage: `minikube start --driver=docker --container-runtime=containerd`. The `--container-runtime` flag needs to be set to "containerd". CRI-O support can be added later. Closes issue 10836 ("add support for rootless Docker"). Support for rootless Podman (issue 8719) is not covered in this commit. --- Code reading guide: - `deploy/kicbase/Dockerfile`: updated to install fuse-overlayfs and containerd-fuse-overlayfs, which are used instead of the `overlayfs` snapshotter - `deploy/kicbase/entrypoint`: updated to verify cgroup v2 delegation. Mostly from https://github.com/kubernetes-sigs/kind/blob/8a83ee46b28a80ccd47a85e24294b3e149361947/images/base/files/usr/local/bin/entrypoint - `cmd/minikube/cmd/start_flags.go`: updated to set the `KubeletInUserNamespace` feature gate when rootless - `pkg/drivers/kic/oci`: updated to use port forwarding, because rootless container IPs are not reachable from the host - `pkg/minikube/cruntime`: updated to generate `/etc/containerd/config.toml` with rootless support. Signed-off-by: Akihiro Suda <[email protected]>
1 parent 0bb1f67 commit 0b55757

File tree

17 files changed

+204
-27
lines changed

17 files changed

+204
-27
lines changed

Diff for: cmd/minikube/cmd/start_flags.go

+28
Original file line numberDiff line numberDiff line change
@@ -492,9 +492,37 @@ func generateNewConfigFromFlags(cmd *cobra.Command, k8sVersion string, drvName s
492492
}
493493
}
494494

495+
if driver.IsKIC(drvName) {
496+
si, err := oci.CachedDaemonInfo(drvName)
497+
if err != nil {
498+
exit.Message(reason.Usage, "Ensure your {{.driver_name}} is running and is healthy.", out.V{"driver_name": driver.FullName(drvName)})
499+
}
500+
if si.Rootless {
501+
if cc.KubernetesConfig.ContainerRuntime != "containerd" {
502+
exit.Message(reason.Usage, "Container runtime must be set to \"containerd\" for rootless")
503+
// TODO: support cri-o (https://kubernetes.io/docs/tasks/administer-cluster/kubelet-in-userns/#configuring-cri)
504+
}
505+
// KubeletInUserNamespace feature gate is essential for rootless driver.
506+
// See https://kubernetes.io/docs/tasks/administer-cluster/kubelet-in-userns/
507+
cc.KubernetesConfig.FeatureGates = addFeatureGate(cc.KubernetesConfig.FeatureGates, "KubeletInUserNamespace=true")
508+
}
509+
}
510+
495511
return cc
496512
}
497513

514+
// addFeatureGate returns the comma-separated feature gate list featureGates
// with the entry s (for example "KubeletInUserNamespace=true") appended,
// unless an identical entry is already present, in which case featureGates
// is returned unchanged.
//
// An empty featureGates yields just s. Entries are compared verbatim: a gate
// with the same name but a different value is not treated as a duplicate.
func addFeatureGate(featureGates, s string) string {
	if featureGates == "" {
		// strings.Split("", ",") returns [""], which would otherwise
		// produce a leading comma (an empty entry) in the result.
		return s
	}
	for _, v := range strings.Split(featureGates, ",") {
		if v == s {
			return featureGates
		}
	}
	return featureGates + "," + s
}
525+
498526
func checkNumaCount(k8sVersion string) {
499527
if viper.GetInt(kvmNUMACount) < 1 || viper.GetInt(kvmNUMACount) > 8 {
500528
exit.Message(reason.Usage, "--kvm-numa-count range is 1-8")

Diff for: deploy/kicbase/Dockerfile

+18-1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ RUN cd ./cmd/auto-pause/ && go build
3030
FROM ubuntu:focal-20210401
3131

3232
ARG BUILDKIT_VERSION="v0.9.0"
33+
ARG FUSE_OVERLAYFS_VERSION="v1.7.1"
34+
ARG CONTAINERD_FUSE_OVERLAYFS_VERSION="1.0.3"
3335

3436
# copy in static files (configs, scripts)
3537
COPY deploy/kicbase/10-network-security.conf /etc/sysctl.d/10-network-security.conf
@@ -113,14 +115,29 @@ RUN clean-install \
113115
openssh-server \
114116
dnsutils \
115117
# libglib2.0-0 is required for conmon, which is required for podman
116-
libglib2.0-0
118+
libglib2.0-0 \
119+
# fuse3 is required for fuse-overlayfs
120+
fuse3
117121

118122
# install docker
119123
RUN sh -c "echo 'deb https://download.docker.com/linux/ubuntu focal stable' > /etc/apt/sources.list.d/docker.list" && \
120124
curl -L https://download.docker.com/linux/ubuntu/gpg -o docker.key && \
121125
apt-key add - < docker.key && \
122126
clean-install docker-ce docker-ce-cli containerd.io
123127

128+
# install fuse-overlayfs (used by rootless; apt-get version is old)
129+
RUN curl -sSL --retry 5 --output /usr/local/bin/fuse-overlayfs https://github.com/containers/fuse-overlayfs/releases/download/${FUSE_OVERLAYFS_VERSION}/fuse-overlayfs-$(uname -m) \
130+
&& chmod +x /usr/local/bin/fuse-overlayfs
131+
132+
# install containerd-fuse-overlayfs (used by rootless)
133+
RUN export ARCH=$(dpkg --print-architecture | sed 's/ppc64el/ppc64le/' | sed 's/armhf/arm-v7/') \
134+
&& echo "Installing containerd-fuse-overlayfs..." \
135+
&& export CONTAINERD_FUSE_OVERLAYFS_BASE_URL="https://github.com/containerd/fuse-overlayfs-snapshotter/releases/download/v${CONTAINERD_FUSE_OVERLAYFS_VERSION}" \
136+
&& curl -sSL --retry 5 --output /tmp/containerd-fuse-overlayfs.tgz "${CONTAINERD_FUSE_OVERLAYFS_BASE_URL}/containerd-fuse-overlayfs-${CONTAINERD_FUSE_OVERLAYFS_VERSION}-linux-${ARCH}.tar.gz" \
137+
&& tar -C /usr/local/bin -xzvf /tmp/containerd-fuse-overlayfs.tgz \
138+
&& rm -rf /tmp/containerd-fuse-overlayfs.tgz
139+
COPY deploy/kicbase/containerd-fuse-overlayfs.service /etc/systemd/system/containerd-fuse-overlayfs.service
140+
124141
# install buildkit
125142
RUN export ARCH=$(dpkg --print-architecture | sed 's/ppc64el/ppc64le/' | sed 's/armhf/arm-v7/') \
126143
&& echo "Installing buildkit ..." \

Diff for: deploy/kicbase/containerd-fuse-overlayfs.service

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# From https://github.com/kubernetes-sigs/kind/blob/0d3780371091b2dc9ff6eea1b6054f14ff5d970a/images/base/files/etc/systemd/system/containerd-fuse-overlayfs.service
2+
[Unit]
3+
Description=containerd fuse-overlayfs snapshotter
4+
PartOf=containerd.service
5+
6+
[Service]
7+
ExecStart=/usr/local/bin/containerd-fuse-overlayfs-grpc /run/containerd-fuse-overlayfs.sock /var/lib/containerd-fuse-overlayfs
8+
Type=notify
9+
Restart=always
10+
RestartSec=1
11+
12+
[Install]
13+
WantedBy=multi-user.target

Diff for: deploy/kicbase/entrypoint

+46-6
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,39 @@ set -o nounset
1919
set -o pipefail
2020
set -x
2121

22+
# If /proc/self/uid_map has 4294967295 mappings, we are in the initial user namespace, i.e. the host.
23+
# Otherwise we are in a non-initial user namespace.
24+
# https://github.com/opencontainers/runc/blob/v1.0.0-rc92/libcontainer/system/linux.go#L109-L118
25+
userns=""
26+
if grep -Eqv "0[[:space:]]+0[[:space:]]+4294967295" /proc/self/uid_map; then
27+
userns="1"
28+
echo 'INFO: running in a user namespace (experimental)'
29+
fi
30+
31+
# validate_userns checks prerequisites for running inside a user namespace.
# It is a no-op when ${userns} is empty.
#  - Prints a warning to stderr if the hard open-file limit (ulimit -Hn) is
#    below 64000; this is non-fatal.
#  - Exits with status 1 if /sys/fs/cgroup/cgroup.controllers does not exist
#    (reported as cgroup v2 not being enabled).
#  - Exits with status 1 if any of the cpu, memory or pids controllers is not
#    listed in that file (reported as the controller not being delegated).
validate_userns() {
32+
if [[ -z "${userns}" ]]; then
33+
return
34+
fi
35+
36+
local nofile_hard
37+
nofile_hard="$(ulimit -Hn)"
38+
local nofile_hard_expected="64000"
39+
# Too-low limit is only warned about; execution continues.
if [[ "${nofile_hard}" -lt "${nofile_hard_expected}" ]]; then
40+
echo "WARN: UserNS: expected RLIMIT_NOFILE to be at least ${nofile_hard_expected}, got ${nofile_hard}" >&2
41+
fi
42+
43+
# The presence of cgroup.controllers is used as the cgroup v2 indicator.
if [[ ! -f "/sys/fs/cgroup/cgroup.controllers" ]]; then
44+
echo "ERROR: UserNS: cgroup v2 needs to be enabled, see https://rootlesscontaine.rs/getting-started/common/cgroup2/" >&2
45+
exit 1
46+
fi
47+
for f in cpu memory pids; do
48+
# -w matches whole words only, so a controller name is not matched as a substring of another.
if ! grep -qw $f /sys/fs/cgroup/cgroup.controllers; then
49+
echo "ERROR: UserNS: $f controller needs to be delegated, see https://rootlesscontaine.rs/getting-started/common/cgroup2/" >&2
50+
exit 1
51+
fi
52+
done
53+
}
54+
2255
configure_containerd() {
2356
# we need to switch to the 'native' snapshotter on zfs
2457
if [[ "$(stat -f -c %T /kind)" == 'zfs' ]]; then
@@ -73,12 +106,16 @@ fix_mount() {
73106
sync
74107
fi
75108

76-
echo 'INFO: remounting /sys read-only'
77-
# systemd-in-a-container should have read only /sys
78-
# https://systemd.io/CONTAINER_INTERFACE/
79-
# however, we need other things from `docker run --privileged` ...
80-
# and this flag also happens to make /sys rw, amongst other things
81-
mount -o remount,ro /sys
109+
if [[ -z "${userns}" ]]; then
110+
echo 'INFO: remounting /sys read-only'
111+
# systemd-in-a-container should have read only /sys
112+
# https://systemd.io/CONTAINER_INTERFACE/
113+
# however, we need other things from `docker run --privileged` ...
114+
# and this flag also happens to make /sys rw, amongst other things
115+
#
116+
# This step is skipped when running inside UserNS, because it fails with EACCES.
117+
mount -o remount,ro /sys
118+
fi
82119

83120
echo 'INFO: making mounts shared' >&2
84121
# for mount propagation
@@ -334,6 +371,9 @@ enable_network_magic(){
334371
fi
335372
}
336373

374+
# validate state
375+
validate_userns
376+
337377
# run pre-init fixups
338378
# NOTE: it's important that we do configure* first in this order to avoid races
339379
configure_containerd

Diff for: hack/preload-images/generate.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ func generateTarball(kubernetesVersion, containerRuntime, tarballFilename string
9393
if err != nil {
9494
return errors.Wrap(err, "failed create new runtime")
9595
}
96-
if err := cr.Enable(true, false); err != nil {
96+
if err := cr.Enable(true, false, false); err != nil {
9797
return errors.Wrap(err, "enable container runtime")
9898
}
9999

Diff for: pkg/drivers/kic/oci/network.go

+27
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,33 @@ import (
3434
// RoutableHostIPFromInside returns the ip/dns of the host that container lives on
3535
// is routable from inside the container
3636
func RoutableHostIPFromInside(ociBin string, clusterName string, containerName string) (net.IP, error) {
37+
si, err := CachedDaemonInfo(ociBin)
38+
if err != nil {
39+
return nil, err
40+
}
41+
if si.Rootless {
42+
if IsExternalDaemonHost(ociBin) {
43+
return nil, fmt.Errorf("function RoutableHostIPFromInside is not implemented for external rootless daemons")
44+
// TODO: parse DaemonHost()
45+
}
46+
addrs, err := net.InterfaceAddrs()
47+
if err != nil {
48+
return nil, err
49+
}
50+
for _, addr := range addrs {
51+
var ip net.IP
52+
switch v := addr.(type) {
53+
case *net.IPAddr:
54+
ip = v.IP
55+
case *net.IPNet:
56+
ip = v.IP
57+
}
58+
if ip != nil && !ip.IsLoopback() {
59+
return ip, nil
60+
}
61+
}
62+
return nil, fmt.Errorf("could not detect host IP, tried %v", addrs)
63+
}
3764
if ociBin == Docker {
3865
if runtime.GOOS == "linux" {
3966
info, err := containerNetworkInspect(ociBin, clusterName)

Diff for: pkg/drivers/kic/oci/oci.go

+3
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,9 @@ func CreateContainerNode(p CreateParams) error {
162162
// including some ones docker would otherwise do by default.
163163
// for now this is what we want. in the future we may revisit this.
164164
"--privileged",
165+
// enable /dev/fuse explicitly for fuse-overlayfs
166+
// (Rootless Docker does not automatically mount /dev/fuse with --privileged)
167+
"--device", "/dev/fuse",
165168
"--security-opt", "seccomp=unconfined", // ignore seccomp
166169
"--tmpfs", "/tmp", // various things depend on working /tmp
167170
"--tmpfs", "/run", // systemd wants a writable /run

Diff for: pkg/minikube/cruntime/containerd.go

+25-4
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,12 @@ oom_score = 0
6969
[cgroup]
7070
path = ""
7171
72+
[proxy_plugins]
73+
# fuse-overlayfs is used for rootless
74+
[proxy_plugins."fuse-overlayfs"]
75+
type = "snapshot"
76+
address = "/run/containerd-fuse-overlayfs.sock"
77+
7278
[plugins]
7379
[plugins.cgroups]
7480
no_prometheus = false
@@ -80,6 +86,7 @@ oom_score = 0
8086
stats_collect_period = 10
8187
enable_tls_streaming = false
8288
max_container_log_line_size = 16384
89+
restrict_oom_score_adj = {{ .RestrictOOMScoreAdj }}
8390
8491
[plugins."io.containerd.grpc.v1.cri"]
8592
[plugins."io.containerd.grpc.v1.cri".containerd]
@@ -90,7 +97,7 @@ oom_score = 0
9097
SystemdCgroup = {{ .SystemdCgroup }}
9198
9299
[plugins.cri.containerd]
93-
snapshotter = "overlayfs"
100+
snapshotter = "{{ .Snapshotter }}"
94101
[plugins.cri.containerd.default_runtime]
95102
runtime_type = "io.containerd.runc.v2"
96103
[plugins.cri.containerd.untrusted_workload_runtime]
@@ -193,23 +200,31 @@ func (r *Containerd) Available() error {
193200
}
194201

195202
// generateContainerdConfig sets up /etc/containerd/config.toml
196-
func generateContainerdConfig(cr CommandRunner, imageRepository string, kv semver.Version, forceSystemd bool, insecureRegistry []string) error {
203+
func generateContainerdConfig(cr CommandRunner, imageRepository string, kv semver.Version, forceSystemd bool, insecureRegistry []string, inUserNamespace bool) error {
197204
cPath := containerdConfigFile
198205
t, err := template.New("containerd.config.toml").Parse(containerdConfigTemplate)
199206
if err != nil {
200207
return err
201208
}
202209
pauseImage := images.Pause(kv, imageRepository)
210+
snapshotter := "overlayfs"
211+
if inUserNamespace {
212+
snapshotter = "fuse-overlayfs"
213+
}
203214
opts := struct {
204215
PodInfraContainerImage string
205216
SystemdCgroup bool
206217
InsecureRegistry []string
207218
CNIConfDir string
219+
RestrictOOMScoreAdj bool
220+
Snapshotter string
208221
}{
209222
PodInfraContainerImage: pauseImage,
210223
SystemdCgroup: forceSystemd,
211224
InsecureRegistry: insecureRegistry,
212225
CNIConfDir: cni.ConfDir,
226+
RestrictOOMScoreAdj: inUserNamespace,
227+
Snapshotter: snapshotter,
213228
}
214229
var b bytes.Buffer
215230
if err := t.Execute(&b, opts); err != nil {
@@ -223,7 +238,7 @@ func generateContainerdConfig(cr CommandRunner, imageRepository string, kv semve
223238
}
224239

225240
// Enable idempotently enables containerd on a host
226-
func (r *Containerd) Enable(disOthers, forceSystemd bool) error {
241+
func (r *Containerd) Enable(disOthers, forceSystemd, inUserNamespace bool) error {
227242
if disOthers {
228243
if err := disableOthers(r, r.Runner); err != nil {
229244
klog.Warningf("disableOthers: %v", err)
@@ -232,13 +247,19 @@ func (r *Containerd) Enable(disOthers, forceSystemd bool) error {
232247
if err := populateCRIConfig(r.Runner, r.SocketPath()); err != nil {
233248
return err
234249
}
235-
if err := generateContainerdConfig(r.Runner, r.ImageRepository, r.KubernetesVersion, forceSystemd, r.InsecureRegistry); err != nil {
250+
if err := generateContainerdConfig(r.Runner, r.ImageRepository, r.KubernetesVersion, forceSystemd, r.InsecureRegistry, inUserNamespace); err != nil {
236251
return err
237252
}
238253
if err := enableIPForwarding(r.Runner); err != nil {
239254
return err
240255
}
241256

257+
if inUserNamespace {
258+
if err := r.Init.EnableNow("containerd-fuse-overlayfs"); err != nil {
259+
return err
260+
}
261+
}
262+
242263
// Otherwise, containerd will fail API requests with 'Unimplemented'
243264
return r.Init.Restart("containerd")
244265
}

Diff for: pkg/minikube/cruntime/crio.go

+4-1
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,10 @@ func enableIPForwarding(cr CommandRunner) error {
139139
}
140140

141141
// Enable idempotently enables CRIO on a host
142-
func (r *CRIO) Enable(disOthers, _ bool) error {
142+
func (r *CRIO) Enable(disOthers, _, inUserNamespace bool) error {
143+
if inUserNamespace {
144+
return errors.New("inUserNamespace must not be true for cri-o (yet)")
145+
}
143146
if disOthers {
144147
if err := disableOthers(r, r.Runner); err != nil {
145148
klog.Warningf("disableOthers: %v", err)

Diff for: pkg/minikube/cruntime/cruntime.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ type Manager interface {
7878
// Version retrieves the current version of this runtime
7979
Version() (string, error)
8080
// Enable idempotently enables this runtime on a host
81-
Enable(bool, bool) error
81+
Enable(bool, bool, bool) error
8282
// Disable idempotently disables this runtime on a host
8383
Disable() error
8484
// Active returns whether or not a runtime is active on a host

Diff for: pkg/minikube/cruntime/cruntime_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -668,7 +668,7 @@ func TestEnable(t *testing.T) {
668668
if err != nil {
669669
t.Fatalf("New(%s): %v", tc.runtime, err)
670670
}
671-
err = cr.Enable(true, false)
671+
err = cr.Enable(true, false, false)
672672
if err != nil {
673673
t.Errorf("%s disable unexpected error: %v", tc.runtime, err)
674674
}

Diff for: pkg/minikube/cruntime/docker.go

+4-1
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,10 @@ func (r *Docker) Active() bool {
107107
}
108108

109109
// Enable idempotently enables Docker on a host
110-
func (r *Docker) Enable(disOthers, forceSystemd bool) error {
110+
func (r *Docker) Enable(disOthers, forceSystemd, inUserNamespace bool) error {
111+
if inUserNamespace {
112+
return errors.New("inUserNamespace must not be true for docker")
113+
}
111114
containerdWasActive := r.Init.Active("containerd")
112115

113116
if disOthers {

Diff for: pkg/minikube/driver/driver.go

+9-1
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,15 @@ func NeedsPortForward(name string) bool {
185185
return true
186186
}
187187
// Docker for Desktop
188-
return runtime.GOOS == "darwin" || runtime.GOOS == "windows" || detect.IsMicrosoftWSL()
188+
if runtime.GOOS == "darwin" || runtime.GOOS == "windows" || detect.IsMicrosoftWSL() {
189+
return true
190+
}
191+
192+
si, err := oci.CachedDaemonInfo(name)
193+
if err != nil {
194+
panic(err)
195+
}
196+
return si.Rootless
189197
}
190198

191199
// HasResourceLimits returns true if driver can set resource limits such as memory size or CPU count.

Diff for: pkg/minikube/node/start.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,8 @@ func configureRuntimes(runner cruntime.CommandRunner, cc config.ClusterConfig, k
349349
}
350350
}
351351

352-
err = cr.Enable(disableOthers, forceSystemd())
352+
inUserNamespace := strings.Contains(cc.KubernetesConfig.FeatureGates, "KubeletInUserNamespace=true")
353+
err = cr.Enable(disableOthers, forceSystemd(), inUserNamespace)
353354
if err != nil {
354355
exit.Error(reason.RuntimeEnable, "Failed to enable container runtime", err)
355356
}

Diff for: pkg/minikube/registry/drvs/docker/docker.go

+1-8
Original file line numberDiff line numberDiff line change
@@ -146,14 +146,7 @@ func status() (retState registry.State) {
146146
return suggestFix("info", -1, serr, fmt.Errorf("docker info error: %s", serr))
147147
}
148148

149-
if si.Rootless {
150-
return registry.State{
151-
Reason: "PROVIDER_DOCKER_ROOTLESS",
152-
Error: errors.New("rootless Docker not supported yet"),
153-
Installed: true,
154-
Healthy: false,
155-
Doc: "https://github.com/kubernetes/minikube/issues/10836"}
156-
}
149+
// TODO: validate cgroup v2 delegation when si.Rootless is true
157150

158151
return checkNeedsImprovement()
159152
}

0 commit comments

Comments
 (0)