Skip to content

Commit 0d5f8c0

Browse files
committed
sdn: garbage-collect dead containers to recover IPAM leases
Port of kubernetes/kubernetes#35572 to openshift-sdn. Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1395183
1 parent c6a9396 commit 0d5f8c0

File tree

1 file changed

+100
-0
lines changed

1 file changed

+100
-0
lines changed

pkg/sdn/plugin/pod_linux.go

+100
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@ package plugin
44

55
import (
66
"fmt"
7+
"io/ioutil"
8+
"net"
79
"os/exec"
10+
"path/filepath"
811
"strconv"
912
"strings"
1013
"syscall"
@@ -20,6 +23,7 @@ import (
2023
knetwork "k8s.io/kubernetes/pkg/kubelet/network"
2124
kubehostport "k8s.io/kubernetes/pkg/kubelet/network/hostport"
2225
kbandwidth "k8s.io/kubernetes/pkg/util/bandwidth"
26+
ksets "k8s.io/kubernetes/pkg/util/sets"
2327

2428
"github.com/containernetworking/cni/pkg/invoke"
2529
"github.com/containernetworking/cni/pkg/ip"
@@ -270,6 +274,93 @@ func vnidToString(vnid uint32) string {
270274
return strconv.FormatUint(uint64(vnid), 10)
271275
}
272276

277+
// podIsExited returns true if the pod is exited (all containers inside are exited).
278+
func podIsExited(p *kcontainer.Pod) bool {
279+
for _, c := range p.Containers {
280+
if c.State != kcontainer.ContainerStateExited {
281+
return false
282+
}
283+
}
284+
return true
285+
}
286+
287+
// getNonExitedPods returns a list of pods that have at least one running container.
288+
func (m *podManager) getNonExitedPods() ([]*kcontainer.Pod, error) {
289+
ret := []*kcontainer.Pod{}
290+
pods, err := m.host.GetRuntime().GetPods(true)
291+
if err != nil {
292+
return nil, fmt.Errorf("Failed to retrieve pods from runtime: %v", err)
293+
}
294+
for _, p := range pods {
295+
if podIsExited(p) {
296+
continue
297+
}
298+
ret = append(ret, p)
299+
}
300+
return ret, nil
301+
}
302+
303+
// ipamGarbageCollection will release unused IPs from dead containers that
304+
// the CNI plugin was never notified had died. openshift-sdn uses the CNI
305+
// host-local IPAM plugin, which stores allocated IPs in a file in
306+
// /var/lib/cni/network. Each file in this directory has as its name the
307+
// allocated IP address of the container, and as its contents the container ID.
308+
// This routine looks for container IDs that are not reported as running by the
309+
// container runtime, and releases each one's IPAM allocation.
310+
func (m *podManager) ipamGarbageCollection() {
311+
glog.V(2).Infof("Starting IP garbage collection")
312+
313+
const ipamDir string = "/var/lib/cni/networks/openshift-sdn"
314+
files, err := ioutil.ReadDir(ipamDir)
315+
if err != nil {
316+
glog.Errorf("Failed to list files in CNI host-local IPAM store %v: %v", ipamDir, err)
317+
return
318+
}
319+
320+
// gather containerIDs for allocated ips
321+
ipContainerIdMap := make(map[string]string)
322+
for _, file := range files {
323+
// skip non checkpoint file
324+
if ip := net.ParseIP(file.Name()); ip == nil {
325+
continue
326+
}
327+
328+
content, err := ioutil.ReadFile(filepath.Join(ipamDir, file.Name()))
329+
if err != nil {
330+
glog.Errorf("Failed to read file %v: %v", file, err)
331+
}
332+
ipContainerIdMap[file.Name()] = strings.TrimSpace(string(content))
333+
}
334+
335+
// gather infra container IDs of current running Pods
336+
runningContainerIDs := ksets.String{}
337+
pods, err := m.getNonExitedPods()
338+
if err != nil {
339+
glog.Errorf("Failed to get pods: %v", err)
340+
return
341+
}
342+
for _, pod := range pods {
343+
containerID, err := m.host.GetRuntime().GetPodContainerID(pod)
344+
if err != nil {
345+
glog.Warningf("Failed to get infra containerID of %q/%q: %v", pod.Namespace, pod.Name, err)
346+
continue
347+
}
348+
349+
runningContainerIDs.Insert(strings.TrimSpace(containerID.ID))
350+
}
351+
352+
// release leaked ips
353+
for ip, containerID := range ipContainerIdMap {
354+
// if the container is not running, release IP
355+
if runningContainerIDs.Has(containerID) {
356+
continue
357+
}
358+
359+
glog.V(2).Infof("Releasing IP %q allocated to %q.", ip, containerID)
360+
m.ipamDel(containerID)
361+
}
362+
}
363+
273364
// Set up all networking (host/container veth, OVS flows, IPAM, loopback, etc)
274365
func (m *podManager) setup(req *cniserver.PodRequest) (*cnitypes.Result, *kubehostport.RunningPod, error) {
275366
podConfig, pod, err := m.getPodConfig(req)
@@ -279,6 +370,15 @@ func (m *podManager) setup(req *cniserver.PodRequest) (*cnitypes.Result, *kubeho
279370

280371
ipamResult, err := m.ipamAdd(req.Netns, req.ContainerId)
281372
if err != nil {
373+
// TODO: Remove this hack once we've figured out how to retrieve the netns
374+
// of an exited container. Currently, restarting docker will leak a bunch of
375+
// ips. This will exhaust available ip space unless we cleanup old ips. At the
376+
// same time we don't want to try GC'ing them periodically as that could lead
377+
// to a performance regression in starting pods. So on each setup failure, try
378+
// GC on the assumption that the kubelet is going to retry pod creation, and
379+
// when it does, there will be ips.
380+
m.ipamGarbageCollection()
381+
282382
return nil, nil, fmt.Errorf("failed to run IPAM for %v: %v", req.ContainerId, err)
283383
}
284384
podIP := ipamResult.IP4.IP.IP

0 commit comments

Comments
 (0)