Skip to content

Commit 57f69c1

Browse files
author
OpenShift Bot
authored
Merge pull request #11964 from dcbw/sdn-fix-ipam-exhaustion
Merged by openshift-bot
2 parents 7a40365 + 0d5f8c0 commit 57f69c1

File tree

1 file changed

+145
-36
lines changed

1 file changed

+145
-36
lines changed

pkg/sdn/plugin/pod_linux.go

+145-36
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@ package plugin
44

55
import (
66
"fmt"
7+
"io/ioutil"
8+
"net"
79
"os/exec"
10+
"path/filepath"
811
"strconv"
912
"strings"
1013
"syscall"
@@ -20,6 +23,7 @@ import (
2023
knetwork "k8s.io/kubernetes/pkg/kubelet/network"
2124
kubehostport "k8s.io/kubernetes/pkg/kubelet/network/hostport"
2225
kbandwidth "k8s.io/kubernetes/pkg/util/bandwidth"
26+
ksets "k8s.io/kubernetes/pkg/util/sets"
2327

2428
"github.com/containernetworking/cni/pkg/invoke"
2529
"github.com/containernetworking/cni/pkg/ip"
@@ -211,37 +215,43 @@ func addMacvlan(netns string) error {
211215
})
212216
}
213217

214-
// Run CNI IPAM for the container, either allocating an IP address and returning
215-
// it (for ADD) or releasing the lease and cleaning up (for DEL)
216-
func (m *podManager) runIPAM(netnsPath string, action cniserver.CNICommand, id string) (*cnitypes.Result, error) {
217-
args := &invoke.Args{
218+
func createIPAMArgs(netnsPath string, action cniserver.CNICommand, id string) *invoke.Args {
219+
return &invoke.Args{
218220
Command: string(action),
219221
ContainerID: id,
220222
NetNS: netnsPath,
221223
IfName: podInterfaceName,
222224
Path: "/opt/cni/bin",
223225
}
226+
}
224227

225-
if action == cniserver.CNI_ADD {
226-
result, err := invoke.ExecPluginWithResult("/opt/cni/bin/host-local", m.ipamConfig, args)
227-
if err != nil {
228-
return nil, fmt.Errorf("failed to run CNI IPAM ADD: %v", err)
229-
}
228+
// Run CNI IPAM allocation for the container and return the allocated IP address
229+
func (m *podManager) ipamAdd(netnsPath string, id string) (*cnitypes.Result, error) {
230+
if netnsPath == "" {
231+
return nil, fmt.Errorf("netns required for CNI_ADD")
232+
}
230233

231-
if result.IP4 == nil {
232-
return nil, fmt.Errorf("failed to obtain IP address from CNI IPAM")
233-
}
234+
args := createIPAMArgs(netnsPath, cniserver.CNI_ADD, id)
235+
result, err := invoke.ExecPluginWithResult("/opt/cni/bin/host-local", m.ipamConfig, args)
236+
if err != nil {
237+
return nil, fmt.Errorf("failed to run CNI IPAM ADD: %v", err)
238+
}
234239

235-
return result, nil
236-
} else if action == cniserver.CNI_DEL {
237-
err := invoke.ExecPluginWithoutResult("/opt/cni/bin/host-local", m.ipamConfig, args)
238-
if err != nil {
239-
return nil, fmt.Errorf("failed to run CNI IPAM DEL: %v", err)
240-
}
241-
return nil, nil
240+
if result.IP4 == nil {
241+
return nil, fmt.Errorf("failed to obtain IP address from CNI IPAM")
242242
}
243243

244-
return nil, fmt.Errorf("invalid IPAM action %v", action)
244+
return result, nil
245+
}
246+
247+
// Run CNI IPAM release for the container
248+
func (m *podManager) ipamDel(id string) error {
249+
args := createIPAMArgs("", cniserver.CNI_DEL, id)
250+
err := invoke.ExecPluginWithoutResult("/opt/cni/bin/host-local", m.ipamConfig, args)
251+
if err != nil {
252+
return fmt.Errorf("failed to run CNI IPAM DEL: %v", err)
253+
}
254+
return nil
245255
}
246256

247257
func isScriptError(err error) bool {
@@ -264,15 +274,111 @@ func vnidToString(vnid uint32) string {
264274
return strconv.FormatUint(uint64(vnid), 10)
265275
}
266276

277+
// podIsExited returns true if the pod is exited (all containers inside are exited).
278+
func podIsExited(p *kcontainer.Pod) bool {
279+
for _, c := range p.Containers {
280+
if c.State != kcontainer.ContainerStateExited {
281+
return false
282+
}
283+
}
284+
return true
285+
}
286+
287+
// getNonExitedPods returns a list of pods that have at least one running container.
288+
func (m *podManager) getNonExitedPods() ([]*kcontainer.Pod, error) {
289+
ret := []*kcontainer.Pod{}
290+
pods, err := m.host.GetRuntime().GetPods(true)
291+
if err != nil {
292+
return nil, fmt.Errorf("Failed to retrieve pods from runtime: %v", err)
293+
}
294+
for _, p := range pods {
295+
if podIsExited(p) {
296+
continue
297+
}
298+
ret = append(ret, p)
299+
}
300+
return ret, nil
301+
}
302+
303+
// ipamGarbageCollection will release unused IPs from dead containers that
304+
// the CNI plugin was never notified had died. openshift-sdn uses the CNI
305+
// host-local IPAM plugin, which stores allocated IPs in a file in
306+
// /var/lib/cni/network. Each file in this directory has as its name the
307+
// allocated IP address of the container, and as its contents the container ID.
308+
// This routine looks for container IDs that are not reported as running by the
309+
// container runtime, and releases each one's IPAM allocation.
310+
func (m *podManager) ipamGarbageCollection() {
311+
glog.V(2).Infof("Starting IP garbage collection")
312+
313+
const ipamDir string = "/var/lib/cni/networks/openshift-sdn"
314+
files, err := ioutil.ReadDir(ipamDir)
315+
if err != nil {
316+
glog.Errorf("Failed to list files in CNI host-local IPAM store %v: %v", ipamDir, err)
317+
return
318+
}
319+
320+
// gather containerIDs for allocated ips
321+
ipContainerIdMap := make(map[string]string)
322+
for _, file := range files {
323+
// skip non checkpoint file
324+
if ip := net.ParseIP(file.Name()); ip == nil {
325+
continue
326+
}
327+
328+
content, err := ioutil.ReadFile(filepath.Join(ipamDir, file.Name()))
329+
if err != nil {
330+
glog.Errorf("Failed to read file %v: %v", file, err)
331+
}
332+
ipContainerIdMap[file.Name()] = strings.TrimSpace(string(content))
333+
}
334+
335+
// gather infra container IDs of current running Pods
336+
runningContainerIDs := ksets.String{}
337+
pods, err := m.getNonExitedPods()
338+
if err != nil {
339+
glog.Errorf("Failed to get pods: %v", err)
340+
return
341+
}
342+
for _, pod := range pods {
343+
containerID, err := m.host.GetRuntime().GetPodContainerID(pod)
344+
if err != nil {
345+
glog.Warningf("Failed to get infra containerID of %q/%q: %v", pod.Namespace, pod.Name, err)
346+
continue
347+
}
348+
349+
runningContainerIDs.Insert(strings.TrimSpace(containerID.ID))
350+
}
351+
352+
// release leaked ips
353+
for ip, containerID := range ipContainerIdMap {
354+
// if the container is not running, release IP
355+
if runningContainerIDs.Has(containerID) {
356+
continue
357+
}
358+
359+
glog.V(2).Infof("Releasing IP %q allocated to %q.", ip, containerID)
360+
m.ipamDel(containerID)
361+
}
362+
}
363+
267364
// Set up all networking (host/container veth, OVS flows, IPAM, loopback, etc)
268365
func (m *podManager) setup(req *cniserver.PodRequest) (*cnitypes.Result, *kubehostport.RunningPod, error) {
269366
podConfig, pod, err := m.getPodConfig(req)
270367
if err != nil {
271368
return nil, nil, err
272369
}
273370

274-
ipamResult, err := m.runIPAM(req.Netns, cniserver.CNI_ADD, req.ContainerId)
371+
ipamResult, err := m.ipamAdd(req.Netns, req.ContainerId)
275372
if err != nil {
373+
// TODO: Remove this hack once we've figured out how to retrieve the netns
374+
// of an exited container. Currently, restarting docker will leak a bunch of
375+
// ips. This will exhaust available ip space unless we cleanup old ips. At the
376+
// same time we don't want to try GC'ing them periodically as that could lead
377+
// to a performance regression in starting pods. So on each setup failure, try
378+
// GC on the assumption that the kubelet is going to retry pod creation, and
379+
// when it does, there will be ips.
380+
m.ipamGarbageCollection()
381+
276382
return nil, nil, fmt.Errorf("failed to run IPAM for %v: %v", req.ContainerId, err)
277383
}
278384
podIP := ipamResult.IP4.IP.IP
@@ -281,7 +387,7 @@ func (m *podManager) setup(req *cniserver.PodRequest) (*cnitypes.Result, *kubeho
281387
var success bool
282388
defer func() {
283389
if !success {
284-
m.runIPAM(req.Netns, cniserver.CNI_DEL, req.ContainerId)
390+
m.ipamDel(req.ContainerId)
285391
if err := m.hostportHandler.SyncHostports(TUN, m.getRunningPods()); err != nil {
286392
glog.Warningf("failed syncing hostports: %v", err)
287393
}
@@ -393,29 +499,32 @@ func (m *podManager) update(req *cniserver.PodRequest) error {
393499

394500
// Clean up all pod networking (clear OVS flows, release IPAM lease, remove host/container veth)
395501
func (m *podManager) teardown(req *cniserver.PodRequest) error {
502+
netnsValid := true
396503
if err := ns.IsNSorErr(req.Netns); err != nil {
397504
if _, ok := err.(ns.NSPathNotExistErr); ok {
398-
glog.V(3).Infof("teardown called on already-destroyed pod %s/%s", req.PodNamespace, req.PodName)
399-
return nil
505+
glog.V(3).Infof("teardown called on already-destroyed pod %s/%s; only cleaning up IPAM", req.PodNamespace, req.PodName)
506+
netnsValid = false
400507
}
401508
}
402509

403-
hostVethName, contVethMac, podIP, err := getVethInfo(req.Netns, podInterfaceName)
404-
if err != nil {
405-
return err
406-
}
510+
if netnsValid {
511+
hostVethName, contVethMac, podIP, err := getVethInfo(req.Netns, podInterfaceName)
512+
if err != nil {
513+
return err
514+
}
407515

408-
// The script's teardown functionality doesn't need the VNID
409-
out, err := exec.Command(sdnScript, tearDownCmd, hostVethName, contVethMac, podIP, "-1").CombinedOutput()
410-
glog.V(5).Infof("TearDownPod network plugin output: %s, %v", string(out), err)
516+
// The script's teardown functionality doesn't need the VNID
517+
out, err := exec.Command(sdnScript, tearDownCmd, hostVethName, contVethMac, podIP, "-1").CombinedOutput()
518+
glog.V(5).Infof("TearDownPod network plugin output: %s, %v", string(out), err)
411519

412-
if isScriptError(err) {
413-
return fmt.Errorf("error running network teardown script: %s", getScriptError(out))
414-
} else if err != nil {
415-
return err
520+
if isScriptError(err) {
521+
return fmt.Errorf("error running network teardown script: %s", getScriptError(out))
522+
} else if err != nil {
523+
return err
524+
}
416525
}
417526

418-
if _, err := m.runIPAM(req.Netns, cniserver.CNI_DEL, req.ContainerId); err != nil {
527+
if err := m.ipamDel(req.ContainerId); err != nil {
419528
return err
420529
}
421530

0 commit comments

Comments
 (0)