@@ -4,7 +4,10 @@ package plugin
4
4
5
5
import (
6
6
"fmt"
7
+ "io/ioutil"
8
+ "net"
7
9
"os/exec"
10
+ "path/filepath"
8
11
"strconv"
9
12
"strings"
10
13
"syscall"
@@ -20,6 +23,7 @@ import (
20
23
knetwork "k8s.io/kubernetes/pkg/kubelet/network"
21
24
kubehostport "k8s.io/kubernetes/pkg/kubelet/network/hostport"
22
25
kbandwidth "k8s.io/kubernetes/pkg/util/bandwidth"
26
+ ksets "k8s.io/kubernetes/pkg/util/sets"
23
27
24
28
"github.com/containernetworking/cni/pkg/invoke"
25
29
"github.com/containernetworking/cni/pkg/ip"
@@ -270,6 +274,93 @@ func vnidToString(vnid uint32) string {
270
274
return strconv .FormatUint (uint64 (vnid ), 10 )
271
275
}
272
276
277
+ // podIsExited returns true if the pod is exited (all containers inside are exited).
278
+ func podIsExited (p * kcontainer.Pod ) bool {
279
+ for _ , c := range p .Containers {
280
+ if c .State != kcontainer .ContainerStateExited {
281
+ return false
282
+ }
283
+ }
284
+ return true
285
+ }
286
+
287
+ // getNonExitedPods returns a list of pods that have at least one running container.
288
+ func (m * podManager ) getNonExitedPods () ([]* kcontainer.Pod , error ) {
289
+ ret := []* kcontainer.Pod {}
290
+ pods , err := m .host .GetRuntime ().GetPods (true )
291
+ if err != nil {
292
+ return nil , fmt .Errorf ("Failed to retrieve pods from runtime: %v" , err )
293
+ }
294
+ for _ , p := range pods {
295
+ if podIsExited (p ) {
296
+ continue
297
+ }
298
+ ret = append (ret , p )
299
+ }
300
+ return ret , nil
301
+ }
302
+
303
+ // ipamGarbageCollection will release unused IPs from dead containers that
304
+ // the CNI plugin was never notified had died. openshift-sdn uses the CNI
305
+ // host-local IPAM plugin, which stores allocated IPs in a file in
306
+ // /var/lib/cni/network. Each file in this directory has as its name the
307
+ // allocated IP address of the container, and as its contents the container ID.
308
+ // This routine looks for container IDs that are not reported as running by the
309
+ // container runtime, and releases each one's IPAM allocation.
310
+ func (m * podManager ) ipamGarbageCollection () {
311
+ glog .V (2 ).Infof ("Starting IP garbage collection" )
312
+
313
+ const ipamDir string = "/var/lib/cni/networks/openshift-sdn"
314
+ files , err := ioutil .ReadDir (ipamDir )
315
+ if err != nil {
316
+ glog .Errorf ("Failed to list files in CNI host-local IPAM store %v: %v" , ipamDir , err )
317
+ return
318
+ }
319
+
320
+ // gather containerIDs for allocated ips
321
+ ipContainerIdMap := make (map [string ]string )
322
+ for _ , file := range files {
323
+ // skip non checkpoint file
324
+ if ip := net .ParseIP (file .Name ()); ip == nil {
325
+ continue
326
+ }
327
+
328
+ content , err := ioutil .ReadFile (filepath .Join (ipamDir , file .Name ()))
329
+ if err != nil {
330
+ glog .Errorf ("Failed to read file %v: %v" , file , err )
331
+ }
332
+ ipContainerIdMap [file .Name ()] = strings .TrimSpace (string (content ))
333
+ }
334
+
335
+ // gather infra container IDs of current running Pods
336
+ runningContainerIDs := ksets.String {}
337
+ pods , err := m .getNonExitedPods ()
338
+ if err != nil {
339
+ glog .Errorf ("Failed to get pods: %v" , err )
340
+ return
341
+ }
342
+ for _ , pod := range pods {
343
+ containerID , err := m .host .GetRuntime ().GetPodContainerID (pod )
344
+ if err != nil {
345
+ glog .Warningf ("Failed to get infra containerID of %q/%q: %v" , pod .Namespace , pod .Name , err )
346
+ continue
347
+ }
348
+
349
+ runningContainerIDs .Insert (strings .TrimSpace (containerID .ID ))
350
+ }
351
+
352
+ // release leaked ips
353
+ for ip , containerID := range ipContainerIdMap {
354
+ // if the container is not running, release IP
355
+ if runningContainerIDs .Has (containerID ) {
356
+ continue
357
+ }
358
+
359
+ glog .V (2 ).Infof ("Releasing IP %q allocated to %q." , ip , containerID )
360
+ m .ipamDel (containerID )
361
+ }
362
+ }
363
+
273
364
// Set up all networking (host/container veth, OVS flows, IPAM, loopback, etc)
274
365
func (m * podManager ) setup (req * cniserver.PodRequest ) (* cnitypes.Result , * kubehostport.RunningPod , error ) {
275
366
podConfig , pod , err := m .getPodConfig (req )
@@ -279,6 +370,15 @@ func (m *podManager) setup(req *cniserver.PodRequest) (*cnitypes.Result, *kubeho
279
370
280
371
ipamResult , err := m .ipamAdd (req .Netns , req .ContainerId )
281
372
if err != nil {
373
+ // TODO: Remove this hack once we've figured out how to retrieve the netns
374
+ // of an exited container. Currently, restarting docker will leak a bunch of
375
+ // ips. This will exhaust available ip space unless we cleanup old ips. At the
376
+ // same time we don't want to try GC'ing them periodically as that could lead
377
+ // to a performance regression in starting pods. So on each setup failure, try
378
+ // GC on the assumption that the kubelet is going to retry pod creation, and
379
+ // when it does, there will be ips.
380
+ m .ipamGarbageCollection ()
381
+
282
382
return nil , nil , fmt .Errorf ("failed to run IPAM for %v: %v" , req .ContainerId , err )
283
383
}
284
384
podIP := ipamResult .IP4 .IP .IP
0 commit comments