From 605ca6183c4db3bc82370cd0ca2ab7bcfb5e010c Mon Sep 17 00:00:00 2001
From: Maximilian Meister
Date: Tue, 19 Jun 2018 08:28:30 +0200
Subject: [PATCH 1/4] fix kubernetes issue #65204

https://github.com/kubernetes/kubernetes/issues/65204

Signed-off-by: Maximilian Meister
---
 fs/fs.go | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/fs/fs.go b/fs/fs.go
index b86ec36efe..4e4dff26de 100644
--- a/fs/fs.go
+++ b/fs/fs.go
@@ -533,6 +533,21 @@ func (self *RealFsInfo) GetDirFsDevice(dir string) (*DeviceInfo, error) {
 	}

 	mount, found := self.mounts[dir]
+	// try the parent dir if not found until we reach the root dir
+	// this is an issue on btrfs systems where the directory is not
+	// the subvolume
+	for !found {
+		pathdir, _ := filepath.Split(dir)
+		// break when we reach root
+		if pathdir == "/" {
+			break
+		}
+		// trim "/" from the new parent path otherwise the next possible
+		// filepath.Split in the loop will not split the string any further
+		dir = strings.TrimSuffix(pathdir, "/")
+		mount, found = self.mounts[dir]
+	}
+
 	if found && mount.Fstype == "btrfs" && mount.Major == 0 && strings.HasPrefix(mount.Source, "/dev/") {
 		major, minor, err := getBtrfsMajorMinorIds(mount)
 		if err != nil {
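
The parent-directory walk added in this patch relies on filepath.Split keeping the trailing separator on the directory part, which is why the loop trims the "/" before the next iteration. Below is a minimal standalone sketch of that walk, using a hypothetical path rather than a real mount-table entry; it is an illustration, not cAdvisor code.

package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

func main() {
	// Hypothetical path; the real code looks up self.mounts at each step.
	dir := "/var/lib/docker/btrfs/subvolumes/abc123"
	for {
		parent, _ := filepath.Split(dir) // Split keeps the trailing "/" on the parent
		if parent == "/" {
			break // reached the root
		}
		// Without the TrimSuffix the next Split would return the same parent again.
		dir = strings.TrimSuffix(parent, "/")
		fmt.Println(dir)
	}
}
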
From 098ba73d540bdaac5c8a95908bbdd959be9b55b1 Mon Sep 17 00:00:00 2001
From: Rohit Agarwal
Date: Fri, 15 Jun 2018 17:30:41 -0700
Subject: [PATCH 2/4] Initialize NVML on demand.

Earlier, if the NVIDIA driver was not installed when cAdvisor was started,
we would start a goroutine to try to initialize NVML every minute. This
resulted in a race. We can have a situation where:
- the goroutine tries to initialize NVML but fails, so it sleeps for a minute.
- the driver is installed.
- a container that uses NVIDIA devices is started.

This container would not get GPU stats because a minute has not passed
since the last failed initialization attempt, and so NVML is not
initialized.
---
 accelerators/nvidia.go      | 49 +++++++++++++++++--------------------
 accelerators/nvidia_test.go | 14 ++++++++++-
 2 files changed, 35 insertions(+), 28 deletions(-)

diff --git a/accelerators/nvidia.go b/accelerators/nvidia.go
index 054d206b2a..496feba5ee 100644
--- a/accelerators/nvidia.go
+++ b/accelerators/nvidia.go
@@ -31,7 +31,10 @@ import (
 )

 type NvidiaManager struct {
-	sync.RWMutex
+	sync.Mutex
+
+	// true if there are NVIDIA devices present on the node
+	devicesPresent bool

 	// true if the NVML library (libnvidia-ml.so.1) was loaded successfully
 	nvmlInitialized bool
@@ -51,20 +54,9 @@ func (nm *NvidiaManager) Setup() {
 		return
 	}

-	nm.initializeNVML()
-	if nm.nvmlInitialized {
-		return
-	}
-	go func() {
-		glog.V(2).Info("Starting goroutine to initialize NVML")
-		// TODO: use globalHousekeepingInterval
-		for range time.Tick(time.Minute) {
-			nm.initializeNVML()
-			if nm.nvmlInitialized {
-				return
-			}
-		}
-	}()
+	nm.devicesPresent = true
+
+	initializeNVML(nm)
 }

 // detectDevices returns true if a device with given pci id is present on the node.
@@ -91,20 +83,18 @@ func detectDevices(vendorId string) bool {
 }

 // initializeNVML initializes the NVML library and sets up the nvmlDevices map.
-func (nm *NvidiaManager) initializeNVML() {
+// This is defined as a variable to help in testing.
+var initializeNVML = func(nm *NvidiaManager) {
 	if err := gonvml.Initialize(); err != nil {
 		// This is under a logging level because otherwise we may cause
 		// log spam if the drivers/nvml is not installed on the system.
 		glog.V(4).Infof("Could not initialize NVML: %v", err)
 		return
 	}
+	nm.nvmlInitialized = true
 	numDevices, err := gonvml.DeviceCount()
 	if err != nil {
 		glog.Warningf("GPU metrics would not be available. Failed to get the number of nvidia devices: %v", err)
-		nm.Lock()
-		// Even though we won't have GPU metrics, the library was initialized and should be shutdown when exiting.
-		nm.nvmlInitialized = true
-		nm.Unlock()
 		return
 	}
 	glog.V(1).Infof("NVML initialized. Number of nvidia devices: %v", numDevices)
@@ -122,10 +112,6 @@
 		}
 		nm.nvidiaDevices[int(minorNumber)] = device
 	}
-	nm.Lock()
-	// Doing this at the end to avoid race in accessing nvidiaDevices in GetCollector.
-	nm.nvmlInitialized = true
-	nm.Unlock()
 }

 // Destroy shuts down NVML.
@@ -139,12 +125,21 @@
 // present in the devices.list file in the given devicesCgroupPath.
 func (nm *NvidiaManager) GetCollector(devicesCgroupPath string) (AcceleratorCollector, error) {
 	nc := &NvidiaCollector{}
-	nm.RLock()
+
+	if !nm.devicesPresent {
+		return nc, nil
+	}
+	// Makes sure that we don't call initializeNVML() concurrently and
+	// that we only call initializeNVML() when it's not initialized.
+	nm.Lock()
+	if !nm.nvmlInitialized {
+		initializeNVML(nm)
+	}
 	if !nm.nvmlInitialized || len(nm.nvidiaDevices) == 0 {
-		nm.RUnlock()
+		nm.Unlock()
 		return nc, nil
 	}
-	nm.RUnlock()
+	nm.Unlock()
 	nvidiaMinorNumbers, err := parseDevicesCgroup(devicesCgroupPath)
 	if err != nil {
 		return nc, err
diff --git a/accelerators/nvidia_test.go b/accelerators/nvidia_test.go
index c054433f89..b7e7c4d613 100644
--- a/accelerators/nvidia_test.go
+++ b/accelerators/nvidia_test.go
@@ -71,13 +71,16 @@ func TestGetCollector(t *testing.T) {
 		return []int{2, 3}, nil
 	}
 	parseDevicesCgroup = mockParser
+	originalInitializeNVML := initializeNVML
+	initializeNVML = func(_ *NvidiaManager) {}
 	defer func() {
 		parseDevicesCgroup = originalParser
+		initializeNVML = originalInitializeNVML
 	}()

 	nm := &NvidiaManager{}

-	// When nvmlInitialized is false, empty collector should be returned.
+	// When devicesPresent is false, empty collector should be returned.
 	ac, err := nm.GetCollector("does-not-matter")
 	assert.Nil(t, err)
 	assert.NotNil(t, ac)
@@ -85,6 +88,15 @@
 	assert.True(t, ok)
 	assert.Equal(t, 0, len(nc.Devices))

+	// When nvmlInitialized is false, empty collector should be returned.
+	nm.devicesPresent = true
+	ac, err = nm.GetCollector("does-not-matter")
+	assert.Nil(t, err)
+	assert.NotNil(t, ac)
+	nc, ok = ac.(*NvidiaCollector)
+	assert.True(t, ok)
+	assert.Equal(t, 0, len(nc.Devices))
+
 	// When nvidiaDevices is empty, empty collector should be returned.
 	nm.nvmlInitialized = true
 	ac, err = nm.GetCollector("does-not-matter")
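
This patch replaces the periodic retry with initialize-on-first-use: GetCollector takes the mutex, attempts initialization if it has not succeeded yet, and proceeds only once NVML is ready. The following is a minimal sketch of that pattern under stated assumptions: a hypothetical driverInstalled flag stands in for gonvml.Initialize, and the names are illustrative, not cAdvisor's.

package main

import (
	"errors"
	"fmt"
	"sync"
)

// driverInstalled is a hypothetical stand-in for whether libnvidia-ml.so.1
// can be loaded; gonvml is not called here.
var driverInstalled = false

type manager struct {
	mu          sync.Mutex
	initialized bool
}

// ensureInit retries initialization on every call until it succeeds, so a
// driver installed after startup is picked up on the next collection instead
// of waiting for a background timer.
func (m *manager) ensureInit() error {
	m.mu.Lock()
	defer m.mu.Unlock()
	if m.initialized {
		return nil
	}
	if !driverInstalled {
		return errors.New("NVML not available yet")
	}
	m.initialized = true
	return nil
}

func main() {
	m := &manager{}
	fmt.Println(m.ensureInit()) // fails while the driver is missing
	driverInstalled = true
	fmt.Println(m.ensureInit()) // succeeds on the next call, no one-minute wait
}
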
From 8a0fea3e159aba2b783838de2b3de9ab0513abb0 Mon Sep 17 00:00:00 2001
From: David Ashpole
Date: Thu, 14 Jun 2018 10:17:50 -0700
Subject: [PATCH 3/4] don't watch .mount cgroups

---
 manager/watcher/raw/raw.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/manager/watcher/raw/raw.go b/manager/watcher/raw/raw.go
index 103fa887af..01967ee3ef 100644
--- a/manager/watcher/raw/raw.go
+++ b/manager/watcher/raw/raw.go
@@ -110,6 +110,11 @@ func (self *rawContainerWatcher) Stop() error {
 // Watches the specified directory and all subdirectories. Returns whether the path was
 // already being watched and an error (if any).
 func (self *rawContainerWatcher) watchDirectory(dir string, containerName string) (bool, error) {
+	// Don't watch .mount cgroups because they never have containers as sub-cgroups. A single container
+	// can have many .mount cgroups associated with it which can quickly exhaust the inotify watches on a node.
+	if strings.HasSuffix(containerName, ".mount") {
+		return false, nil
+	}
 	alreadyWatching, err := self.watcher.AddWatch(containerName, dir)
 	if err != nil {
 		return alreadyWatching, err

From 58b59e5b8379c9180871e3ec95d572efddf16c08 Mon Sep 17 00:00:00 2001
From: Antonio Murdaca
Date: Wed, 13 Jun 2018 19:06:45 +0200
Subject: [PATCH 4/4] container: fix concurrent map access

GetSpec() can be called concurrently from manager/container.go's
updateSpec(), which results in a concurrent map access on the labels map
because we're directly updating the map inside GetSpec(). The labels map
from the container handler is not a copy of the map itself, just a
reference, which is why we're getting the concurrent map access. Fix this
by moving the restartcount label update to the handler's initialization
method, which is not called concurrently.

Signed-off-by: Antonio Murdaca
---
 container/crio/handler.go   | 12 ++++--------
 container/docker/handler.go | 12 ++++--------
 2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/container/crio/handler.go b/container/crio/handler.go
index 024341da8a..0eecb98044 100644
--- a/container/crio/handler.go
+++ b/container/crio/handler.go
@@ -81,9 +81,6 @@ type crioContainerHandler struct {
 	ipAddress string

 	ignoreMetrics container.MetricSet
-
-	// container restart count
-	restartCount int
 }

 var _ container.ContainerHandler = &crioContainerHandler{}
@@ -175,7 +172,10 @@ func newCrioContainerHandler(
 	// ignore err and get zero as default, this happens with sandboxes, not sure why...
 	// kube isn't sending restart count in labels for sandboxes.
 	restartCount, _ := strconv.Atoi(cInfo.Annotations["io.kubernetes.container.restartCount"])
-	handler.restartCount = restartCount
+	// Only adds restartcount label if it's greater than 0
+	if restartCount > 0 {
+		handler.labels["restartcount"] = strconv.Itoa(restartCount)
+	}

 	handler.ipAddress = cInfo.IP

@@ -225,10 +225,6 @@ func (self *crioContainerHandler) GetSpec() (info.ContainerSpec, error) {
 	spec, err := common.GetSpec(self.cgroupPaths, self.machineInfoFactory, self.needNet(), hasFilesystem)

 	spec.Labels = self.labels
-	// Only adds restartcount label if it's greater than 0
-	if self.restartCount > 0 {
-		spec.Labels["restartcount"] = strconv.Itoa(self.restartCount)
-	}
 	spec.Envs = self.envs
 	spec.Image = self.image

diff --git a/container/docker/handler.go b/container/docker/handler.go
index 8f63d02eb0..a037b092e6 100644
--- a/container/docker/handler.go
+++ b/container/docker/handler.go
@@ -115,9 +115,6 @@ type dockerContainerHandler struct {

 	// zfs watcher
 	zfsWatcher *zfs.ZfsWatcher
-
-	// container restart count
-	restartCount int
 }

 var _ container.ContainerHandler = &dockerContainerHandler{}
@@ -250,7 +247,10 @@ func newDockerContainerHandler(
 	handler.image = ctnr.Config.Image
 	handler.networkMode = ctnr.HostConfig.NetworkMode
 	handler.deviceID = ctnr.GraphDriver.Data["DeviceId"]
-	handler.restartCount = ctnr.RestartCount
+	// Only adds restartcount label if it's greater than 0
+	if ctnr.RestartCount > 0 {
+		handler.labels["restartcount"] = strconv.Itoa(ctnr.RestartCount)
+	}

 	// Obtain the IP address for the contianer.
 	// If the NetworkMode starts with 'container:' then we need to use the IP address of the container specified.
@@ -386,10 +386,6 @@ func (self *dockerContainerHandler) GetSpec() (info.ContainerSpec, error) {
 	spec, err := common.GetSpec(self.cgroupPaths, self.machineInfoFactory, self.needNet(), hasFilesystem)

 	spec.Labels = self.labels
-	// Only adds restartcount label if it's greater than 0
-	if self.restartCount > 0 {
-		spec.Labels["restartcount"] = strconv.Itoa(self.restartCount)
-	}
 	spec.Envs = self.envs
 	spec.Image = self.image
 	spec.CreationTime = self.creationTime
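
This fix works because the handler's labels map is shared by reference with the spec returned from GetSpec(): writing to it inside GetSpec(), which may run on several goroutines at once, is a data race, while populating it once during handler construction is not. A small sketch of the resulting shape follows, with hypothetical names rather than the cAdvisor types.

package main

import (
	"fmt"
	"strconv"
	"sync"
)

type handler struct {
	labels map[string]string
}

// newHandler runs once per container on a single goroutine, so it is safe to
// write the restartcount label here.
func newHandler(restartCount int) *handler {
	labels := map[string]string{"name": "demo"}
	if restartCount > 0 {
		labels["restartcount"] = strconv.Itoa(restartCount)
	}
	return &handler{labels: labels}
}

// Labels only reads the shared map; concurrent callers are fine as long as
// nothing mutates it after construction.
func (h *handler) Labels() map[string]string {
	return h.labels
}

func main() {
	h := newHandler(3)
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			_ = h.Labels()["restartcount"] // concurrent reads, no writes: no race
		}()
	}
	wg.Wait()
	fmt.Println(h.Labels())
}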