Skip to content

Commit a641d38

Browse files
Add nvidia-cdi-refresh service
Automatic regeneration of /var/run/cdi/nvidia.yaml New units: • nvidia-cdi-refresh.service – one-shot wrapper for nvidia-ctk cdi generate (adds sleep + required caps). • nvidia-cdi-refresh.path – fires on driver install/upgrade via modules.dep.bin changes. • 60-nvidia-cdi-refresh.rules – udev triggers for module add/remove, PCI bind/unbind/change, and MIG /dev/nvidia-caps* char-device events. Packaging • RPM %post reloads udev/systemd and enables the path unit on fresh installs. • DEB postinst does the same (configure, skip on upgrade). Result: CDI spec is always up to date Signed-off-by: Carlos Eduardo Arango Gutierrez <[email protected]>
1 parent adb5e67 commit a641d38

11 files changed

+145
-1
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
[Unit]
16+
Description=Trigger CDI refresh on NVIDIA driver install / uninstall events
17+
18+
[Path]
19+
# depmod rewrites this file exactly once per (un)install
20+
PathChanged=/lib/modules/%v/modules.dep.bin
21+
22+
[Install]
23+
WantedBy=multi-user.target
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
[Unit]
16+
Description=Refresh NVIDIA CDI specification file
17+
18+
[Service]
19+
Type=oneshot
20+
# The 30-second delay ensures that dependent services or resources are fully initialized.
21+
ExecStartPre=/bin/sleep 30
22+
ExecStart=/usr/bin/nvidia-ctk cdi generate --output=/var/run/cdi/nvidia.yaml
23+
CapabilityBoundingSet=CAP_SYS_MODULE CAP_SYS_ADMIN CAP_MKNOD
24+
25+
[Install]
26+
WantedBy=multi-user.target
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# NVIDIA kernel-module events
16+
ACTION=="add|remove", SUBSYSTEM=="module", KERNEL=="nvidia*", \
17+
TAG+="systemd", ENV{SYSTEMD_WANTS}+="nvidia-cdi-refresh.service"
18+
19+
# Bind/unbind/change events for a GPU PCI function driven by the NVIDIA driver
20+
ACTION=="bind|unbind|change", SUBSYSTEM=="pci", DRIVER=="nvidia", \
21+
TAG+="systemd", ENV{SYSTEMD_WANTS}+="nvidia-cdi-refresh.service"

docker/Dockerfile.debian

+2
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ RUN make PREFIX=${DIST_DIR} cmds
5555

5656
WORKDIR $DIST_DIR
5757
COPY packaging/debian ./debian
58+
COPY deployments/systemd/ .
59+
COPY deployments/udev/ .
5860

5961
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
6062
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}

docker/Dockerfile.opensuse-leap

+2
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ RUN make PREFIX=${DIST_DIR} cmds
4646

4747
WORKDIR $DIST_DIR/..
4848
COPY packaging/rpm .
49+
COPY deployments/systemd/ .
50+
COPY deployments/udev/ .
4951

5052
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
5153
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}

docker/Dockerfile.rpm-yum

+2
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ RUN make PREFIX=${DIST_DIR} cmds
7171

7272
WORKDIR $DIST_DIR/..
7373
COPY packaging/rpm .
74+
COPY deployments/systemd/* ${DIST_DIR}/
75+
COPY deployments/udev/* ${DIST_DIR}/
7476

7577
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
7678
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}

docker/Dockerfile.ubuntu

+2
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ RUN make PREFIX=${DIST_DIR} cmds
5353

5454
WORKDIR $DIST_DIR
5555
COPY packaging/debian ./debian
56+
COPY deployments/systemd/ .
57+
COPY deployments/udev/ .
5658

5759
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
5860
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}

packaging/debian/control

+6
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,9 @@ Architecture: any
2929
Depends: ${misc:Depends}, nvidia-container-toolkit-base (= @VERSION@)
3030
Description: NVIDIA Container Toolkit Operator Extensions
3131
Provides tools for using the NVIDIA Container Toolkit with the GPU Operator
32+
33+
Package: nvidia-container-toolkit-cdi-refresh
34+
Architecture: any
35+
Depends: ${misc:Depends}, nvidia-container-toolkit-base (= @VERSION@)
36+
Description: NVIDIA CDI Refresh Service
37+
Provides a service to refresh the NVIDIA CDI specification
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
60-nvidia-cdi-refresh.rules /etc/udev/rules.d/
2+
nvidia-cdi-refresh.service /etc/systemd/system/
3+
nvidia-cdi-refresh.path /etc/systemd/system/
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#!/bin/sh
# Debian maintainer script (postinst).
#
# On "configure" it asks udev to reload its rules, reloads the systemd unit
# cache and, on a first install, enables and starts nvidia-cdi-refresh.path.
# Every udevadm/systemctl call is best effort: a failure never aborts the
# package configuration.
#
# Arguments:
#   $1 - action passed by dpkg (configure, abort-upgrade, abort-remove,
#        abort-deconfigure)
#   $2 - for "configure": empty on a first install, non-empty otherwise
# Exit status:
#   0 on success, 1 when called with an unknown action.

set -e

case "$1" in
    configure)
        # Reload udev rules when udevadm is available; errors are ignored.
        if command -v udevadm >/dev/null 2>&1; then
            udevadm control --reload || true
        fi

        # Only talk to systemd when it is the running init system.
        # The directory test is used instead of `systemctl is-system-running`:
        # that command exits non-zero for every state other than "running"
        # (e.g. "degraded" or "starting"), which would silently skip the
        # daemon-reload and the unit enablement on otherwise usable hosts.
        if command -v systemctl >/dev/null 2>&1 \
            && [ -d /run/systemd/system ]; then

            systemctl daemon-reload || true

            if [ -z "$2" ]; then # $2 empty → first install
                systemctl enable --now nvidia-cdi-refresh.path || true
            fi
        fi
        ;;

    abort-upgrade|abort-remove|abort-deconfigure)
        # Nothing to do for these dpkg abort cases
        ;;

    *)
        echo "postinst called with unknown argument \`$1'" >&2
        exit 1
        ;;
esac

exit 0

packaging/rpm/SPECS/nvidia-container-toolkit.spec

+26-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ Source3: nvidia-container-runtime
1717
Source4: nvidia-container-runtime.cdi
1818
Source5: nvidia-container-runtime.legacy
1919
Source6: nvidia-cdi-hook
20+
Source7: nvidia-cdi-refresh.service
21+
Source8: nvidia-cdi-refresh.path
22+
Source9: 60-nvidia-cdi-refresh.rules
2023

2124
Obsoletes: nvidia-container-runtime <= 3.5.0-1, nvidia-container-runtime-hook <= 1.4.0-2
2225
Provides: nvidia-container-runtime
@@ -28,23 +31,40 @@ Requires: nvidia-container-toolkit-base == %{version}-%{release}
2831
Provides tools and utilities to enable GPU support in containers.
2932

3033
%prep
31-
cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6} .
34+
cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6} %{SOURCE7} %{SOURCE8} %{SOURCE9} .
3235

3336
%install
3437
mkdir -p %{buildroot}%{_bindir}
38+
mkdir -p %{buildroot}/etc/systemd/system/
39+
mkdir -p %{buildroot}/etc/udev/rules.d
40+
3541
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime-hook
3642
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime
3743
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.cdi
3844
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.legacy
3945
install -m 755 -t %{buildroot}%{_bindir} nvidia-ctk
4046
install -m 755 -t %{buildroot}%{_bindir} nvidia-cdi-hook
47+
install -m 644 -t %{buildroot}/etc/systemd/system %{SOURCE7}
48+
install -m 644 -t %{buildroot}/etc/systemd/system %{SOURCE8}
49+
install -m 644 -t %{buildroot}/etc/udev/rules.d %{SOURCE9}
4150

4251
%post
4352
if [ $1 -gt 1 ]; then # only on package upgrade
4453
mkdir -p %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit
4554
cp -af %{_bindir}/nvidia-container-runtime-hook %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit
4655
fi
4756

57+
# Reload udev so the new rule is active immediately
58+
/usr/bin/udevadm control --reload || :
59+
60+
# Reload systemd unit cache
61+
/bin/systemctl daemon-reload || :
62+
63+
# On fresh install ($1 == 1) enable the path unit for boot and start it immediately (--now)
64+
if [ "$1" -eq 1 ]; then
65+
/bin/systemctl enable --now nvidia-cdi-refresh.path || :
66+
fi
67+
4868
%posttrans
4969
if [ ! -e %{_bindir}/nvidia-container-runtime-hook ]; then
5070
# repairing lost file nvidia-container-runtime-hook
@@ -64,6 +84,11 @@ fi
6484
%files
6585
%license LICENSE
6686
%{_bindir}/nvidia-container-runtime-hook
87+
%config /etc/systemd/system/nvidia-cdi-refresh.service
88+
%config /etc/systemd/system/nvidia-cdi-refresh.path
89+
%dir /etc/systemd/system
90+
%config /etc/udev/rules.d/60-nvidia-cdi-refresh.rules
91+
%dir /etc/udev/rules.d
6792

6893
%changelog
6994
# As of 1.10.0-1 we generate the release information automatically

0 commit comments

Comments
 (0)