Adding ioctls to fix a simple multi-GPU Hugging Face `accelerate` program that does not work on GCP H100s.
---
### System details
* **instance type:** `a3-highgpu-8g` (GCP, us-east4-a)
* **NVIDIA driver:** `Driver Version: 550.54.15 CUDA Version: 12.4`
* **NVIDIA device:** 4 x NVIDIA H100 HBM3
* **uname -a:** `Linux gcp-h100-us-east4-a-0-bb25baf985414f8899dfdfcb82d6796d 5.15.0-208.159.3.el9uek.x86_64 #2 SMP Wed Jun 19 09:05:13 PDT 2024 x86_64 x86_64 x86_64 GNU/Linux`
```
runsc version release-20240513.0-173-gc526d251933a-dirty
spec: 1.1.0-rc.1
```
---
## Reproduction steps
**1. Install gVisor**
**2. Add the GPU-enabling gVisor options**
In `/etc/docker/daemon.json` (restart the Docker daemon afterwards so it picks up the `runsc` runtime):
```json
{
  "runtimes": {
    "nvidia": {
      "path": "nvidia-container-runtime",
      "runtimeArgs": []
    },
    "runsc": {
      "path": "/home/modal/runsc",
      "runtimeArgs": ["--nvproxy", "--nvproxy-docker", "-debug-log=/tmp/runsc/", "-debug", "-strace"]
    }
  }
}
```
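To confirm that Docker actually registered the `runsc` runtime after the restart, a small helper like the following can be used. This is an illustrative sketch only, not part of the original repro; it assumes the Docker CLI is on `PATH` and that the current user can talk to the daemon.
```python
# check_runtimes.py -- illustrative helper: verify that dockerd picked up the
# "runsc" entry from /etc/docker/daemon.json after a restart.
import json
import subprocess

# `docker info --format '{{json .Runtimes}}'` prints the registered runtimes as JSON.
info = subprocess.run(
    ["docker", "info", "--format", "{{json .Runtimes}}"],
    check=True, capture_output=True, text=True,
)
runtimes = json.loads(info.stdout)
print("registered runtimes:", ", ".join(sorted(runtimes)))
if "runsc" not in runtimes:
    raise SystemExit("runsc runtime not registered; re-check daemon.json and restart dockerd")
```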
**3. Build and run the Dockerfile** (`$RUNTIME` below is the container runtime under test)
```Dockerfile
# Dockerfile
FROM winglian/axolotl@sha256:5c724f7accd8188b0f84ead93b7efbfa8f8661f40e133646bd6d946bc3423d6d
RUN pip install fastapi==0.111.0
RUN pip install huggingface-hub~=0.23.0 pydantic==2.6.3 python-dateutil
ENV HUGGINGFACE_HUB_CACHE="/pretrained"
ENV TQDM_DISABLE="true"
ENV AXOLOTL_NCCL_TIMEOUT="60"
COPY <<EOF repro.py
import os
import subprocess
from pathlib import Path
print("[MOD-3226] hello from the repro!!!")
from accelerate import Accelerator
accelerator = Accelerator()
with accelerator.main_process_first():
    print(f"hello! {accelerator.process_index}")
EOF
ENTRYPOINT ["accelerate", "launch", "repro.py"]
```
```
sudo docker run -it --runtime=$RUNTIME --gpus='"device=GPU-c453e5c7-a56d-70bf-78ce-61be6cb8e0db,GPU-4703196a-e3df-9e3f-bb8b-6fa91c8e9970,GPU-4a9c162c-9280-eaa8-215a-2c681e82a99f,GPU-1660d344-e18b-e48a-cced-38380e903c31"' ce4326479c8412b13bba27416e3e77093d4411279b432ca1b25050f17ef57a67
```
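The traceback in the failure logs below bottoms out in `torch.distributed.barrier()`, so the problem can also be exercised without `accelerate` at all. The following is a minimal sketch under that assumption; the file name and `torchrun` invocation are illustrative and not part of the repro image.
```python
# barrier_repro.py -- minimal sketch that exercises the same NCCL barrier that
# fails under runsc (main). Launch inside the container with:
#   torchrun --nproc_per_node=4 barrier_repro.py
import os

import torch
import torch.distributed as dist

def main():
    local_rank = int(os.environ["LOCAL_RANK"])   # set by torchrun
    torch.cuda.set_device(local_rank)            # one GPU per process
    dist.init_process_group(backend="nccl")      # same backend accelerate uses for multi-GPU
    dist.barrier()                                # the call that raises ncclUnhandledCudaError
    print(f"hello! {dist.get_rank()}")
    dist.destroy_process_group()

if __name__ == "__main__":
    main()
```
Under the broken configuration this would be expected to fail at the barrier in the same way as the `accelerate` repro; it is only a way to take `accelerate` out of the picture while debugging.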
### Results
**`runc`**
```
sudo docker run -it --gpus='"device=GPU-c453e5c7-a56d-70bf-78ce-61be6cb8e0db,GPU-4703196a-e3df-9e3f-bb8b-6fa91c8e9970,GPU-4a9c162c-9280-eaa8-215a-2c681e82a99f,GPU-1660d344-e18b-e48a-cced-38380e903c31"' ce4326479c8412b13bba27416e3e77093d4411279b432ca1b25050f17ef57a67
The following values were not passed to `accelerate launch` and had defaults used instead:
`--num_processes` was set to a value of `4`
More than one GPU was found, enabling multi-GPU training.
If this was unintended please pass in `--num_processes=1`.
`--num_machines` was set to a value of `1`
`--mixed_precision` was set to a value of `'no'`
`--dynamo_backend` was set to a value of `'no'`
To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`.
[MOD-3226] hello from the repro!!!
[MOD-3226] hello from the repro!!!
[MOD-3226] hello from the repro!!!
[MOD-3226] hello from the repro!!!
hello! 0
hello! 1
hello! 2hello! 3
```
**`runsc` (main)**
<details> <summary>💥 Failure logs</summary>
```
sudo docker run -it --runtime=runsc --gpus='"device=GPU-c453e5c7-a56d-70bf-78ce-61be6cb8e0db,GPU-4703196a-e3df-9e3f-bb8b-6fa91c8e9970,GPU-4a9c162c-9280-eaa8-215a-2c681e82a99f,GPU-1660d344-e18b-e48a-cced-38380e903c31"' ce4326479c8412b13bba27416e3e77093d4411279b432ca1b25050f17ef57a67
The following values were not passed to `accelerate launch` and had defaults used instead:
`--num_processes` was set to a value of `4`
More than one GPU was found, enabling multi-GPU training.
If this was unintended please pass in `--num_processes=1`.
`--num_machines` was set to a value of `1`
`--mixed_precision` was set to a value of `'no'`
`--dynamo_backend` was set to a value of `'no'`
To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`.
[MOD-3226] hello from the repro!!!
[MOD-3226] hello from the repro!!!
[MOD-3226] hello from the repro!!!
[MOD-3226] hello from the repro!!!
Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
hello! 0
Traceback (most recent call last):
File "/workspace/axolotl/repro.py", line 10, in <module>
with accelerator.main_process_first():
File "/root/miniconda3/envs/py3.10/lib/python3.10/contextlib.py", line 142, in __exit__
next(self.gen)
File "/root/miniconda3/envs/py3.10/lib/python3.10/site-packages/accelerate/accelerator.py", line 884, in main_process_first
with self.state.main_process_first():
File "/root/miniconda3/envs/py3.10/lib/python3.10/contextlib.py", line 142, in __exit__
next(self.gen)
File "/root/miniconda3/envs/py3.10/lib/python3.10/site-packages/accelerate/state.py", line 1056, in main_process_first
with PartialState().main_process_first():
File "/root/miniconda3/envs/py3.10/lib/python3.10/contextlib.py", line 142, in __exit__
next(self.gen)
File "/root/miniconda3/envs/py3.10/lib/python3.10/site-packages/accelerate/state.py", line 502, in main_process_first
yield from self._goes_first(self.is_main_process)
File "/root/miniconda3/envs/py3.10/lib/python3.10/site-packages/accelerate/state.py", line 390, in _goes_first
self.wait_for_everyone()
File "/root/miniconda3/envs/py3.10/lib/python3.10/site-packages/accelerate/state.py", line 379, in wait_for_everyone
torch.distributed.barrier()
File "/root/miniconda3/envs/py3.10/lib/python3.10/site-packages/torch/distributed/c10d_logger.py", line 47, in wrapper
return func(*args, **kwargs)
File "/root/miniconda3/envs/py3.10/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 3696, in barrier
work = default_pg.barrier(opts=opts)
torch.distributed.DistBackendError: NCCL error in: ../torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:1333, unhandled cuda error (run with NCCL_DEBUG=INFO for details), NCCL version 2.18.6
ncclUnhandledCudaError: Call to CUDA function failed.
Last error:
Cuda failure 'unknown error'
[2024-07-11 19:52:01,530] torch.distributed.elastic.multiprocessing.api: [WARNING] Sending process 68 closing signal SIGTERM
[2024-07-11 19:52:01,532] torch.distributed.elastic.multiprocessing.api: [WARNING] Sending process 69 closing signal SIGTERM
[2024-07-11 19:52:01,533] torch.distributed.elastic.multiprocessing.api: [WARNING] Sending process 70 closing signal SIGTERM
[2024-07-11 19:52:02,108] torch.distributed.elastic.multiprocessing.api: [ERROR] failed (exitcode: 1) local_rank: 0 (pid: 67) of binary: /root/miniconda3/envs/py3.10/bin/python
Traceback (most recent call last):
File "/root/miniconda3/envs/py3.10/bin/accelerate", line 8, in <module>
sys.exit(main())
File "/root/miniconda3/envs/py3.10/lib/python3.10/site-packages/accelerate/commands/accelerate_cli.py", line 46, in main
args.func(args)
File "/root/miniconda3/envs/py3.10/lib/python3.10/site-packages/accelerate/commands/launch.py", line 1073, in launch_command
multi_gpu_launcher(args)
File "/root/miniconda3/envs/py3.10/lib/python3.10/site-packages/accelerate/commands/launch.py", line 718, in multi_gpu_launcher
distrib_run.run(args)
File "/root/miniconda3/envs/py3.10/lib/python3.10/site-packages/torch/distributed/run.py", line 797, in run
elastic_launch(
File "/root/miniconda3/envs/py3.10/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 134, in __call__
return launch_agent(self._config, self._entrypoint, list(args))
File "/root/miniconda3/envs/py3.10/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 264, in launch_agent
raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
repro.py FAILED
------------------------------------------------------------
Failures:
<NO_OTHER_FAILURES>
------------------------------------------------------------
Root Cause (first observed failure):
[0]:
time : 2024-07-11_19:52:01
host : d45a08528293
rank : 0 (local_rank: 0)
exitcode : 1 (pid: 67)
error_file: <N/A>
traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
```
---
</details>
**`runsc` (this pull request)**
<details> <summary>✅ Success logs</summary>
```
[modal@gcp-h100-us-east4-a-0-bb25baf985414f8899dfdfcb82d6796d ~]$ sudo docker run -it --runtime=runsc --gpus='"device=GPU-c453e5c7-a56d-70bf-78ce-61be6cb8e0db,GPU-4703196a-e3df-9e3f-bb8b-6fa91c8e9970,GPU-4a9c162c-9280-eaa8-215a-2c681e82a99f,GPU-1660d344-e18b-e48a-cced-38380e903c31"' ce4326479c8412b13bba27416e3e77093d4411279b432ca1b25050f17ef57a67
The following values were not passed to `accelerate launch` and had defaults used instead:
`--num_processes` was set to a value of `4`
More than one GPU was found, enabling multi-GPU training.
If this was unintended please pass in `--num_processes=1`.
`--num_machines` was set to a value of `1`
`--mixed_precision` was set to a value of `'no'`
`--dynamo_backend` was set to a value of `'no'`
To avoid this warning pass in values for each of the problematic parameters or run `accelerate config`.
[MOD-3226] hello from the repro!!!
[MOD-3226] hello from the repro!!!
[MOD-3226] hello from the repro!!!
[MOD-3226] hello from the repro!!!
Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
hello! 0
hello! 1
hello! 3hello! 2
```
</details>
FUTURE_COPYBARA_INTEGRATE_REVIEW=#10649 from thundergolfer:master d3d19f1
PiperOrigin-RevId: 651754677