@@ -64,7 +64,7 @@ def rmsnorm(
64
64
out: Optional[torch.Tensor]
65
65
The output tensor, if specified, the kernel will update this tensor inplace.
66
66
enable_pdl: bool
67
- Whether to enable `programmatic dependency loading
67
+ Whether to enable `programmatic dependent launch
68
68
<https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#programmatic-dependent-launch-and-synchronization>`_
69
69
70
70
Returns
@@ -130,7 +130,7 @@ def fused_add_rmsnorm(
130
130
eps: float
131
131
Epsilon for numerical stability.
132
132
enable_pdl: bool
133
- Whether to enable `programmatic dependency loading
133
+ Whether to enable `programmatic dependent launch
134
134
<https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#programmatic-dependent-launch-and-synchronization>`_
135
135
"""
136
136
with input.device as device:  # device guard
@@ -172,7 +172,7 @@ def gemma_rmsnorm(
172
172
out: Optional[torch.Tensor]
173
173
The output tensor, if specified, the kernel will update this tensor inplace.
174
174
enable_pdl: bool
175
- Whether to enable `programmatic dependency loading
175
+ Whether to enable `programmatic dependent launch
176
176
<https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#programmatic-dependent-launch-and-synchronization>`_
177
177
178
178
Returns
@@ -240,7 +240,7 @@ def gemma_fused_add_rmsnorm(
240
240
eps: float
241
241
Epsilon for numerical stability.
242
242
enable_pdl: bool
243
- Whether to enable `programmatic dependency loading
243
+ Whether to enable `programmatic dependent launch
244
244
<https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#programmatic-dependent-launch-and-synchronization>`_
245
245
"""
246
246
with input.device as device:
0 commit comments