@@ -331,8 +331,14 @@ def gelu(x):
# it must be explicitly set as it can conflict with some operations which do not
# benefit from Tensor core computations.
+ # Tensor Core computation can be enabled "manually" by modifying the matrix multiplication precision.
+ # The default precision is "highest", which performs the operation according to the dtype.
- torch.backends.cuda.matmul.allow_tf32
+ # Precisions "high" and "medium" can be hardware accelerated via Tensor Cores
+ # and will set torch.backends.cuda.matmul.allow_tf32 = True if available.
+
+ # Carefully consider the tradeoff between speed and precision when evaluating your models!
+ torch.set_float32_matmul_precision("high")
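
# A minimal sketch (an editorial addition, not part of the patch above) of how the
# precision setting plays out in practice; the matrix size and variable names are
# illustrative assumptions. On Ampere or newer GPUs, "high" allows TF32 Tensor Core
# kernels, so comparing against "highest" shows the speed/precision tradeoff.
import torch

if torch.cuda.is_available():
    a = torch.randn(4096, 4096, device="cuda")
    b = torch.randn(4096, 4096, device="cuda")

    torch.set_float32_matmul_precision("highest")  # full FP32 matmul (the default)
    ref = a @ b

    torch.set_float32_matmul_precision("high")  # TF32 Tensor Cores where supported
    fast = a @ b

    # Measure the numerical cost of the faster kernels before enabling them globally.
    print((ref - fast).abs().max())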
###############################################################################
# Use CUDA Graphs
@@ -341,8 +347,13 @@ def gelu(x):
# in some cases the context switch between CPU and GPU can lead to bad resource
# utilization. CUDA graphs are a way to keep computation within the GPU without
# paying the extra cost of kernel launches and host synchronization.
- #
- # It can be enabled using `torch.compile <https://pytorch.org/docs/stable/generated/torch.compile.html>`_ "reduce-overhead" and "max-autotune" modes.
+
+ # It can be enabled using
+ torch.compile(m, mode="reduce-overhead")
+ # or
+ torch.compile(m, mode="max-autotune")
+
+ ###############################################################################
# Special care must be taken when using CUDA graphs as they can lead to increased memory consumption and some models might not compile.
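
# A minimal sketch (an editorial addition, not part of the patch above) of enabling
# CUDA graphs through torch.compile's "reduce-overhead" mode; the model, batch size,
# and warm-up count are illustrative assumptions. Keeping input shapes static lets
# the captured graph be replayed on later calls.
import torch

if torch.cuda.is_available():
    model = torch.nn.Sequential(
        torch.nn.Linear(1024, 1024),
        torch.nn.GELU(),
        torch.nn.Linear(1024, 1024),
    ).cuda()
    compiled = torch.compile(model, mode="reduce-overhead")

    x = torch.randn(64, 1024, device="cuda")
    with torch.no_grad():
        for _ in range(3):  # warm-up iterations compile the model and capture the graph
            compiled(x)
        out = compiled(x)  # later calls replay the captured CUDA graph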
###############################################################################