Fixing flex attention for torch=2.6.0 #37285

Merged: 6 commits, Apr 7, 2025
Changes from 2 commits
11 changes: 9 additions & 2 deletions src/transformers/integrations/flex_attention.py
@@ -11,6 +11,7 @@
year = {2024}
}
"""

# coding=utf-8
# Copyright 2025 The HuggingFace Inc. team.
#
@@ -31,6 +32,7 @@
import torch

from ..utils import is_torch_flex_attn_available
from ..utils.import_utils import _torch_version


if is_torch_flex_attn_available():
@@ -63,8 +65,13 @@ def __init__(self):
"""
Initialize or update the singleton instance.
"""
if self._is_flex_compiled is False:
self._compiled_flex_attention = torch.compile(flex_attention, dynamic=False)
if not self._is_flex_compiled:
if _torch_version == "2.6.0":
Contributor:

Could you add a comment here to describe why this is the case, e.g. by linking to the issue with a small description?

It might also be nice to use/create something like

def is_torch_greater_or_equal(library_version: str, accept_dev: bool = False):
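
A rough sketch of what such a helper could look like, assuming the check is built on packaging and importlib.metadata; this is illustrative only, and the accept_dev handling below is an assumption rather than what necessarily lands in transformers.utils.import_utils:

# Hypothetical sketch of the suggested helper, not the transformers implementation.
import importlib.metadata

from packaging import version


def is_torch_greater_or_equal(library_version: str, accept_dev: bool = False) -> bool:
    """Return True if the installed torch version is >= library_version."""
    installed = version.parse(importlib.metadata.version("torch"))
    if accept_dev:
        # Compare the base version so dev builds (e.g. 2.7.0.dev20250310) can pass the check.
        installed = version.parse(installed.base_version)
    return installed >= version.parse(library_version)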

Collaborator:

I tested on my end; this should fix training!

+                self._compiled_flex_attention = torch.compile(
+                    flex_attention, dynamic=False, mode="max-autotune-no-cudagraphs"
+                )
+            else:
+                self._compiled_flex_attention = torch.compile(flex_attention, dynamic=False)
             self._is_flex_compiled = True
 
     def __call__(self):
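
For context, here is a standalone sketch of the compilation branch this diff adds, with the WrappedFlexAttention singleton bookkeeping stripped out. The torch.__version__-based check is a stand-in for the _torch_version value imported from transformers, and it assumes a torch build that ships flex_attention (2.5+):

# Minimal sketch of the version-gated compilation added by this PR (singleton logic omitted).
import torch
from torch.nn.attention.flex_attention import flex_attention

torch_version = torch.__version__.split("+")[0]  # e.g. "2.6.0" from "2.6.0+cu124"

if torch_version == "2.6.0":
    # torch 2.6.0 is special-cased to compile without CUDA graphs;
    # per the PR discussion, this fixes flex attention during training.
    compiled_flex_attention = torch.compile(
        flex_attention, dynamic=False, mode="max-autotune-no-cudagraphs"
    )
else:
    compiled_flex_attention = torch.compile(flex_attention, dynamic=False)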