cutlass int4 support

jerryzh168 · jerryzh168 · commit 0885a4993186 · 2025-02-27T14:07:58.000-08:00
diff --git a/torchao/dtypes/uintx/cutlass_int4_packed_layout.py b/torchao/dtypes/uintx/cutlass_int4_packed_layout.py
@@ -28,6 +28,17 @@ def _aqt_is_int4(aqt):
     )
 
 
+def _same_metadata(self: "Int4PackedTensorImpl", src: "Int4PackedTensorImpl") -> bool:  # True only when both impls agree on class, shapes and layout type
+    return (  # all conditions must hold; `and` short-circuits on the first failure
+        isinstance(self, Int4PackedTensorImpl)  # both operands must be Int4PackedTensorImpl instances ...
+        and isinstance(src, Int4PackedTensorImpl)  # ... so the attribute reads below are never reached for other types
+        and self.shape == src.shape  # outer shape of the impl itself
+        and self.int_data.shape == src.int_data.shape  # shape of int_data
+        and self.scale.shape == src.scale.shape  # shape of scale
+        and type(self._layout) == type(src._layout)  # exact layout class only (subclasses do not match); layout field values are not compared
+    )  # NOTE: dtype and device are not compared here
+
+
 @dataclass(frozen=True)
 class CutlassInt4PackedLayout(Layout):
     """Layout class for int4 packed layout for affine quantized tensor, for cutlass kernel."""
@@ -77,6 +88,18 @@ def __torch_dispatch__(cls, func, types, args, kwargs):
                 func, args, kwargs, args[0]._apply_fn_to_data(torch.detach)
             )
 
+        elif func is aten.copy_.default:
+            self = args[0]
+            src = args[1]
+            if _same_metadata(self, src):
+                self_tensors = self.__tensor_flatten__()[0]
+                for tensor_name in self_tensors:
+                    getattr(self, tensor_name).copy_(getattr(src, tensor_name))
+                return
+            raise ValueError(
+                f"Not supported args for copy_ due to metadata mistach: {args[0], args[1]}"
+            )
+
         raise NotImplementedError(
             f"Int4PackedTensorImpl dispatch: attempting to run {func}, this is not supported"
         )