merge with utilities

kylesayrs · kylesayrs · commit e884298d1223 · 2025-05-30T14:32:41.000-04:00
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
diff --git a/src/compressed_tensors/utils/offload.py b/src/compressed_tensors/utils/offload.py
@@ -38,6 +38,7 @@
     from accelerate.hooks import (
         AlignDevicesHook,
         add_hook_to_module,
+        attach_align_device_hook,
         named_module_tensors,
         remove_hook_from_module,
     )
@@ -58,6 +59,7 @@
     set_module_tensor_to_device = None
     named_module_tensors = None
     dispatch_model = None
+    attach_align_device_hook = None
 
 
 __all__ = [
@@ -458,21 +460,42 @@ def register_offload_module(base: torch.nn.Module, name: str, module: torch.nn.M
 
 
 @check_accelerate(fallback="error")
-def force_cpu_offload(module: torch.nn.Module, execution_device: torch.device):
+def force_cpu_offload(
+    module: torch.nn.Module, execution_device: torch.device
+) -> torch.nn.Module:
+    """
+    Force cpu offloading a module, primarily used for testing
+
+    :param module: module containing parameters to offload
+    :param execution_device: device on which the module and its submodules execute
+    :return: module with hooks to perform cpu offloading
+    """
+    # edge case: there is a bug in `dispatch_model` which causes
+    # the function to only work if the model contains submodules
+    if next(module.children(), None) is None:
+        attach_align_device_hook(
+            module,
+            execution_device=execution_device,
+            offload=True,
+            weights_map=module.state_dict(),
+            tied_params_map={},
+        )
+        return module
+
     device_map = {}
 
-    def dfs(name: List[str], module: torch.nn.Module):
+    def collect_device_map(name: List[str], module: torch.nn.Module):
         if next(module.parameters(recurse=False), None) is not None:
             device_map[".".join(name)] = "cpu"
             return
 
         else:
             for submodule_name, submodule in module.named_children():
                 name.append(submodule_name)
-                dfs(name, submodule)
+                collect_device_map(name, submodule)
                 name.pop()
 
-    dfs([], module)
+    collect_device_map([], module)
 
     return dispatch_model(
         module, device_map, main_device=execution_device, force_hooks=True
diff --git a/tests/test_utils/test_offload.py b/tests/test_utils/test_offload.py
@@ -18,8 +18,10 @@
     align_modules,
     delete_offload_parameter,
     disable_hf_hook,
+    force_cpu_offload,
     get_execution_device,
     has_offloaded_params,
+    register_offload_module,
     register_offload_parameter,
     update_offload_parameter,
 )
@@ -37,9 +39,17 @@ def forward(self, x):
         return x * self.a + self.b
 
 
+class ExampleModel(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.linear = torch.nn.Linear(1, 2)
+
+    def forward(self, x):
+        return self.linear(x)
+
+
 @requires_accelerate()
 def test_has_offloaded_params():
-    from accelerate.big_modeling import cpu_offload_with_hook
     from accelerate.hooks import attach_align_device_hook, remove_hook_from_module
 
     module = ExampleModule()
@@ -48,10 +58,6 @@ def test_has_offloaded_params():
     attach_align_device_hook(module, offload=False)
     assert not has_offloaded_params(module)
 
-    remove_hook_from_module(module)
-    module, _ = cpu_offload_with_hook(module)
-    assert not has_offloaded_params(module)
-
     remove_hook_from_module(module)
     attach_align_device_hook(module, offload=True, weights_map=module.state_dict())
     assert has_offloaded_params(module)
@@ -334,3 +340,62 @@ def test_offload_to_weights_map():
     weights_map = PrefixedDataset(OffloadedWeightsLoader({name: old_value}), prefix)
     offload_to_weights_map(weights_map, name, new_value)
     assert weights_map[name] == new_value
+
+
+@requires_gpu
+@requires_accelerate()
+def test_register_offload_module():
+    execution_device = torch.device("cuda")
+
+    # no offloading
+    model = ExampleModel()
+    child = torch.nn.Linear(2, 3)
+    register_offload_module(model, "child", child)
+    register_offload_module(model.linear, "child", child)
+    assert child in model.children()
+    assert child in model.linear.children()
+
+    # with offloading
+    model = ExampleModel()
+    child = torch.nn.Linear(2, 3)
+    force_cpu_offload(model, execution_device)
+    register_offload_module(model, "child", child)
+    register_offload_module(model.linear, "child", child)
+    assert child in model.children()
+    assert child in model.linear.children()
+
+    # can run modules
+    model(torch.empty(1))
+    child(torch.empty(2, device=execution_device))
+
+
+@requires_gpu
+@requires_accelerate()
+def test_force_cpu_offload():
+    execution_device = torch.device("cuda")
+
+    # single module
+    module = torch.nn.Linear(1, 2)
+    module = force_cpu_offload(module, execution_device)
+    assert has_offloaded_params(module)
+    assert module._hf_hook.offload
+    assert module.weight.device == torch.device("meta")
+    assert "weight" in module._hf_hook.weights_map
+    assert module._hf_hook.tied_params_map is not None
+
+    # can run
+    module(torch.empty(1, device=execution_device))
+
+    # model
+    model = ExampleModel()
+    model = force_cpu_offload(model, execution_device)
+    assert not has_offloaded_params(model)
+
+    assert has_offloaded_params(model.linear)
+    assert model.linear._hf_hook.offload
+    assert model.linear.weight.device == torch.device("meta")
+    assert "weight" in model.linear._hf_hook.weights_map
+    assert model.linear._hf_hook.tied_params_map is not None
+
+    # can run
+    model(torch.empty(1, device=execution_device))