
Fix move_metrics_to_cpu with evaluation #10631


Merged 4 commits on Nov 22, 2021
4 changes: 2 additions & 2 deletions CHANGELOG.md
@@ -150,10 +150,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 -
 
 
--
+- Fixed `Trainer(move_metrics_to_cpu=True)` not moving the evaluation logged results to CPU ([#10631](https://github.com/PyTorchLightning/pytorch-lightning/pull/10631))
 
 
--
+- Fixed the `{validation,test}_step` outputs getting moved to CPU with `Trainer(move_metrics_to_cpu=True)` ([#10631](https://github.com/PyTorchLightning/pytorch-lightning/pull/10631))
 
 
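For orientation, here is a minimal sketch of the user-facing behavior the two changelog entries above describe: logged results land on CPU, while raw step outputs keep their device. TinyModel and its names are illustrative, not part of the PR, and the snippet assumes the 1.5-era Lightning API:

import torch
from torch.utils.data import DataLoader, TensorDataset
from pytorch_lightning import LightningModule, Trainer


class TinyModel(LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(4, 1)

    def validation_step(self, batch, batch_idx):
        loss = self.layer(batch[0]).sum()
        self.log("val_loss", loss)  # a logged result: moved to CPU by this fix
        return loss  # a raw step output: keeps its original device after this fix

    def validation_epoch_end(self, outputs):
        # both changelog entries at once: outputs keep their device, logged metrics do not
        assert all(o.device == self.device for o in outputs)
        assert self.trainer.callback_metrics["val_loss"].device.type == "cpu"


model = TinyModel()
loader = DataLoader(TensorDataset(torch.randn(8, 4)), batch_size=4)
Trainer(move_metrics_to_cpu=True, logger=False).validate(model, dataloaders=loader, verbose=False)
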
12 changes: 7 additions & 5 deletions pytorch_lightning/loops/epoch/evaluation_epoch_loop.py
@@ -24,7 +24,6 @@
 from pytorch_lightning.trainer.progress import BatchProgress
 from pytorch_lightning.utilities.auto_restart import MergedIteratorState, reload_dataloader_state_dict
 from pytorch_lightning.utilities.fetching import AbstractDataFetcher, DataFetcher
-from pytorch_lightning.utilities.memory import recursive_detach
 from pytorch_lightning.utilities.model_helpers import is_overridden
 from pytorch_lightning.utilities.types import EPOCH_OUTPUT, STEP_OUTPUT

@@ -134,10 +133,13 @@ def advance(
         self.trainer.logger_connector.update_eval_step_metrics()
 
         # track epoch level outputs
-        if self._should_track_batch_outputs_for_epoch_end():
-            output = recursive_detach(output, to_cpu=self.trainer.move_metrics_to_cpu)
-            if output is not None:
-                self.outputs.append(output)
+        if self._should_track_batch_outputs_for_epoch_end() and output is not None:
+            self.outputs.append(output)
+
+        if self.trainer.move_metrics_to_cpu:
+            # the evaluation step outputs are not moved as they are not considered "metrics"
+            assert self.trainer._results is not None
+            self.trainer._results.cpu()
 
         if not self.batch_progress.is_last_batch:
             # if fault tolerant is enabled and process has been notified, exit.
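For context, `self.trainer._results` is Lightning's internal `ResultCollection`, the store behind `self.log(...)`, and its `cpu()` call moves the tensors it holds to CPU. The following is a rough sketch of that idea, not the actual implementation:

import torch


class ResultCollectionSketch(dict):
    """Simplified stand-in for Lightning's internal ResultCollection."""

    def cpu(self) -> "ResultCollectionSketch":
        # move every stored metric tensor to CPU, in place
        for key, value in self.items():
            if isinstance(value, torch.Tensor):
                self[key] = value.cpu()
        return self


# mirrors what the loop now does after each evaluation batch
results = ResultCollectionSketch(foo=torch.tensor(1.0))
results.cpu()
assert results["foo"].device.type == "cpu"
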
24 changes: 24 additions & 0 deletions tests/trainer/logging_/test_eval_loop_logging.py
@@ -699,3 +699,27 @@ def test_filter_metrics_for_dataloader(kwargs, expected):
"""Logged metrics should only include metrics from the concerned dataloader."""
actual = LoggerConnector._filter_metrics_for_dataloader(**kwargs)
assert actual == expected


def test_evaluation_move_metrics_to_cpu_and_outputs(tmpdir):
class TestModel(BoringModel):
def validation_step(self, *args):
x = torch.tensor(2.0, requires_grad=True, device=self.device)
y = x * 2
assert x.requires_grad is True
assert y.grad_fn is None # disabled by validation

self.log("foo", y)
return y

def validation_epoch_end(self, outputs):
# the step outputs were not moved
assert all(o.device == self.device for o in outputs), outputs
# but the logging results were
assert self.trainer.callback_metrics["foo"].device.type == "cpu"

model = TestModel()
trainer = Trainer(
default_root_dir=tmpdir, limit_val_batches=2, move_metrics_to_cpu=True, accelerator="auto", devices=1
)
trainer.validate(model, verbose=False)
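
The new test can be run in isolation with pytest's node-id syntax (standard pytest usage, not part of the diff):

pytest tests/trainer/logging_/test_eval_loop_logging.py::test_evaluation_move_metrics_to_cpu_and_outputs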