Lightning-AI · carmocca · Nov 19, 2021 · Oct 22, 2021 · Nov 19, 2021 · Nov 19, 2021
@@ -146,6 +146,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Fixed
 
+- When a tensor is logged with `self.log`, run its computation with the same `dtype` ([#10076](https://github.com/PyTorchLightning/pytorch-lightning/pull/10076))
+
 
 - Fixed signals being registered within threads ([#10610](https://github.com/PyTorchLightning/pytorch-lightning/pull/10610))
 

@@ -156,16 +156,9 @@ def update_eval_step_metrics(self) -> None:
 
     @staticmethod
     def _filter_metrics_for_dataloader(
-        dl_idx: int, metrics: Dict[str, Union[Any, Dict[str, Any]]], metric_prefix: str = "dataloader_idx"
-    ) -> Dict[str, Union[Any, Dict[str, Any]]]:
-        result = {}
-        for k, v in metrics.items():
-            if metric_prefix not in k:
-                result[k] = v
-                continue
-            if k.endswith(f"{metric_prefix}_{dl_idx}"):
-                result[k] = v
-        return result
+        dl_idx: int, metrics: _OUT_DICT, metric_prefix: str = "dataloader_idx"
+    ) -> _OUT_DICT:
+        """Return the entries of ``metrics`` that apply to dataloader ``dl_idx``.
+
+        A key is kept when it does not contain ``metric_prefix`` at all, or when it ends with
+        ``f"{metric_prefix}_{dl_idx}"``. Keys that contain the prefix but end with a different
+        suffix are dropped.
+        """
+        return {k: v for k, v in metrics.items() if metric_prefix not in k or k.endswith(f"{metric_prefix}_{dl_idx}")}
 
     def _prepare_eval_loop_results(self, metrics: _OUT_DICT) -> None:
         if self.trainer.sanity_checking:

@@ -207,13 +207,22 @@ def __init__(self, metadata: _Metadata, is_tensor: bool) -> None:
         self.meta = metadata
         self.has_reset = False
         if is_tensor:
-            self.add_state("value", torch.tensor(0, dtype=torch.float), dist_reduce_fx=torch.sum)
+            # do not set a dtype in case the default dtype was changed
+            self.add_state("value", torch.tensor(0.0), dist_reduce_fx=torch.sum)
             if self.meta.is_mean_reduction:
-                self.add_state("cumulated_batch_size", torch.tensor(0, dtype=torch.float), dist_reduce_fx=torch.sum)
+                self.add_state("cumulated_batch_size", torch.tensor(0), dist_reduce_fx=torch.sum)
 
     def update(self, value: _IN_METRIC, batch_size: torch.Tensor) -> None:
         if self.is_tensor:
-            value = value.float()
+            if not torch.is_floating_point(value):
+                dtype = torch.get_default_dtype()
+                warning_cache.warn(
+                    # do not include the value to avoid cache misses
+                    f"You called `self.log({self.meta.name!r}, ...)` in your `{self.meta.fx}` but the value needs to"
+                    f" be floating point. Converting it to {dtype}."
+                )
+                value = value.to(dtype)
+
             if self.meta.on_step:
                 self._forward_cache = self.meta.sync(value.clone())  # `clone` because `sync` is in-place
 

@@ -549,12 +549,42 @@ def on_train_epoch_end(self) -> None:
 
 def test_metric_result_computed_check():
     """Unittest ``_get_cache`` with multielement tensors."""
-    sync = _Sync()
     metadata = _Metadata("foo", "bar", on_epoch=True, enable_graph=True)
-    metadata.sync = sync
+    metadata.sync = _Sync()
     rm = ResultMetric(metadata, is_tensor=True)
     computed_value = torch.tensor([1, 2, 3])
+    # assign the computed value directly; this test never calls `update` or `compute`
     rm._computed = computed_value
     cache = ResultCollection._get_cache(rm, on_step=False)
     # `enable_graph=True` so no detach, identity works
     assert cache is computed_value
+
+
+@pytest.mark.parametrize("floating_dtype", (torch.float, torch.double))
+def test_metric_result_respects_dtype(floating_dtype):
+    """Check that ``ResultMetric`` computes with the default floating point dtype.
+
+    Integer values passed to ``update`` are expected to be converted to the default dtype with a
+    ``UserWarning``; floating point values are expected to be accepted silently.
+    """
+    # remember the active default so it is restored even when an assertion below fails
+    previous_dtype = torch.get_default_dtype()
+    torch.set_default_dtype(floating_dtype)
+    try:
+        fixed_dtype = torch.long  # dtype PyTorch gives to tensors built from Python ints
+
+        metadata = _Metadata("foo", "bar")
+        metadata.sync = _Sync()
+        rm = ResultMetric(metadata, is_tensor=True)
+
+        assert rm.value.dtype == floating_dtype
+        assert rm.cumulated_batch_size.dtype == fixed_dtype
+
+        # integer value and integer batch size - the value should be converted
+        value, batch_size = torch.tensor(2), torch.tensor(3)
+        assert value.dtype == fixed_dtype
+        with pytest.warns(
+            UserWarning, match=rf"`self.log\('bar', ...\)` in your `foo` .* Converting it to {floating_dtype}"
+        ):
+            rm.update(value, batch_size)
+        # floating point value and integer batch size
+        rm.update(torch.tensor(4.0), torch.tensor(5))
+
+        total = rm.compute()
+
+        assert total == (2 * 3 + 4 * 5) / (5 + 3)
+        assert total.dtype == floating_dtype
+    finally:
+        # restore to avoid impacting other tests
+        torch.set_default_dtype(previous_dtype)
@@ -530,9 +530,9 @@ def _assert_called(model, fn, stage):
 
 
 def test_result_collection_on_tensor_with_mean_reduction():
-    result_collection = ResultCollection(True, torch.device("cpu"))
+    result_collection = ResultCollection(True)
     product = [(True, True), (False, True), (True, False), (False, False)]
-    values = torch.arange(1, 10).float()  # need to convert to float() due to precision issues using torch 1.4
+    values = torch.arange(1, 10)
     batches = values * values
 
     for i, v in enumerate(values):
Original file line number	Diff line number	Diff line change
Expand Up		@@ -146,6 +146,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

		### Fixed

		- When a tensor is logged with `self.log`, run its computation with the same `dtype` ([#10076](https://github.com/PyTorchLightning/pytorch-lightning/pull/10076))


		- Fixed signals being registered within threads ([#10610](https://github.com/PyTorchLightning/pytorch-lightning/pull/10610))

Expand Down