Integration tests for Precision in Lite (#14815)

awaelchli · carmocca · Borda · web-flow · commit d7404c775a8a · 2022-09-26T18:50:11.000Z
Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com>
Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com>
diff --git a/src/lightning_lite/plugins/precision/deepspeed.py b/src/lightning_lite/plugins/precision/deepspeed.py
@@ -30,7 +30,7 @@ class DeepSpeedPrecision(Precision):
     """Precision plugin for DeepSpeed integration.
 
     Args:
-        precision: Double precision (64), full precision (32), half precision (16) or bfloat16 precision (bf16).
+        precision: Full precision (32), half precision (16) or bfloat16 precision (bf16).
         amp_type: The mixed precision backend to use ("native" or "apex").
         amp_level: The optimization level to use (O1, O2, etc...). By default it will be set to "O2"
             if ``amp_type`` is set to "apex".
diff --git a/tests/tests_lite/helpers/models.py b/tests/tests_lite/helpers/models.py
@@ -0,0 +1,68 @@
+from typing import Any, Iterator
+
+import torch
+import torch.nn as nn
+from torch import Tensor
+from torch.nn import Module
+from torch.optim import Optimizer
+from torch.utils.data import DataLoader, Dataset, IterableDataset
+
+from lightning_lite import LightningLite
+
+
+class RandomDataset(Dataset):  # indexable dataset backed by one pre-generated random tensor
+    def __init__(self, size: int, length: int) -> None:
+        self.len = length  # number of samples, reported by __len__
+        self.data = torch.randn(length, size)  # one row of `size` values per sample
+
+    def __getitem__(self, index: int) -> Tensor:
+        return self.data[index]  # rows are generated once in __init__, so repeated reads match
+
+    def __len__(self) -> int:
+        return self.len
+
+
+class RandomIterableDataset(IterableDataset):  # iterable dataset yielding freshly sampled random vectors
+    def __init__(self, size: int, count: int) -> None:
+        self.count = count  # how many samples each pass over the dataset yields
+        self.size = size  # number of elements in every yielded tensor
+
+    def __iter__(self) -> Iterator[Tensor]:
+        for _ in range(self.count):
+            yield torch.randn(self.size)  # sampled lazily, so values differ between passes
+
+
+class BoringLite(LightningLite):  # minimal single-step training loop with overridable hooks for tests
+    def get_model(self) -> Module:
+        return nn.Linear(32, 2)  # in_features matches the sample size used in get_dataloader()
+
+    def get_dataloader(self) -> DataLoader:
+        return DataLoader(RandomDataset(32, 64))  # 64 samples of 32 features each
+
+    def step(self, model: Module, batch: Any) -> Tensor:
+        output = model(batch)
+        loss = torch.nn.functional.mse_loss(output, torch.ones_like(output))  # regress towards all-ones
+        return loss
+
+    def after_backward(self, model: Module) -> None:
+        pass  # hook: called by run() right after self.backward(loss)
+
+    def after_optimizer_step(self, model: Module, optimizer: Optimizer) -> None:
+        pass  # hook: called by run() right after optimizer.step()
+
+    def run(self) -> None:
+        model = self.get_model()
+        optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
+        dataloader = self.get_dataloader()
+
+        model, optimizer = self.setup(model, optimizer)  # continue with the objects returned by setup()
+        dataloader = self.setup_dataloaders(dataloader)
+
+        data_iter = iter(dataloader)
+        batch = next(data_iter)  # only the first batch is consumed; exactly one step is executed
+        loss = self.step(model, batch)
+        self.backward(loss)
+        self.after_backward(model)
+        optimizer.step()
+        self.after_optimizer_step(model, optimizer)
+        optimizer.zero_grad()
diff --git a/tests/tests_lite/plugins/precision/test_double_integration.py b/tests/tests_lite/plugins/precision/test_double_integration.py
@@ -0,0 +1,55 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Integration tests for double-precision training."""
+
+import torch
+import torch.nn as nn
+from tests_lite.helpers.models import BoringLite
+
+
+class BoringDoubleModule(nn.Module):  # asserts float64 input and float64 default dtype inside forward()
+    def __init__(self):  # builds a linear layer plus a complex buffer registered with persistent=False
+        super().__init__()
+        self.layer = torch.nn.Linear(32, 2)
+        self.register_buffer("complex_buffer", torch.complex(torch.rand(10), torch.rand(10)), False)
+
+    def forward(self, x):
+        assert x.dtype == torch.float64
+        # while forward runs, newly created tensors are expected to default to float64
+        assert torch.tensor([0.0]).dtype == torch.float64
+        return self.layer(x)
+
+
+class DoublePrecisionBoringLite(BoringLite):  # BoringLite with dtype assertions for double precision
+    def get_model(self):
+        return BoringDoubleModule()
+
+    def step(self, model, batch):
+        model.double()  # TODO(lite): this needs to be done automatically in Lite.setup()
+        assert model.layer.weight.dtype == model.layer.bias.dtype == torch.float64
+        assert model.complex_buffer.dtype == torch.complex64  # complex buffer is expected to stay complex64
+
+        assert batch.dtype == torch.float32  # outside the model, the batch is still single precision
+        output = model(batch)  # the module itself asserts that it receives float64 input
+        assert output.dtype == torch.float32  # the output is expected back in float32
+        loss = torch.nn.functional.mse_loss(output, torch.ones_like(output))
+        return loss
+
+    def after_backward(self, model):
+        assert model.layer.weight.grad.dtype == torch.float64  # gradients match the float64 parameters
+
+
+def test_double_precision(tmpdir):  # NOTE(review): the `tmpdir` fixture is requested but never used
+    lite = DoublePrecisionBoringLite(precision=64)
+    lite.run()  # all checks are the asserts inside the Lite and module overrides
diff --git a/tests/tests_lite/plugins/precision/test_native_amp.py b/tests/tests_lite/plugins/precision/test_native_amp.py
@@ -43,11 +43,25 @@ def test_native_amp_precision_bf16_min_torch():
 
 @RunIf(min_torch="1.10")
 def test_native_amp_precision_forward_context():
-    precision_plugin = NativeMixedPrecision(precision="mixed", device="cuda")
+    """Test that the forward context sets the autocast dtype to float16 for CUDA and bfloat16 for CPU."""
+    precision_plugin = NativeMixedPrecision(precision=16, device="cuda")
+    assert precision_plugin.device == "cuda"
+    assert isinstance(precision_plugin.scaler, torch.cuda.amp.GradScaler)
     assert torch.get_default_dtype() == torch.float32
     with precision_plugin.forward_context():
         assert torch.get_autocast_gpu_dtype() == torch.float16
 
+    precision_plugin = NativeMixedPrecision(precision="bf16", device="cpu")
+    assert precision_plugin.device == "cpu"
+    assert precision_plugin.scaler is None
+    with precision_plugin.forward_context():
+        assert torch.get_autocast_cpu_dtype() == torch.bfloat16
+
+    context_manager = precision_plugin._autocast_context_manager()
+    assert isinstance(context_manager, torch.autocast)
+    # check with str due to a bug upstream: https://github.com/pytorch/pytorch/issues/65786
+    assert str(context_manager.fast_dtype) == str(torch.bfloat16)
+
 
 def test_native_amp_precision_backward():
     precision_plugin = NativeMixedPrecision(precision="mixed", device="cuda")
diff --git a/tests/tests_lite/plugins/precision/test_native_amp_integration.py b/tests/tests_lite/plugins/precision/test_native_amp_integration.py
@@ -0,0 +1,72 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Integration tests for native automatic mixed precision (AMP) training."""
+import pytest
+import torch
+import torch.nn as nn
+from tests_lite.helpers.models import BoringLite
+from tests_lite.helpers.runif import RunIf
+
+
+class NativeMixedPrecisionModule(nn.Module):  # linear layer that checks autocast state and dtypes in forward()
+    def __init__(self, expected_dtype):
+        super().__init__()
+        self.expected_dtype = expected_dtype  # dtype asserted for both the input and the output of forward()
+        self.layer = torch.nn.Linear(32, 2)
+
+    def forward(self, x):
+        assert x.dtype == self.expected_dtype
+        if x.device.type == "cpu":
+            assert torch.is_autocast_cpu_enabled()
+        else:
+            assert torch.is_autocast_enabled()  # non-CPU inputs are checked against the general autocast flag
+        output = self.layer(x)
+        assert output.dtype == self.expected_dtype
+        return output
+
+
+class NativeMixedPrecisionBoringLite(BoringLite):
+
+    expected_dtype: torch.dtype  # declared only; must be assigned on the instance before get_model() runs
+
+    def get_model(self):
+        return NativeMixedPrecisionModule(self.expected_dtype)
+
+    def step(self, model, batch):
+        assert model.layer.weight.dtype == torch.float32  # weights are expected to remain float32
+
+        assert batch.dtype == torch.float32  # outside the model, the batch is float32
+        output = model(batch)  # the module asserts the reduced-precision dtype internally
+        assert output.dtype == torch.float32  # the output is expected back in float32
+        loss = torch.nn.functional.mse_loss(output, torch.ones_like(output))
+        return loss
+
+    def after_backward(self, model):
+        assert model.layer.weight.grad.dtype == torch.float32  # gradients match the float32 parameters
+
+
+@RunIf(min_torch="1.10")
+@pytest.mark.parametrize(
+    "accelerator, precision, expected_dtype",
+    [
+        ("cpu", 16, torch.bfloat16),  # 16 on CPU is expected to end up as bfloat16
+        ("cpu", "bf16", torch.bfloat16),
+        pytest.param("cuda", 16, torch.float16, marks=RunIf(min_cuda_gpus=1)),
+        pytest.param("cuda", "bf16", torch.bfloat16, marks=RunIf(min_cuda_gpus=1, bf16_cuda=True)),
+    ],
+)
+def test_native_mixed_precision(accelerator, precision, expected_dtype):  # one step per parametrized case
+    lite = NativeMixedPrecisionBoringLite(accelerator=accelerator, precision=precision)
+    lite.expected_dtype = expected_dtype  # read by get_model() and asserted inside the module's forward()
+    lite.run()
diff --git a/tests/tests_lite/test_connector.py b/tests/tests_lite/test_connector.py
@@ -27,7 +27,7 @@
 from lightning_lite.accelerators.cuda import CUDAAccelerator
 from lightning_lite.accelerators.mps import MPSAccelerator
 from lightning_lite.connector import _Connector
-from lightning_lite.plugins import DoublePrecision, Precision
+from lightning_lite.plugins import DoublePrecision, NativeMixedPrecision, Precision
 from lightning_lite.plugins.environments import (
     KubeflowEnvironment,
     LightningEnvironment,
@@ -692,3 +692,44 @@ def test_gpu_accelerator_no_gpu_backend_found_error(*_):
 def test_ddp_fork_on_unsupported_platform(_, strategy):
     with pytest.raises(ValueError, match="process forking is not supported on this platform"):
         _Connector(strategy=strategy)
+
+
+@mock.patch("lightning_lite.plugins.precision.native_amp._TORCH_GREATER_EQUAL_1_10", True)
+def test_precision_selection_16_on_cpu_warns():  # NOTE(review): relies on the default accelerator being CPU
+    with pytest.warns(
+        UserWarning, match=r"precision=16\)` but native AMP is not supported on CPU. Using `precision='bf16"
+    ):
+        _Connector(precision=16)  # no accelerator is passed explicitly
+
+
+@mock.patch("lightning_lite.plugins.precision.native_amp._TORCH_GREATER_EQUAL_1_10", False)
+def test_precision_selection_16_raises_torch_version(monkeypatch):  # NOTE(review): `monkeypatch` is unused
+    with pytest.raises(ImportError, match="must install torch greater or equal to 1.10"):
+        _Connector(accelerator="cpu", precision=16)  # version flag is patched to False above
+    with pytest.raises(ImportError, match="must install torch greater or equal to 1.10"):
+        _Connector(accelerator="cpu", precision="bf16")  # same error is expected for bf16
+
+
+class MyNativeAMP(NativeMixedPrecision):  # trivial subclass used as the "custom plugin" test case
+    pass
+
+
+@RunIf(mps=False)
+@pytest.mark.parametrize("strategy,devices", [("ddp", 2), ("ddp_spawn", 2)])
+@pytest.mark.parametrize(
+    "is_custom_plugin,plugin_cls",
+    [(False, NativeMixedPrecision), (True, MyNativeAMP)],
+)
+@mock.patch("lightning_lite.plugins.precision.native_amp._TORCH_GREATER_EQUAL_1_10", True)
+def test_precision_selection_amp_ddp(strategy, devices, is_custom_plugin, plugin_cls):
+    # Only the custom subclass is handed over as an instance; for the stock class
+    # the connector has to pick the plugin on its own from `precision=16`.
+    plugin = plugin_cls(16, "cpu") if is_custom_plugin else None
+
+    connector = _Connector(
+        precision=16,
+        devices=devices,
+        strategy=strategy,
+        plugins=plugin,
+    )
+    assert isinstance(connector.precision_plugin, plugin_cls)
diff --git a/tests/tests_lite/test_parity.py b/tests/tests_lite/test_parity.py
@@ -23,6 +23,7 @@
 import torch.multiprocessing as mp
 import torch.nn.functional
 from lightning_utilities.core.apply_func import apply_to_collection
+from tests_lite.helpers.models import RandomDataset
 from tests_lite.helpers.runif import RunIf
 from torch import nn
 from torch.nn.parallel.distributed import DistributedDataParallel
@@ -34,7 +35,6 @@
 from lightning_lite.strategies.ddp_spawn import DDPSpawnStrategy
 from lightning_lite.utilities.apply_func import move_data_to_device
 from lightning_lite.utilities.cloud_io import atomic_save
-from pytorch_lightning.demos.boring_classes import RandomDataset
 
 
 class BoringModel(nn.Module):
diff --git a/tests/tests_lite/utilities/test_data.py b/tests/tests_lite/utilities/test_data.py
@@ -2,6 +2,7 @@
 
 import pytest
 import torch
+from tests_lite.helpers.models import RandomDataset, RandomIterableDataset
 from torch.utils.data import BatchSampler, DataLoader, RandomSampler, SequentialSampler
 
 from lightning_lite.utilities.data import (
@@ -16,9 +17,6 @@
 )
 from lightning_lite.utilities.exceptions import MisconfigurationException
 
-# TODO(lite): provide boring classes in Lite
-from pytorch_lightning.demos.boring_classes import RandomDataset, RandomIterableDataset
-
 
 def test_has_iterable_dataset():
     assert has_iterable_dataset(DataLoader(RandomIterableDataset(1, 1)))