
Commit 4129e9e

Merge branch 'master' into refactor/is_slurm_managing_tasks
2 parents 2e005a8 + c15b84d commit 4129e9e


45 files changed: +297 −311 lines

.github/workflows/ci_test-base.yml

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ jobs:
         # this will install stable torch
         python-version: [3.9]
 
-    # Timeout: https://stackoverflow.com/a/59076067/4521646
+    # lower timeout as this should run very quickly
     timeout-minutes: 20
     steps:
     - uses: actions/checkout@v2

.github/workflows/ci_test-conda.yml

Lines changed: 0 additions & 1 deletion
@@ -17,7 +17,6 @@ jobs:
         python-version: ["3.8"] # previous to last Python version as that one is already used in test-full
         pytorch-version: ["1.7", "1.8", "1.9", "1.10"] # nightly: add when there's a release candidate
 
-    # Timeout: https://stackoverflow.com/a/59076067/4521646
     timeout-minutes: 35
     steps:
     - uses: actions/checkout@v2

.github/workflows/ci_test-full.yml

Lines changed: 0 additions & 2 deletions
@@ -29,8 +29,6 @@ jobs:
         # nightly: add when there's a release candidate
         #- {os: ubuntu-20.04, python-version: "3.10", requires: "latest", release: "pre"}
 
-    # Timeout: https://stackoverflow.com/a/59076067/4521646
-    # TODO: the macOS is taking too long, probably caching did not work...
     timeout-minutes: 40
 
     steps:

.github/workflows/probot-auto-cc.yml

Lines changed: 3 additions & 5 deletions
@@ -2,16 +2,14 @@ name: Probot
 
 on:
   issues:
-    types:
-      - labeled
+    types: [labeled]
   pull_request:
-    types:
-      - labeled
+    types: [labeled, ready_for_review]
 
 jobs:
   auto-cc:
-    if: ${{ github.repository_owner == 'PyTorchLightning' }}
     runs-on: ubuntu-latest
+    if: github.event_name == 'issue' || github.event.pull_request.draft == false
     steps:
       - uses: carmocca/probot@v1
         env:

CHANGELOG.md

Lines changed: 8 additions & 2 deletions
@@ -31,6 +31,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - The `monitor` argument in the `EarlyStopping` callback is no longer optional ([#10328](https://github.com/PyTorchLightning/pytorch-lightning/pull/10328))
 
 
+- Do not fail if batch size could not be inferred for logging when using DeepSpeed ([#10438](https://github.com/PyTorchLightning/pytorch-lightning/issues/10438))
+
+
 - Raise `MisconfigurationException` when `enable_progress_bar=False` and a progress bar instance has been passed in the callback list ([#10520](https://github.com/PyTorchLightning/pytorch-lightning/issues/10520))
 
 
@@ -133,6 +136,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Removed deprecated `CheckpointConnector.hpc_load` property in favor of `CheckpointConnector.restore` ([#10525](https://github.com/PyTorchLightning/pytorch-lightning/pull/10525))
 
 
+- Removed deprecated `reload_dataloaders_every_epoch` from `Trainer` in favour of `reload_dataloaders_every_n_epochs` ([#10481](https://github.com/PyTorchLightning/pytorch-lightning/pull/10481))
+
+
 ### Fixed
 
@@ -142,7 +148,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fixed `CombinedLoader` and `max_size_cycle` didn't receive a `DistributedSampler` ([#10374](https://github.com/PyTorchLightning/pytorch-lightning/issues/10374))
 
 
-- Fixed `to_torchscript()` causing false positive deprecation warnings ([#10470](https://github.com/PyTorchLightning/pytorch-lightning/issues/10470))
+- Fixed scripting causing false positive deprecation warnings ([#10470](https://github.com/PyTorchLightning/pytorch-lightning/pull/10470), [#10555](https://github.com/PyTorchLightning/pytorch-lightning/pull/10555))
 
 
 - Fixed `isinstance` not working with `init_meta_context`, materialized model not being moved to the device ([#10493](https://github.com/PyTorchLightning/metrics/pull/10493))
@@ -157,7 +163,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fixed sampler replacement logic with `overfit_batches` to only replace the sample when `SequentialSampler` is not used ([#10486](https://github.com/PyTorchLightning/pytorch-lightning/issues/10486))
 
 
-
+- Fixed propagation of device and dtype information to submodules of LightningLite when they inherit from `DeviceDtypeModuleMixin` ([#10559](https://github.com/PyTorchLightning/pytorch-lightning/issues/10559))
 
 
 -

docs/source/_templates/layout.html

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@
 {% block footer %}
 {{ super() }}
 <script script type="text/javascript">
-    var collapsedSections = ['Best practices', 'Lightning API', 'Optional extensions', 'Tutorials', 'API References', 'Bolts', 'Examples', 'Partner Domain Frameworks', 'Community'];
+    var collapsedSections = ['Best practices', 'Optional extensions', 'Tutorials', 'API References', 'Bolts', 'Examples', 'Partner Domain Frameworks', 'Community'];
 </script>
 
 {% endblock %}

pl_examples/loop_examples/kfold.py

Lines changed: 1 addition & 1 deletion
@@ -205,7 +205,7 @@ def on_run_end(self) -> None:
         voting_model = EnsembleVotingModel(type(self.trainer.lightning_module), checkpoint_paths)
         voting_model.trainer = self.trainer
         # This requires to connect the new model and move it the right device.
-        self.trainer.accelerator.connect(voting_model)
+        self.trainer.training_type_plugin.connect(voting_model)
         self.trainer.training_type_plugin.model_to_device()
         self.trainer.test_loop.run()

pl_examples/loop_examples/yielding_training_step.py

Lines changed: 1 addition & 1 deletion
@@ -86,7 +86,7 @@ def _training_step(self, generator):
         # Here, instead of calling `lightning_module.training_step()`
         # we call next() on the generator!
         training_step_output = next(generator)
-        self.trainer.accelerator.post_training_step()
+        self.trainer.training_type_plugin.post_training_step()
 
         training_step_output = self.trainer.call_hook("training_step_end", training_step_output)

pytorch_lightning/__init__.py

Lines changed: 0 additions & 4 deletions
@@ -1,7 +1,6 @@
 """Root package info."""
 
 import logging
-import os
 
 from pytorch_lightning.__about__ import *  # noqa: F401, F403
 
@@ -14,9 +13,6 @@
 _logger.addHandler(logging.StreamHandler())
 _logger.propagate = False
 
-_PACKAGE_ROOT = os.path.dirname(__file__)
-_PROJECT_ROOT = os.path.dirname(_PACKAGE_ROOT)
-
 from pytorch_lightning.callbacks import Callback  # noqa: E402
 from pytorch_lightning.core import LightningDataModule, LightningModule  # noqa: E402
 from pytorch_lightning.trainer import Trainer  # noqa: E402

pytorch_lightning/lite/wrappers.py

Lines changed: 2 additions & 1 deletion
@@ -24,6 +24,7 @@
 from torch.utils.data import DataLoader
 
 from pytorch_lightning.accelerators import Accelerator
+from pytorch_lightning.core.mixins import DeviceDtypeModuleMixin
 from pytorch_lightning.plugins import PrecisionPlugin
 from pytorch_lightning.utilities.apply_func import apply_to_collection, move_data_to_device
 
@@ -64,7 +65,7 @@ def step(self, closure: Optional[Callable] = None) -> None:
         )
 
 
-class _LiteModule(nn.Module):
+class _LiteModule(DeviceDtypeModuleMixin):
     def __init__(self, module: nn.Module, precision_plugin: PrecisionPlugin) -> None:
         """The LiteModule is a thin wrapper around the :class:`torch.nn.Module` and handles precision / autocast
         automatically for the forward pass.
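
With `_LiteModule` inheriting from `DeviceDtypeModuleMixin`, device and dtype information can propagate to submodules that also use the mixin, per the CHANGELOG entry for #10559. A rough illustration of the mixin's tracking; the `_TrackedSubmodule` class below is a hypothetical example, not part of this commit:

    import torch
    from pytorch_lightning.core.mixins import DeviceDtypeModuleMixin

    class _TrackedSubmodule(DeviceDtypeModuleMixin):  # hypothetical example class
        def __init__(self):
            super().__init__()
            self.register_buffer("state", torch.zeros(1))

    sub = _TrackedSubmodule()
    sub.to(torch.float16)           # the mixin intercepts .to() and records the new dtype
    print(sub.dtype, sub.state.dtype)  # both reflect torch.float16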

pytorch_lightning/loggers/tensorboard.py

Lines changed: 2 additions & 0 deletions
@@ -240,7 +240,9 @@ def log_graph(self, model: "pl.LightningModule", input_array=None):
 
         if input_array is not None:
             input_array = model._apply_batch_transfer_handler(input_array)
+            model._running_torchscript = True
             self.experiment.add_graph(model, input_array)
+            model._running_torchscript = False
         else:
             rank_zero_warn(
                 "Could not log computational graph since the"

pytorch_lightning/plugins/training_type/deepspeed.py

Lines changed: 13 additions & 8 deletions
@@ -618,11 +618,6 @@ def _format_batch_size_and_grad_accum_config(self):
             )
             self.config["gradient_accumulation_steps"] = self.lightning_module.trainer.accumulate_grad_batches
         if "train_micro_batch_size_per_gpu" not in self.config:
-            rank_zero_warn(
-                "Inferring the batch size for internal deepspeed logging from the `train_dataloader()`. "
-                "If you require skipping this, please pass "
-                "`Trainer(strategy=DeepSpeedPlugin(logging_batch_size_per_gpu=batch_size))`"
-            )
             batch_size = self._auto_select_batch_size()
             self.config["train_micro_batch_size_per_gpu"] = batch_size
         if "gradient_clipping" not in self.config:
@@ -634,9 +629,19 @@ def _auto_select_batch_size(self):
         batch_size = 1
         train_dl_source = self.lightning_module.trainer._data_connector._train_dataloader_source
         if train_dl_source.is_defined():
-            train_dataloader = train_dl_source.dataloader()
-            if hasattr(train_dataloader, "batch_sampler"):
-                batch_size = train_dataloader.batch_sampler.batch_size
+            try:
+                train_dataloader = train_dl_source.dataloader()
+                if hasattr(train_dataloader, "batch_sampler"):
+                    batch_size = train_dataloader.batch_sampler.batch_size
+            # broad exception on purpose as `source.dataloader()` will fail if the dataloader requires `setup`
+            # to have been called before
+            except Exception:
+                if self.global_rank == 0:
+                    deepspeed.utils.logging.logger.warning(
+                        "Tried to infer the batch size for internal deepspeed logging from the `train_dataloader()`. "
+                        "To ensure DeepSpeed logging remains correct, please manually pass the plugin with the "
+                        "batch size, `Trainer(strategy=DeepSpeedPlugin(logging_batch_size_per_gpu=batch_size))`."
+                    )
         return batch_size
 
     def _format_precision_config(self):
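
As the replacement warning suggests, the batch-size inference can be bypassed by passing the logging batch size to the plugin directly. A minimal sketch of that call; the value 32 and `gpus=1` are illustrative assumptions:

    from pytorch_lightning import Trainer
    from pytorch_lightning.plugins import DeepSpeedPlugin

    # Pass the batch size your train dataloader actually uses so DeepSpeed's internal
    # logging reports the correct `train_micro_batch_size_per_gpu`.
    trainer = Trainer(strategy=DeepSpeedPlugin(logging_batch_size_per_gpu=32), gpus=1)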

pytorch_lightning/plugins/training_type/ipu.py

Lines changed: 12 additions & 8 deletions
@@ -237,21 +237,25 @@ def to_tensor(x):
         args = apply_to_collection(args, dtype=(int, float), function=to_tensor)
         return args
 
-    def training_step(self, *args, **kwargs):
+    def _step(self, stage: RunningStage, *args: Any, **kwargs: Any):
         args = self._prepare_input(args)
-        return self.poptorch_models[RunningStage.TRAINING](*args, **kwargs)
+        poptorch_model = self.poptorch_models[stage]
+        self.lightning_module._running_torchscript = True
+        out = poptorch_model(*args, **kwargs)
+        self.lightning_module._running_torchscript = False
+        return out
+
+    def training_step(self, *args, **kwargs):
+        return self._step(RunningStage.TRAINING, *args, **kwargs)
 
     def validation_step(self, *args, **kwargs):
-        args = self._prepare_input(args)
-        return self.poptorch_models[RunningStage.VALIDATING](*args, **kwargs)
+        return self._step(RunningStage.VALIDATING, *args, **kwargs)
 
     def test_step(self, *args, **kwargs):
-        args = self._prepare_input(args)
-        return self.poptorch_models[RunningStage.TESTING](*args, **kwargs)
+        return self._step(RunningStage.TESTING, *args, **kwargs)
 
     def predict_step(self, *args, **kwargs):
-        args = self._prepare_input(args)
-        return self.poptorch_models[RunningStage.PREDICTING](*args, **kwargs)
+        return self._step(RunningStage.PREDICTING, *args, **kwargs)
 
     def teardown(self) -> None:
         # undo dataloader patching

pytorch_lightning/trainer/connectors/data_connector.py

Lines changed: 0 additions & 8 deletions
@@ -64,7 +64,6 @@ def on_trainer_init(
         self,
         check_val_every_n_epoch: int,
         reload_dataloaders_every_n_epochs: int,
-        reload_dataloaders_every_epoch: bool,
         prepare_data_per_node: Optional[bool] = None,
     ) -> None:
         self.trainer.datamodule = None
@@ -83,13 +82,6 @@ def on_trainer_init(
 
         self.trainer.check_val_every_n_epoch = check_val_every_n_epoch
 
-        if reload_dataloaders_every_epoch:
-            reload_dataloaders_every_n_epochs = int(reload_dataloaders_every_epoch)
-            rank_zero_deprecation(
-                "`reload_dataloaders_every_epoch` is deprecated in v1.4 and will be removed in v1.6."
-                " Please use `reload_dataloaders_every_n_epochs` in Trainer."
-            )
-
         if not isinstance(reload_dataloaders_every_n_epochs, int) or (reload_dataloaders_every_n_epochs < 0):
             raise MisconfigurationException(
                 f"`reload_dataloaders_every_n_epochs` should be an int >= 0, got {reload_dataloaders_every_n_epochs}."

pytorch_lightning/trainer/trainer.py

Lines changed: 0 additions & 8 deletions
@@ -162,7 +162,6 @@ def __init__(
         benchmark: bool = False,
         deterministic: bool = False,
         reload_dataloaders_every_n_epochs: int = 0,
-        reload_dataloaders_every_epoch: bool = False,
         auto_lr_find: Union[bool, str] = False,
         replace_sampler_ddp: bool = True,
         detect_anomaly: bool = False,
@@ -341,12 +340,6 @@ def __init__(
 
             reload_dataloaders_every_n_epochs: Set to a non-negative integer to reload dataloaders every n epochs.
 
-            reload_dataloaders_every_epoch: Set to True to reload dataloaders every epoch.
-
-                .. deprecated:: v1.4
-                    ``reload_dataloaders_every_epoch`` has been deprecated in v1.4 and will be removed in v1.6.
-                    Please use ``reload_dataloaders_every_n_epochs``.
-
             replace_sampler_ddp: Explicitly enables or disables sampler replacement. If not specified this
                 will toggled automatically when DDP is used. By default it will add ``shuffle=True`` for
                 train sampler and ``shuffle=False`` for val/test sampler. If you want to customize it,
@@ -515,7 +508,6 @@ def __init__(
         self._data_connector.on_trainer_init(
             check_val_every_n_epoch,
             reload_dataloaders_every_n_epochs,
-            reload_dataloaders_every_epoch,
             prepare_data_per_node,
         )
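
The removed deprecation path converted `reload_dataloaders_every_epoch=True` to `reload_dataloaders_every_n_epochs=1` (`int(True)`), so existing callers migrate as in this minimal sketch:

    from pytorch_lightning import Trainer

    # Previously: Trainer(reload_dataloaders_every_epoch=True), removed by this commit.
    trainer = Trainer(reload_dataloaders_every_n_epochs=1)  # reload train dataloaders every epoch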
