Add LRFinder callback (#13802)

rohitgr7 · pre-commit-ci[bot] · Felonious-Spellfire · web-flow · commit 7fed7a12c56c · 2022-10-05T13:15:38.000+02:00
* add BatchSizeFinderCallback callback
* enable fast_dev_run test
* keep tune and remove early_exit
* move exception to setup
* Apply suggestions from code review

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Laverne Henderson <laverne.henderson@coupa.com>
Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com>
diff --git a/docs/source-pytorch/api_references.rst b/docs/source-pytorch/api_references.rst
@@ -36,6 +36,7 @@ callbacks
     EarlyStopping
     GradientAccumulationScheduler
     LambdaCallback
+    LearningRateFinder
     LearningRateMonitor
     ModelCheckpoint
     ModelPruning
diff --git a/docs/source-pytorch/extensions/callbacks.rst b/docs/source-pytorch/extensions/callbacks.rst
@@ -93,6 +93,7 @@ Lightning has a few built-in callbacks.
     EarlyStopping
     GradientAccumulationScheduler
     LambdaCallback
+    LearningRateFinder
     LearningRateMonitor
     ModelCheckpoint
     ModelPruning
diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md
@@ -12,6 +12,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Added `BatchSizeFinder` callback ([#11089](https://github.com/PyTorchLightning/pytorch-lightning/pull/11089))
 
 
+- Added `LearningRateFinder` callback ([#13802](https://github.com/PyTorchLightning/pytorch-lightning/pull/13802))
+
+
 - Tuner now supports a new `method` argument which will determine when to run the `BatchSizeFinder`: one of `fit`, `validate`, `test` or `predict` ([#11089](https://github.com/PyTorchLightning/pytorch-lightning/pull/11089))
 
 
diff --git a/src/pytorch_lightning/callbacks/__init__.py b/src/pytorch_lightning/callbacks/__init__.py
@@ -19,6 +19,7 @@
 from pytorch_lightning.callbacks.finetuning import BackboneFinetuning, BaseFinetuning
 from pytorch_lightning.callbacks.gradient_accumulation_scheduler import GradientAccumulationScheduler
 from pytorch_lightning.callbacks.lambda_function import LambdaCallback
+from pytorch_lightning.callbacks.lr_finder import LearningRateFinder
 from pytorch_lightning.callbacks.lr_monitor import LearningRateMonitor
 from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
 from pytorch_lightning.callbacks.model_summary import ModelSummary
@@ -41,6 +42,7 @@
     "EarlyStopping",
     "GradientAccumulationScheduler",
     "LambdaCallback",
+    "LearningRateFinder",
     "LearningRateMonitor",
     "ModelCheckpoint",
     "ModelPruning",
diff --git a/src/pytorch_lightning/callbacks/lr_finder.py b/src/pytorch_lightning/callbacks/lr_finder.py
@@ -0,0 +1,99 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+r"""
+LearningRateFinder
+==================
+
+Finds optimal learning rate
+"""
+from typing import Optional
+
+import pytorch_lightning as pl
+from pytorch_lightning.callbacks.callback import Callback
+from pytorch_lightning.tuner.lr_finder import _LRFinder, lr_find
+from pytorch_lightning.utilities.exceptions import _TunerExitException, MisconfigurationException
+from pytorch_lightning.utilities.seed import isolate_rng
+
+
+class LearningRateFinder(Callback):
+    """The ``LearningRateFinder`` callback enables the user to do a range test of good initial learning rates, to
+    reduce the amount of guesswork in picking a good starting learning rate.
+
+    Args:
+        min_lr: Minimum learning rate to investigate
+        max_lr: Maximum learning rate to investigate
+        num_training_steps: Number of learning rates to test
+        mode: Search strategy to update learning rate after each batch:
+
+            - ``'exponential'`` (default): Increases the learning rate exponentially.
+            - ``'linear'``: Increases the learning rate linearly.
+
+        early_stop_threshold: Threshold for stopping the search. If the
+            loss at any point is larger than early_stop_threshold*best_loss
+            then the search is stopped. To disable, set to None.
+        update_attr: Whether to update the learning rate attribute or not.
+
+    Raises:
+        MisconfigurationException:
+            If ``mode`` is not one of ``SUPPORTED_MODES``, if learning rate/lr in ``model`` or ``model.hparams``
+            isn't overridden when ``auto_lr_find=True``, or if you are using more than one optimizer.
+    """
+
+    SUPPORTED_MODES = ("linear", "exponential")
+
+    def __init__(
+        self,
+        min_lr: float = 1e-8,
+        max_lr: float = 1,
+        num_training_steps: int = 100,
+        mode: str = "exponential",
+        early_stop_threshold: Optional[float] = 4.0,
+        update_attr: bool = False,
+    ) -> None:
+        mode = mode.lower()
+        if mode not in self.SUPPORTED_MODES:
+            raise MisconfigurationException(f"`mode` should be either of {self.SUPPORTED_MODES}")
+
+        self._min_lr = min_lr
+        self._max_lr = max_lr
+        self._num_training_steps = num_training_steps
+        self._mode = mode
+        self._early_stop_threshold = early_stop_threshold
+        self._update_attr = update_attr
+
+        # Not set to ``True`` anywhere in this class; if it is, ``lr_find`` raises ``_TunerExitException`` afterwards.
+        self._early_exit = False
+        self.lr_finder: Optional[_LRFinder] = None
+        self.optimal_lr: Optional[_LRFinder] = None
+
+    def lr_find(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Run the tuner's ``lr_find`` inside ``isolate_rng()`` and keep its result on this callback."""
+        with isolate_rng():
+            self.lr_finder = self.optimal_lr = lr_find(
+                trainer,
+                pl_module,
+                min_lr=self._min_lr,
+                max_lr=self._max_lr,
+                num_training=self._num_training_steps,
+                mode=self._mode,
+                early_stop_threshold=self._early_stop_threshold,
+                update_attr=self._update_attr,
+            )
+
+        if self._early_exit:
+            raise _TunerExitException()
+
+    def on_fit_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        """Start the learning rate search as soon as fitting begins."""
+        self.lr_find(trainer, pl_module)
diff --git a/src/pytorch_lightning/trainer/connectors/callback_connector.py b/src/pytorch_lightning/trainer/connectors/callback_connector.py
@@ -29,6 +29,7 @@
     TQDMProgressBar,
 )
 from pytorch_lightning.callbacks.batch_size_finder import BatchSizeFinder
+from pytorch_lightning.callbacks.lr_finder import LearningRateFinder
 from pytorch_lightning.callbacks.rich_model_summary import RichModelSummary
 from pytorch_lightning.callbacks.timer import Timer
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
@@ -246,7 +247,7 @@ def _reorder_callbacks(callbacks: List[Callback]) -> List[Callback]:
         checkpoint_callbacks: List[Callback] = []
 
         for cb in callbacks:
-            if isinstance(cb, BatchSizeFinder):
+            if isinstance(cb, (BatchSizeFinder, LearningRateFinder)):
                 tuner_callbacks.append(cb)
             elif isinstance(cb, Checkpoint):
                 checkpoint_callbacks.append(cb)
diff --git a/src/pytorch_lightning/tuner/lr_finder.py b/src/pytorch_lightning/tuner/lr_finder.py
@@ -15,7 +15,7 @@
 import logging
 import os
 import uuid
-from functools import wraps
+from copy import deepcopy
 from typing import Any, Callable, cast, Dict, List, Optional, Sequence, TYPE_CHECKING, Union
 
 import numpy as np
@@ -25,8 +25,6 @@
 
 import pytorch_lightning as pl
 from pytorch_lightning.callbacks import Callback
-from pytorch_lightning.core.optimizer import _init_optimizers_and_lr_schedulers, _set_scheduler_opt_idx
-from pytorch_lightning.loggers.logger import DummyLogger
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from pytorch_lightning.utilities.parsing import lightning_hasattr, lightning_setattr
 from pytorch_lightning.utilities.rank_zero import rank_zero_warn
@@ -92,7 +90,7 @@ class _LRFinder:
         lr = lr_finder.suggestion()
     """
 
-    def __init__(self, mode: str, lr_min: float, lr_max: float, num_training: int):
+    def __init__(self, mode: str, lr_min: float, lr_max: float, num_training: int) -> None:
         assert mode in ("linear", "exponential"), "mode should be either `linear` or `exponential`"
 
         self.mode = mode
@@ -104,38 +102,33 @@ def __init__(self, mode: str, lr_min: float, lr_max: float, num_training: int):
         self._total_batch_idx = 0  # for debug purpose
 
     def _exchange_scheduler(self, trainer: "pl.Trainer", model: "pl.LightningModule") -> Callable[["pl.Trainer"], None]:
+        # TODO: update docs here
         """Decorate `trainer.strategy.setup_optimizers` method such that it sets the user's originally specified
         optimizer together with a new scheduler that takes care of the learning rate search."""
-        setup_optimizers = trainer.strategy.setup_optimizers
+        from pytorch_lightning.core.optimizer import _set_scheduler_opt_idx
 
-        @wraps(setup_optimizers)
-        def func(trainer: "pl.Trainer") -> None:
-            # Decide the structure of the output from _init_optimizers_and_lr_schedulers
-            optimizers, _, _ = _init_optimizers_and_lr_schedulers(trainer.lightning_module)
+        optimizers = trainer.strategy.optimizers
 
-            if len(optimizers) != 1:
-                raise MisconfigurationException(
-                    f"`model.configure_optimizers()` returned {len(optimizers)}, but"
-                    " learning rate finder only works with single optimizer"
-                )
-
-            optimizer = optimizers[0]
+        if len(optimizers) != 1:
+            raise MisconfigurationException(
+                f"`model.configure_optimizers()` returned {len(optimizers)}, but"
+                " learning rate finder only works with single optimizer"
+            )
 
-            new_lrs = [self.lr_min] * len(optimizer.param_groups)
-            for param_group, new_lr in zip(optimizer.param_groups, new_lrs):
-                param_group["lr"] = new_lr
-                param_group["initial_lr"] = new_lr
+        optimizer = optimizers[0]
 
-            args = (optimizer, self.lr_max, self.num_training)
-            scheduler = _LinearLR(*args) if self.mode == "linear" else _ExponentialLR(*args)
-            scheduler = cast(pl.utilities.types._LRScheduler, scheduler)
+        new_lrs = [self.lr_min] * len(optimizer.param_groups)
+        for param_group, new_lr in zip(optimizer.param_groups, new_lrs):
+            param_group["lr"] = new_lr
+            param_group["initial_lr"] = new_lr
 
-            trainer.strategy.optimizers = [optimizer]
-            trainer.strategy.lr_scheduler_configs = [LRSchedulerConfig(scheduler, interval="step", opt_idx=0)]
-            trainer.strategy.optimizer_frequencies = []
-            _set_scheduler_opt_idx(trainer.optimizers, trainer.lr_scheduler_configs)
+        args = (optimizer, self.lr_max, self.num_training)
+        scheduler = _LinearLR(*args) if self.mode == "linear" else _ExponentialLR(*args)
+        scheduler = cast(pl.utilities.types._LRScheduler, scheduler)
 
-        return func
+        trainer.strategy.optimizers = [optimizer]
+        trainer.strategy.lr_scheduler_configs = [LRSchedulerConfig(scheduler, interval="step", opt_idx=0)]
+        _set_scheduler_opt_idx(trainer.optimizers, trainer.lr_scheduler_configs)
 
     def plot(self, suggest: bool = False, show: bool = False) -> Optional["plt.Figure"]:
         """Plot results from lr_find run
@@ -225,23 +218,25 @@ def lr_find(
     # Save initial model, that is loaded after learning rate is found
     ckpt_path = os.path.join(trainer.default_root_dir, f".lr_find_{uuid.uuid4()}.ckpt")
     trainer.save_checkpoint(ckpt_path)
+
+    # Arguments we adjust during the lr finder, save for restoring
     params = __lr_finder_dump_params(trainer)
 
     # Set to values that are required by the algorithm
     __lr_finder_reset_params(trainer, num_training, early_stop_threshold)
 
-    # Initialize lr finder object (stores results)
-    lr_finder = _LRFinder(mode, min_lr, max_lr, num_training)
-
     # Disable standard progress bar for fit
     if trainer.progress_bar_callback:
         trainer.progress_bar_callback.disable()
 
+    # Initialize lr finder object (stores results)
+    lr_finder = _LRFinder(mode, min_lr, max_lr, num_training)
+
     # Configure optimizer and scheduler
-    trainer.strategy.setup_optimizers = lr_finder._exchange_scheduler(trainer, model)  # type: ignore[assignment]
+    lr_finder._exchange_scheduler(trainer, model)
 
     # Fit, lr & loss logged in callback
-    trainer.tuner._run(model)
+    _try_loop_run(trainer, params)
 
     # Prompt if we stopped early
     if trainer.global_step != num_training:
@@ -274,31 +269,48 @@ def lr_find(
 
 def __lr_finder_dump_params(trainer: "pl.Trainer") -> Dict[str, Any]:
     return {
-        "auto_lr_find": trainer.auto_lr_find,
+        "optimizers": trainer.strategy.optimizers,
+        "lr_scheduler_configs": trainer.strategy.lr_scheduler_configs,
+        "optimizer_frequencies": trainer.strategy.optimizer_frequencies,
         "callbacks": trainer.callbacks,
-        "logger": trainer.logger,
+        "loggers": trainer.loggers,
+        # TODO: check if this is required
+        "auto_lr_find": trainer.auto_lr_find,
         "max_steps": trainer.fit_loop.max_steps,
-        "setup_optimizers": trainer.strategy.setup_optimizers,
+        "limit_val_batches": trainer.limit_val_batches,
+        "loop_state_dict": deepcopy(trainer.fit_loop.state_dict()),
     }
 
 
 def __lr_finder_reset_params(trainer: "pl.Trainer", num_training: int, early_stop_threshold: float) -> None:
+    from pytorch_lightning.loggers.logger import DummyLogger
+
+    trainer.strategy.lr_scheduler_configs = []
+    trainer.strategy.optimizer_frequencies = []
     # avoid lr find being called multiple times
     trainer.auto_lr_find = False
     # Use special lr logger callback
     trainer.callbacks = [_LRCallback(num_training, early_stop_threshold, progress_bar_refresh_rate=1)]
     # No logging
-    trainer.loggers = [DummyLogger()] if trainer.loggers else []
+    trainer.logger = DummyLogger() if trainer.logger is not None else None
     # Max step set to number of iterations
     trainer.fit_loop.max_steps = num_training
+    trainer.limit_val_batches = num_training
 
 
 def __lr_finder_restore_params(trainer: "pl.Trainer", params: Dict[str, Any]) -> None:
+    trainer.strategy.optimizers = params["optimizers"]
+    trainer.strategy.lr_scheduler_configs = params["lr_scheduler_configs"]
+    trainer.strategy.optimizer_frequencies = params["optimizer_frequencies"]
     trainer.auto_lr_find = params["auto_lr_find"]
     trainer.callbacks = params["callbacks"]
-    trainer.logger = params["logger"]
+    trainer.loggers = params["loggers"]
     trainer.fit_loop.max_steps = params["max_steps"]
-    trainer.strategy.setup_optimizers = params["setup_optimizers"]  # type: ignore[assignment]
+    trainer.limit_val_batches = params["limit_val_batches"]
+
+    loop = trainer.fit_loop
+    loop.load_state_dict(deepcopy(params["loop_state_dict"]))
+    loop.restarting = False
 
 
 class _LRCallback(Callback):
@@ -453,3 +465,10 @@ def get_lr(self) -> List[float]:  # type: ignore[override]
     @property
     def lr(self) -> Union[float, List[float]]:
         return self._lr
+
+
+def _try_loop_run(trainer: "pl.Trainer", params: Dict[str, Any]) -> None:
+    """Load a copy of ``params["loop_state_dict"]`` into the fit loop, clear ``restarting`` and run the loop."""
+    trainer.fit_loop.load_state_dict(deepcopy(params["loop_state_dict"]))
+    trainer.fit_loop.restarting = False
+    trainer.fit_loop.run()
diff --git a/src/pytorch_lightning/tuner/tuning.py b/src/pytorch_lightning/tuner/tuning.py
diff --git a/tests/tests_pytorch/trainer/test_states.py b/tests/tests_pytorch/trainer/test_states.py
diff --git a/tests/tests_pytorch/tuner/test_lr_finder.py b/tests/tests_pytorch/tuner/test_lr_finder.py