
Commit b6a5daf

tkng, awaelchli, and pre-commit-ci[bot] authored and committed
Fix the condition for calling update_learning_rates (#7032)
Co-authored-by: Adrian Wälchli <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent f11917f commit b6a5daf

File tree

CHANGELOG.md
pytorch_lightning/trainer/training_loop.py
tests/trainer/optimization/test_optimizers.py

3 files changed: +35, -14 lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
@@ -344,6 +344,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 * Remove hardcoding of local rank in accelerator connector ([#6878](https://github.com/PyTorchLightning/pytorch-lightning/pull/6878))
 
 
+- Fixed incorrect number of calls to LR scheduler when `check_val_every_n_epoch > 1` ([#7032](https://github.com/PyTorchLightning/pytorch-lightning/pull/7032))
+
+
 ## [1.2.7] - 2021-04-06
 
 ### Fixed
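
For context, the changelog entry above refers to schedulers stepped on the `'epoch'` interval. Below is a minimal, hypothetical sketch of such a setup; the module and its names are illustrative only and are not part of this commit:

```python
# Sketch only: a LightningModule whose epoch-interval scheduler is affected by
# the bug described in the changelog entry above. Names are illustrative.
import torch
from pytorch_lightning import LightningModule


class LitModel(LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(32, 2)

    def training_step(self, batch, batch_idx):
        return self.layer(batch).sum()

    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.parameters(), lr=0.1)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1)
        # "interval": "epoch" (the default) means the scheduler is stepped by
        # update_learning_rates(interval='epoch') at the end of each training epoch.
        return {
            "optimizer": optimizer,
            "lr_scheduler": {"scheduler": scheduler, "interval": "epoch"},
        }
```

With `Trainer(check_val_every_n_epoch=2, ...)`, epochs that ran no validation previously skipped the epoch-level scheduler update (see the condition change in `training_loop.py` below), so a scheduler like this one was stepped an incorrect number of times.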

pytorch_lightning/trainer/training_loop.py

Lines changed: 1 addition & 3 deletions
@@ -474,7 +474,6 @@ def run_training_epoch(self):
 
         train_dataloader = self.trainer.data_connector.get_profiled_train_dataloader(train_dataloader)
         dataloader_idx = 0
-        val_loop_called = False
 
         batch_idx = None
         is_last_batch = None
@@ -516,7 +515,6 @@ def run_training_epoch(self):
                 self.trainer.validating = True
                 self.trainer.run_evaluation()
                 self.trainer.training = True
-                val_loop_called = True
 
             # -----------------------------------------
             # SAVE LOGGERS (ie: Tensorboard, etc...)
@@ -565,7 +563,7 @@ def run_training_epoch(self):
         should_train_only = self.trainer.disable_validation or should_skip_eval
 
         # update epoch level lr_schedulers if no val loop outside train loop is triggered
-        if (val_loop_called and not should_check_val) or should_train_only:
+        if not should_check_val or should_train_only:
            self.trainer.optimizer_connector.update_learning_rates(interval='epoch')
 
        if should_train_only:
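
To make the condition change concrete, the snippet below evaluates the old and new expressions for an epoch in which validation is enabled but not due (for example, `check_val_every_n_epoch=2` on a non-validation epoch). The variable names mirror those in `run_training_epoch`; the values are hypothetical:

```python
# Illustration only: old vs. new epoch-end condition for an epoch that runs
# no validation at all. Values chosen to match that scenario.
should_train_only = False   # validation is not disabled and eval is not skipped
should_check_val = False    # no validation loop is due at the end of this epoch
val_loop_called = False     # no validation loop ran inside this epoch either

old_condition = (val_loop_called and not should_check_val) or should_train_only
new_condition = not should_check_val or should_train_only

print(old_condition)  # False -> update_learning_rates(interval='epoch') was skipped
print(new_condition)  # True  -> the epoch-level schedulers now step as expected
```

When validation is due at the end of the epoch (`should_check_val` is True and `should_train_only` is False), both expressions evaluate to False, so the fix only changes the behaviour of epochs that skip validation.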

tests/trainer/optimization/test_optimizers.py

Lines changed: 31 additions & 11 deletions
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from unittest import mock
+
 import pytest
 import torch
 from torch import optim
@@ -577,21 +579,21 @@ def configure_optimizers(self):
     trainer.fit(model)
 
 
-class TestModel(BoringModel):
+@RunIf(min_gpus=2, special=True)
+def test_optimizer_state_on_device(tmpdir):
+    """ Test that optimizers that create state initially at instantiation still end up with the state on the GPU. """
 
-    def configure_optimizers(self):
-        # Adagrad creates state tensors immediately, model is not yet on GPU.
-        return optim.Adagrad(self.parameters())
+    class TestModel(BoringModel):
 
-    def on_train_start(self, *args, **kwargs):
-        opt = self.optimizers()
-        _, state = next(iter(opt.state.items()))
-        assert state["sum"].device == torch.device("cuda", self.local_rank) == self.device
+        def configure_optimizers(self):
+            # Adagrad creates state tensors immediately, model is not yet on GPU.
+            return optim.Adagrad(self.parameters())
 
+        def on_train_start(self, *args, **kwargs):
+            opt = self.optimizers()
+            _, state = next(iter(opt.state.items()))
+            assert state["sum"].device == torch.device("cuda", self.local_rank) == self.device
 
-@RunIf(min_gpus=2, special=True)
-def test_optimizer_state_on_device(tmpdir):
-    """ Test that optimizers that create state initially at instantiation still end up with the state on the GPU. """
     model = TestModel()
     trainer = Trainer(
         default_root_dir=tmpdir,
@@ -600,3 +602,21 @@ def test_optimizer_state_on_device(tmpdir):
         fast_dev_run=True,
     )
     trainer.fit(model)
+
+
+@pytest.mark.parametrize("check_val_every_n_epoch", [1, 2])
+@mock.patch("torch.optim.lr_scheduler.StepLR.step")
+def test_lr_scheduler_epoch_step_frequency(mocked_sched, check_val_every_n_epoch, tmpdir):
+    epochs = 4
+    expected_steps = epochs + 1  # every LRScheduler gets called once at init
+
+    model = BoringModel()
+    trainer = Trainer(
+        default_root_dir=tmpdir,
+        limit_train_batches=2,
+        limit_val_batches=2,
+        check_val_every_n_epoch=check_val_every_n_epoch,
+        max_epochs=epochs,
+    )
+    trainer.fit(model)
+    assert mocked_sched.call_count == expected_steps
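
The `expected_steps = epochs + 1` in the new test relies on PyTorch's scheduler base class calling `step()` once during construction, which the mock records before training starts. A standalone sketch of that behaviour (independent of this commit; the mock target is the same one the test patches):

```python
# Sketch only: constructing a StepLR already triggers one step() call, which is
# why the test above expects epochs + 1 calls rather than epochs.
from unittest import mock

import torch
from torch import optim

model = torch.nn.Linear(2, 2)
optimizer = optim.SGD(model.parameters(), lr=0.1)

with mock.patch("torch.optim.lr_scheduler.StepLR.step") as mocked_step:
    optim.lr_scheduler.StepLR(optimizer, step_size=1)

print(mocked_step.call_count)  # 1 -- the "+ 1" in expected_steps
```

With the fix in place, the mocked `StepLR.step` is then called once per training epoch, so four epochs yield five recorded calls for both `check_val_every_n_epoch=1` and `check_val_every_n_epoch=2`.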
