Skip to content

Disable validation completely when overfit_batches>0 #9709

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
35 commits merged on Dec 1, 2021
Merged
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
cf7b1ef
[fix] bug that caused the "no validation" setting to be ineffective
popfido Sep 26, 2021
ef0b9ee
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 26, 2021
0587a1e
Add CHANGELOG
popfido Sep 26, 2021
bf4c110
Update CHANGELOG.md
popfido Oct 12, 2021
99f196f
update condition logic
popfido Oct 15, 2021
346ede5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 15, 2021
cfd82a1
change corresponding logic according to #9989
popfido Nov 22, 2021
a02b39b
Merge remote-tracking branch 'origin/master' into bugfix/8962
popfido Nov 22, 2021
c741753
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 22, 2021
9e9cb9b
reset val_batches to 0 if `overfit_batches` > 0
popfido Nov 22, 2021
ef8c54a
remove duplicate test case
popfido Nov 23, 2021
943d2ea
update outdated testcases
popfido Nov 23, 2021
01f0901
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 23, 2021
3e83de0
remove redundant assert
popfido Nov 23, 2021
190b3f5
deprecate usage of overfit_batches
popfido Nov 23, 2021
96cde83
do not test validation in overfit_batch testcase
popfido Nov 24, 2021
c2dd74b
Modify changelog
popfido Nov 24, 2021
49371e7
Merge remote-tracking branch 'upstream/master' into bugfix/8962
popfido Nov 24, 2021
1c075a4
Update tests/models/test_cpu.py
popfido Nov 24, 2021
15ec209
Update tests/callbacks/test_early_stopping.py
popfido Nov 24, 2021
1d8c3a3
Update tests/callbacks/test_early_stopping.py
popfido Nov 24, 2021
3554f67
Update tests/callbacks/test_early_stopping.py
popfido Nov 24, 2021
d0d032c
Update pytorch_lightning/trainer/trainer.py
popfido Nov 24, 2021
80cc4b1
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 24, 2021
ad167fe
reschedule test cases and fix bugs
popfido Nov 25, 2021
8ae0742
add checker for test
popfido Nov 25, 2021
27231b1
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 25, 2021
7bbc790
remove unused import
popfido Nov 25, 2021
68539cd
reduce test cases data scale to pass
popfido Nov 25, 2021
28de952
undo test case scale reduction
popfido Nov 25, 2021
2f78e71
remove redundant document
popfido Nov 29, 2021
f1a9692
Merge remote-tracking branch 'upstream/master' into bugfix/8962
popfido Nov 30, 2021
bfb89bc
Merge branch 'master' into bugfix/8962
popfido Nov 30, 2021
89a76cb
Merge branch 'master' into bugfix/8962
popfido Dec 1, 2021
20a8baf
Update CHANGELOG.md
rohitgr7 Dec 1, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Raised an error if the `batch_size` cannot be inferred from the current batch if it contained a string or was a custom batch object ([#10541](https://github.com/PyTorchLightning/pytorch-lightning/pull/10541))


-
- Disable validation completely when `overfit_batches > 0` ([#9709](https://github.com/PyTorchLightning/pytorch-lightning/pull/9709))


-
Expand Down
3 changes: 1 addition & 2 deletions pytorch_lightning/trainer/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,8 +626,7 @@ def _determine_data_use_amount(self, overfit_batches: float) -> None:
"""Use less data for debugging purposes."""
if overfit_batches > 0:
self.limit_train_batches = overfit_batches
self.limit_val_batches = overfit_batches
self.limit_test_batches = overfit_batches
self.limit_val_batches = 0

def _setup_on_init(self, num_sanity_val_steps: int) -> None:
self._log_device_info()
Expand Down
19 changes: 16 additions & 3 deletions tests/callbacks/test_early_stopping.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,13 @@ def validation_epoch_end(self, outputs):
early_stopping = EarlyStopping(
monitor="abc", stopping_threshold=stopping_threshold, divergence_threshold=divergence_theshold
)
trainer = Trainer(default_root_dir=tmpdir, callbacks=[early_stopping], overfit_batches=0.20, max_epochs=20)
trainer = Trainer(
default_root_dir=tmpdir,
callbacks=[early_stopping],
limit_train_batches=0.2,
limit_val_batches=0.2,
max_epochs=20,
)
trainer.fit(model)
assert trainer.current_epoch == expected_epoch, "early_stopping failed"

Expand All @@ -246,7 +252,13 @@ def validation_epoch_end(self, outputs):

model = CurrentModel()
early_stopping = EarlyStopping(monitor="val_loss", check_finite=True)
trainer = Trainer(default_root_dir=tmpdir, callbacks=[early_stopping], overfit_batches=0.20, max_epochs=10)
trainer = Trainer(
default_root_dir=tmpdir,
callbacks=[early_stopping],
limit_train_batches=0.2,
limit_val_batches=0.2,
max_epochs=10,
)
trainer.fit(model)
assert trainer.current_epoch == expected_stop_epoch
assert early_stopping.stopped_epoch == expected_stop_epoch
Expand Down Expand Up @@ -426,7 +438,8 @@ def test_multiple_early_stopping_callbacks(
trainer = Trainer(
default_root_dir=tmpdir,
callbacks=callbacks,
overfit_batches=0.20,
limit_train_batches=0.1,
limit_val_batches=0.1,
max_epochs=20,
strategy=strategy,
accelerator="cpu",
Expand Down
1 change: 0 additions & 1 deletion tests/models/test_cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ def validation_step(self, *args, **kwargs):
callbacks=[stopping],
default_root_dir=tmpdir,
gradient_clip_val=1.0,
overfit_batches=0.20,
track_grad_norm=2,
enable_progress_bar=False,
accumulate_grad_batches=2,
Expand Down
34 changes: 0 additions & 34 deletions tests/trainer/flags/test_overfit_batches.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,40 +19,6 @@
from tests.helpers.boring_model import BoringModel, RandomDataset


def test_overfit_multiple_val_loaders(tmpdir):
"""Tests that overfit batches works with multiple val dataloaders."""
val_dl_count = 2
overfit_batches = 3

class TestModel(BoringModel):
def validation_step(self, batch, batch_idx, dataloader_idx):
output = self.layer(batch[0])
loss = self.loss(batch, output)
return {"x": loss}

def validation_epoch_end(self, outputs) -> None:
pass

def val_dataloader(self):
dls = [torch.utils.data.DataLoader(RandomDataset(32, 64)) for _ in range(val_dl_count)]
return dls

model = TestModel()

trainer = Trainer(
default_root_dir=tmpdir,
max_epochs=2,
overfit_batches=overfit_batches,
log_every_n_steps=1,
enable_model_summary=False,
)

trainer.fit(model)
assert trainer.num_training_batches == overfit_batches
assert len(trainer.num_val_batches) == val_dl_count
assert all(nbatches == overfit_batches for nbatches in trainer.num_val_batches)


@pytest.mark.parametrize("overfit_batches", [1, 2, 0.1, 0.25, 1.0])
def test_overfit_basic(tmpdir, overfit_batches):
"""Tests that only training_step can be used when overfitting."""
Expand Down
39 changes: 26 additions & 13 deletions tests/trainer/test_trainer_tricks.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,33 +112,46 @@ def test_overfit_batch_limits(tmpdir):
# ------------------------------------------------------
for split in (RunningStage.VALIDATING, RunningStage.TESTING):

# ------------------------------------------------------
# test overfit_batches action
# ------------------------------------------------------

# ------------------------------------------------------
# test overfit_batches as percent
# ------------------------------------------------------
trainer = Trainer(overfit_batches=0.11)
trainer._data_connector.attach_dataloaders(model)
loader_num_batches, dataloaders = trainer._reset_eval_dataloader(split, model=model)
assert loader_num_batches[0] == num_train_samples

# make sure we turned off shuffle for the user
assert isinstance(dataloaders[0].sampler, SequentialSampler)

# make sure the loaders are the same
(xb, yb) = next(iter(dataloaders[0]))
assert torch.eq(xa, xb).all()
assert torch.eq(ya, yb).all()
loader_num_batches, _ = trainer._reset_eval_dataloader(split, model=model)
if split == RunningStage.VALIDATING:
assert loader_num_batches[0] == 0
else:
assert loader_num_batches[0] == len(test_loader)

# ------------------------------------------------------
# test overfit_batches as int
# ------------------------------------------------------
trainer = Trainer(overfit_batches=1)
trainer._data_connector.attach_dataloaders(model)
loader_num_batches, dataloaders = trainer._reset_eval_dataloader(split, model=model)
assert loader_num_batches[0] == 1
if split == RunningStage.VALIDATING:
assert loader_num_batches[0] == 0
else:
assert loader_num_batches[0] == len(test_loader)
# make sure we turned off shuffle for the user
assert isinstance(dataloaders[0].sampler, SequentialSampler)

# make sure the loaders are the same
(xb, yb) = next(iter(dataloaders[0]))
assert torch.eq(xa, xb).all()
assert torch.eq(ya, yb).all()

trainer = Trainer(overfit_batches=5)
trainer._data_connector.attach_dataloaders(model)
loader_num_batches, dataloaders = trainer._reset_eval_dataloader(split, model=model)
assert loader_num_batches[0] == 5
loader_num_batches, _ = trainer._reset_eval_dataloader(split, model=model)
if split == RunningStage.VALIDATING:
assert loader_num_batches[0] == 0
else:
assert loader_num_batches[0] == len(test_loader)

# ------------------------------------------------------
# test limit_xxx_batches as percent AND int
Expand Down