
Add support for len(datamodule) #9895

Merged (11 commits, Oct 15, 2021)
Changes from 4 commits
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -177,6 +177,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Added support for `torch.autograd.set_detect_anomaly` through `Trainer` constructor argument `detect_anomaly` ([#9848](https://github.com/PyTorchLightning/pytorch-lightning/pull/9848))

- Added a `len` method to `LightningDataModule` ([#9895](https://github.com/PyTorchLightning/pytorch-lightning/pull/9895))

### Changed

24 changes: 24 additions & 0 deletions pytorch_lightning/core/datamodule.py
@@ -23,6 +23,8 @@
from pytorch_lightning.core.mixins import HyperparametersMixin
from pytorch_lightning.utilities import rank_zero_deprecation
from pytorch_lightning.utilities.argparse import add_argparse_args, from_argparse_args, get_init_arguments_and_types
from pytorch_lightning.utilities.data import has_len
from pytorch_lightning.utilities.distributed import rank_zero_warn


class LightningDataModule(CheckpointHooks, DataHooks, HyperparametersMixin):
@@ -481,3 +483,25 @@ def __getstate__(self) -> dict:
for fn in ("prepare_data", "setup", "teardown"):
del d[fn]
return d

    def __len__(self) -> int:
        """Returns the total number of batches across all dataloaders defined in this datamodule."""

        def get_num_batches(dataloader):
            # Recursively sum batch counts for (possibly nested) sequences or mappings of dataloaders.
            if isinstance(dataloader, Sequence):
                return sum(get_num_batches(dl) for dl in dataloader)
            if isinstance(dataloader, Mapping):
                return sum(get_num_batches(dl) for dl in dataloader.values())
            if not has_len(dataloader):
                rank_zero_warn("`__len__` is not implemented for a `Dataloader`.")
                return 0
            return len(dataloader)

        num_batches = 0
        for method_name in ("train_dataloader", "val_dataloader", "test_dataloader", "predict_dataloader"):
            dataloader_method = getattr(self, method_name)
            try:
                dataloader = dataloader_method()
                num_batches += get_num_batches(dataloader)
            except NotImplementedError:
                # Dataloader hooks that are not overridden raise NotImplementedError and contribute 0 batches.
                pass

        return num_batches
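
For readers skimming the diff, here is a minimal usage sketch of the behavior added above: `len(datamodule)` sums the batch counts of every dataloader the module defines, recursing into lists and dicts, and counts un-overridden hooks (which raise `NotImplementedError`) as 0 batches. The `ToyDataModule` below and its dataset sizes are invented for illustration and are not part of this PR.

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

from pytorch_lightning import LightningDataModule


class ToyDataModule(LightningDataModule):
    """Hypothetical datamodule used only to illustrate len(datamodule)."""

    def train_dataloader(self):
        # 40 samples with batch_size=4 -> 10 batches
        return DataLoader(TensorDataset(torch.randn(40, 2)), batch_size=4)

    def val_dataloader(self):
        # a list of two dataloaders, 8 samples each with batch_size=4 -> 2 + 2 = 4 batches
        ds = TensorDataset(torch.randn(8, 2))
        return [DataLoader(ds, batch_size=4), DataLoader(ds, batch_size=4)]


dm = ToyDataModule()
# test_dataloader/predict_dataloader are not overridden, so they contribute 0 batches:
# 10 (train) + 4 (val) + 0 + 0 = 14
assert len(dm) == 14
```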
57 changes: 56 additions & 1 deletion tests/core/test_datamodules.py
@@ -21,13 +21,15 @@
import pytest
import torch
from omegaconf import OmegaConf
from torch.utils.data import DataLoader

from pytorch_lightning import LightningDataModule, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.utilities import AttributeDict
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.model_helpers import is_overridden
from tests.helpers import BoringDataModule, BoringModel
from tests.helpers import BoringDataModule, BoringModel, RandomDataset
from tests.helpers.dataloaders import CustomNotImplementedErrorDataloader
from tests.helpers.datamodules import ClassifDataModule
from tests.helpers.runif import RunIf
from tests.helpers.simple_models import ClassificationModel
@@ -625,3 +627,56 @@ def test_inconsistent_prepare_data_per_node(tmpdir):
    trainer.model = model
    trainer.datamodule = dm
    trainer.data_connector.prepare_data()


# RandomDataset(1, 32) has 32 samples; with the DataLoader's default batch_size=1 this gives 32 batches.
DATALOADER = DataLoader(RandomDataset(1, 32))


@pytest.mark.parametrize("method_name", ["train_dataloader", "val_dataloader", "test_dataloader", "predict_dataloader"])
@pytest.mark.parametrize(
["dataloader", "expected"],
[
[DATALOADER, 32],
[[DATALOADER, DATALOADER], 64],
[[[DATALOADER], [DATALOADER, DATALOADER]], 96],
[[{"foo": DATALOADER}, {"foo": DATALOADER, "bar": DATALOADER}], 96],
[{"foo": DATALOADER, "bar": DATALOADER}, 64],
[{"foo": {"foo": DATALOADER}, "bar": {"foo": DATALOADER, "bar": DATALOADER}}, 96],
[{"foo": [DATALOADER], "bar": [DATALOADER, DATALOADER]}, 96],
],
)
def test_len_different_types(method_name, dataloader, expected):
dm = LightningDataModule()
setattr(dm, method_name, lambda: dataloader)
assert len(dm) == expected


@pytest.mark.parametrize("method_name", ["train_dataloader", "val_dataloader", "test_dataloader", "predict_dataloader"])
def test_len_dataloader_no_len(method_name):
dataloader = CustomNotImplementedErrorDataloader(DATALOADER)
dm = LightningDataModule()
setattr(dm, method_name, lambda: dataloader)
with pytest.warns(UserWarning, match="`__len__` is not implemented for a `Dataloader`."):
assert len(dm) == 0


def test_len_all_dataloader_methods_implemented():
    class BoringDataModule(LightningDataModule):
        def __init__(self, dataloader):
            super().__init__()
            self.dataloader = dataloader

        def train_dataloader(self):
            return {"foo": self.dataloader, "bar": self.dataloader}

        def val_dataloader(self):
            return self.dataloader

        def test_dataloader(self):
            return [self.dataloader]

        def predict_dataloader(self):
            return [self.dataloader, self.dataloader]

    dm = BoringDataModule(DATALOADER)
    assert len(dm) == 192
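
With `DATALOADER` contributing 32 batches, the expected value in the last test works out to 64 (train dict) + 32 (val) + 32 (test list) + 64 (predict list) = 192.

`CustomNotImplementedErrorDataloader`, imported near the top of the test file, is added elsewhere in this PR and is not shown in this diff view. A plausible minimal sketch (an assumption, not the PR's actual helper) is a `DataLoader` whose `__len__` raises `NotImplementedError`, so that `has_len` reports `False` and the warning path in `LightningDataModule.__len__` is exercised:

```python
from torch.utils.data import DataLoader


class CustomNotImplementedErrorDataloader(DataLoader):
    """Hypothetical stand-in for tests.helpers.dataloaders.CustomNotImplementedErrorDataloader."""

    def __init__(self, dataloader: DataLoader):
        # Reuse the wrapped dataloader's dataset; only the __len__ behavior matters here.
        super().__init__(dataset=dataloader.dataset)

    def __len__(self) -> int:
        # Mimic an iterable-style dataloader with no known length; `has_len` catches the
        # NotImplementedError, returns False, and `LightningDataModule.__len__` warns and counts 0.
        raise NotImplementedError
```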