Skip to content

Update tests/models/*.py to use devices instead of gpus or ipus #11470

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 46 commits into from
Mar 26, 2022
Merged
Show file tree
Hide file tree
Changes from 39 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
de1401d
update tests for v2
mathemusician Jan 14, 2022
b581409
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 14, 2022
5257653
get rid of devices = 0 or devices = None
mathemusician Feb 9, 2022
9e75a0f
use gpu when accelerator="gpu"
mathemusician Feb 9, 2022
84caafc
use the right way of getting kwargs
mathemusician Feb 9, 2022
0b199dd
use devices instead of tpu_cores
mathemusician Feb 10, 2022
2318b43
switch mocked to match function
mathemusician Feb 10, 2022
63b1dcc
add accelerator
mathemusician Feb 10, 2022
81aa330
revert tests
mathemusician Mar 13, 2022
dcd3b03
put back a few more tests
mathemusician Mar 13, 2022
d9f352b
revert test_horovod
mathemusician Mar 13, 2022
5e19aa3
revert tpu test
mathemusician Mar 13, 2022
e86d265
Merge branch 'PyTorchLightning:master' into update_models
mathemusician Mar 14, 2022
c1f3df1
use tpu instead of gpu
mathemusician Mar 14, 2022
12041e1
Merge branch 'update_models' of https://github.com/mathemusician/pyto…
mathemusician Mar 14, 2022
26e5e66
use devices
mathemusician Mar 14, 2022
88e9752
devices always returns an int or List[int]
mathemusician Mar 14, 2022
1723b83
revert tests
mathemusician Mar 14, 2022
d35aba7
use devices instead of tpu_cores
mathemusician Mar 15, 2022
2638130
revert some tests so we can use depreacation warning instead
mathemusician Mar 15, 2022
9691028
Merge branch 'PyTorchLightning:master' into update_models
mathemusician Mar 18, 2022
c952dff
Merge branch 'master' into update_models
mathemusician Mar 22, 2022
7bb59ac
match current changes directly
mathemusician Mar 22, 2022
4d37f7d
Merge branch 'update_models' of https://github.com/mathemusician/pyto…
mathemusician Mar 22, 2022
ef61e37
Merge branch 'master' into update_models
mathemusician Mar 22, 2022
1e18c21
revert tpu test
mathemusician Mar 24, 2022
bee050c
Merge branch 'PyTorchLightning:master' into update_models
mathemusician Mar 24, 2022
c61b4f7
use accelerator="gpu"
mathemusician Mar 24, 2022
b6226af
Merge branch 'PyTorchLightning:master' into update_models
mathemusician Mar 24, 2022
548426e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 24, 2022
8e71f41
use if statements for readability
mathemusician Mar 24, 2022
0cd25ad
Merge branch 'update_models' of https://github.com/mathemusician/pyto…
mathemusician Mar 24, 2022
fc7d4a7
revert change for now
mathemusician Mar 25, 2022
88d3c0c
try accelerator=gpu again, but with the right indent
mathemusician Mar 25, 2022
68cdb79
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 25, 2022
46088a1
revert test again; it wasn't the indent
mathemusician Mar 25, 2022
8f69374
Merge branch 'update_models' of https://github.com/mathemusician/pyto…
mathemusician Mar 25, 2022
dfb7d36
revert test, it wasn't the indent
mathemusician Mar 25, 2022
e5de2d3
Merge branch 'master' into update_models
mathemusician Mar 25, 2022
45e6dd7
add Callback.state_dict back
mathemusician Mar 25, 2022
33a8ecc
Merge branch 'update_models' of https://github.com/mathemusician/pyto…
mathemusician Mar 25, 2022
a3582b9
add Callback.load_state_dict back to test_trainer_model_hook_system_f…
mathemusician Mar 25, 2022
d05cc21
wow, missed a few things when reverting the test
mathemusician Mar 25, 2022
c8d3819
Merge branch 'PyTorchLightning:master' into update_models
mathemusician Mar 25, 2022
5c812f3
Merge branch 'PyTorchLightning:master' into update_models
mathemusician Mar 25, 2022
2fbd71d
Apply suggestions from code review
rohitgr7 Mar 26, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions tests/models/test_amp.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,18 @@ def _assert_autocast_enabled(self):
],
)
@pytest.mark.parametrize("precision", [16, "bf16"])
@pytest.mark.parametrize("num_processes", [1, 2])
def test_amp_cpus(tmpdir, strategy, precision, num_processes):
@pytest.mark.parametrize("devices", [1, 2])
def test_amp_cpus(tmpdir, strategy, precision, devices):
"""Make sure combinations of AMP and training types work if supported."""
tutils.reset_seed()

trainer = Trainer(
default_root_dir=tmpdir, num_processes=num_processes, max_epochs=1, strategy=strategy, precision=precision
default_root_dir=tmpdir,
accelerator="cpu",
devices=devices,
max_epochs=1,
strategy=strategy,
precision=precision,
)

model = AMPTestModel()
Expand All @@ -97,12 +102,19 @@ def test_amp_cpus(tmpdir, strategy, precision, num_processes):
@RunIf(min_gpus=2, min_torch="1.10")
@pytest.mark.parametrize("strategy", [None, "dp", "ddp_spawn"])
@pytest.mark.parametrize("precision", [16, "bf16"])
@pytest.mark.parametrize("gpus", [1, 2])
def test_amp_gpus(tmpdir, strategy, precision, gpus):
@pytest.mark.parametrize("devices", [1, 2])
def test_amp_gpus(tmpdir, strategy, precision, devices):
"""Make sure combinations of AMP and training types work if supported."""
tutils.reset_seed()

trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, gpus=gpus, strategy=strategy, precision=precision)
trainer = Trainer(
default_root_dir=tmpdir,
max_epochs=1,
accelerator="gpu",
devices=devices,
strategy=strategy,
precision=precision,
)

model = AMPTestModel()
trainer.fit(model)
Expand Down Expand Up @@ -141,7 +153,8 @@ def test_amp_gpu_ddp_slurm_managed(tmpdir):
trainer = Trainer(
default_root_dir=tmpdir,
max_epochs=1,
gpus=[0],
accelerator="gpu",
devices=[0],
strategy="ddp_spawn",
precision=16,
callbacks=[checkpoint],
Expand Down Expand Up @@ -195,7 +208,9 @@ def configure_optimizers(self):
model = CustomModel()
model.training_epoch_end = None

trainer = Trainer(default_root_dir=tmpdir, max_steps=5, precision=16, amp_backend="apex", gpus=1)
trainer = Trainer(
default_root_dir=tmpdir, max_steps=5, precision=16, amp_backend="apex", accelerator="gpu", devices=1
)
assert str(trainer.amp_backend) == "AMPType.APEX"
trainer.fit(model)
assert trainer.state.finished, f"Training failed with {trainer.state}"
Expand Down
4 changes: 2 additions & 2 deletions tests/models/test_cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,8 @@ def test_multi_cpu_model_ddp(tmpdir):
max_epochs=1,
limit_train_batches=0.4,
limit_val_batches=0.2,
gpus=None,
num_processes=2,
accelerator="cpu",
devices=2,
strategy="ddp_spawn",
)

Expand Down
48 changes: 25 additions & 23 deletions tests/models/test_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ def test_multi_gpu_none_backend(tmpdir):
max_epochs=1,
limit_train_batches=0.2,
limit_val_batches=0.2,
gpus=2,
accelerator="gpu",
devices=2,
)

dm = ClassifDataModule()
Expand All @@ -56,16 +57,17 @@ def test_multi_gpu_none_backend(tmpdir):


@RunIf(min_gpus=2)
@pytest.mark.parametrize("gpus", [1, [0], [1]])
def test_single_gpu_model(tmpdir, gpus):
@pytest.mark.parametrize("devices", [1, [0], [1]])
def test_single_gpu_model(tmpdir, devices):
"""Make sure single GPU works (DP mode)."""
trainer_options = dict(
default_root_dir=tmpdir,
enable_progress_bar=False,
max_epochs=1,
limit_train_batches=0.1,
limit_val_batches=0.1,
gpus=gpus,
accelerator="gpu",
devices=devices,
)

model = BoringModel()
Expand Down Expand Up @@ -94,7 +96,7 @@ def device_count():

# Asking for a gpu when none are available will result in a MisconfigurationException
@pytest.mark.parametrize(
["gpus", "expected_root_gpu", "strategy"],
["devices", "expected_root_gpu", "strategy"],
[
(1, None, "ddp"),
(3, None, "ddp"),
Expand All @@ -105,13 +107,13 @@ def device_count():
("-1", None, "ddp"),
],
)
def test_root_gpu_property_0_raising(mocked_device_count_0, gpus, expected_root_gpu, strategy):
def test_root_gpu_property_0_raising(mocked_device_count_0, devices, expected_root_gpu, strategy):
with pytest.raises(MisconfigurationException):
Trainer(gpus=gpus, strategy=strategy)
Trainer(accelerator="gpu", devices=devices, strategy=strategy)


@pytest.mark.parametrize(
["gpus", "expected_root_gpu"],
["devices", "expected_root_gpu"],
[
pytest.param(None, None, id="No gpus, expect gpu root device to be None"),
pytest.param([0], 0, id="Oth gpu, expect gpu root device to be 0."),
Expand All @@ -120,12 +122,12 @@ def test_root_gpu_property_0_raising(mocked_device_count_0, gpus, expected_root_
pytest.param([1, 2], 1, id="[1, 2] gpus, expect gpu root device to be 1."),
],
)
def test_determine_root_gpu_device(gpus, expected_root_gpu):
assert device_parser.determine_root_gpu_device(gpus) == expected_root_gpu
def test_determine_root_gpu_device(devices, expected_root_gpu):
assert device_parser.determine_root_gpu_device(devices) == expected_root_gpu


@pytest.mark.parametrize(
["gpus", "expected_gpu_ids"],
["devices", "expected_gpu_ids"],
[
(None, None),
(0, None),
Expand All @@ -143,31 +145,31 @@ def test_determine_root_gpu_device(gpus, expected_root_gpu):
pytest.param("-1", list(range(PRETEND_N_OF_GPUS)), id="'-1' - use all gpus"),
],
)
def test_parse_gpu_ids(mocked_device_count, gpus, expected_gpu_ids):
assert device_parser.parse_gpu_ids(gpus) == expected_gpu_ids
def test_parse_gpu_ids(mocked_device_count, devices, expected_gpu_ids):
assert device_parser.parse_gpu_ids(devices) == expected_gpu_ids


@pytest.mark.parametrize("gpus", [0.1, -2, False, [-1], [None], ["0"], [0, 0]])
def test_parse_gpu_fail_on_unsupported_inputs(mocked_device_count, gpus):
@pytest.mark.parametrize("devices", [0.1, -2, False, [-1], [None], ["0"], [0, 0]])
def test_parse_gpu_fail_on_unsupported_inputs(mocked_device_count, devices):
with pytest.raises(MisconfigurationException):
device_parser.parse_gpu_ids(gpus)
device_parser.parse_gpu_ids(devices)


@pytest.mark.parametrize("gpus", [[1, 2, 19], -1, "-1"])
def test_parse_gpu_fail_on_non_existent_id(mocked_device_count_0, gpus):
@pytest.mark.parametrize("devices", [[1, 2, 19], -1, "-1"])
def test_parse_gpu_fail_on_non_existent_id(mocked_device_count_0, devices):
with pytest.raises(MisconfigurationException):
device_parser.parse_gpu_ids(gpus)
device_parser.parse_gpu_ids(devices)


def test_parse_gpu_fail_on_non_existent_id_2(mocked_device_count):
with pytest.raises(MisconfigurationException):
device_parser.parse_gpu_ids([1, 2, 19])


@pytest.mark.parametrize("gpus", [-1, "-1"])
def test_parse_gpu_returns_none_when_no_devices_are_available(mocked_device_count_0, gpus):
@pytest.mark.parametrize("devices", [-1, "-1"])
def test_parse_gpu_returns_none_when_no_devices_are_available(mocked_device_count_0, devices):
with pytest.raises(MisconfigurationException):
device_parser.parse_gpu_ids(gpus)
device_parser.parse_gpu_ids(devices)


@mock.patch.dict(
Expand Down Expand Up @@ -198,7 +200,7 @@ def test_torchelastic_gpu_parsing(mocked_device_count, mocked_is_available, gpus

@RunIf(min_gpus=1)
def test_single_gpu_batch_parse():
trainer = Trainer(gpus=1)
trainer = Trainer(accelerator="gpu", devices=1)

# non-transferrable types
primitive_objects = [None, {}, [], 1.0, "x", [None, 2], {"x": (1, 2), "y": None}]
Expand Down
6 changes: 2 additions & 4 deletions tests/models/test_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,8 @@ def train_dataloader(self):
max_epochs=1,
enable_model_summary=False,
strategy="ddp",
gpus=2,
accelerator="gpu",
devices=2,
)
trainer.fit(model)

Expand Down Expand Up @@ -553,7 +554,6 @@ def training_step(self, batch, batch_idx):
dict(name="training_epoch_end", args=([dict(loss=ANY)] * train_batches,)),
dict(name="Callback.on_train_epoch_end", args=(trainer, model)),
# `ModelCheckpoint.save_checkpoint` is called here from `Callback.on_train_epoch_end`
dict(name="Callback.state_dict"),
dict(name="Callback.on_save_checkpoint", args=(trainer, model, saved_ckpt)),
dict(name="on_save_checkpoint", args=(saved_ckpt,)),
dict(name="on_train_epoch_end"),
Expand Down Expand Up @@ -627,7 +627,6 @@ def test_trainer_model_hook_system_fit_no_val_and_resume(tmpdir):
dict(name="setup", kwargs=dict(stage="fit")),
dict(name="on_load_checkpoint", args=(loaded_ckpt,)),
dict(name="Callback.on_load_checkpoint", args=(trainer, model, {"foo": True})),
dict(name="Callback.load_state_dict", args=({"foo": True},)),
dict(name="configure_sharded_model"),
dict(name="Callback.on_configure_sharded_model", args=(trainer, model)),
dict(name="configure_optimizers"),
Expand All @@ -649,7 +648,6 @@ def test_trainer_model_hook_system_fit_no_val_and_resume(tmpdir):
*model._train_batch(trainer, model, steps_after_reload, current_batch=1, current_epoch=1),
dict(name="training_epoch_end", args=([dict(loss=ANY)] * train_batches,)),
dict(name="Callback.on_train_epoch_end", args=(trainer, model)),
dict(name="Callback.state_dict"),
dict(name="Callback.on_save_checkpoint", args=(trainer, model, saved_ckpt)),
dict(name="on_save_checkpoint", args=(saved_ckpt,)),
dict(name="on_train_epoch_end"),
Expand Down
3 changes: 2 additions & 1 deletion tests/models/test_onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ def test_model_saves_on_multi_gpu(tmpdir):
max_epochs=1,
limit_train_batches=10,
limit_val_batches=10,
gpus=[0, 1],
accelerator="gpu",
devices=[0, 1],
strategy="ddp_spawn",
enable_progress_bar=False,
)
Expand Down
8 changes: 5 additions & 3 deletions tests/models/test_restore.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,8 @@ def test_running_test_pretrained_model_distrib_dp(tmpdir):
limit_val_batches=5,
callbacks=[checkpoint],
logger=logger,
gpus=[0, 1],
accelerator="gpu",
devices=[0, 1],
strategy="dp",
default_root_dir=tmpdir,
)
Expand Down Expand Up @@ -445,7 +446,8 @@ def test_running_test_pretrained_model_distrib_ddp_spawn(tmpdir):
limit_val_batches=2,
callbacks=[checkpoint],
logger=logger,
gpus=[0, 1],
accelerator="gpu",
devices=[0, 1],
strategy="ddp_spawn",
default_root_dir=tmpdir,
)
Expand Down Expand Up @@ -564,7 +566,7 @@ def test_dp_resume(tmpdir):
model = CustomClassificationModelDP(lr=0.1)
dm = ClassifDataModule()

trainer_options = dict(max_epochs=1, gpus=2, strategy="dp", default_root_dir=tmpdir)
trainer_options = dict(max_epochs=1, accelerator="gpu", devices=2, strategy="dp", default_root_dir=tmpdir)

# get logger
logger = tutils.get_default_logger(tmpdir)
Expand Down
8 changes: 4 additions & 4 deletions tests/models/test_tpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def val_dataloader(self):

@RunIf(tpu=True)
@pl_multi_process_test
def test_model_tpu_cores_1(tmpdir):
def test_model_devices_1(tmpdir):
"""Make sure model trains on TPU."""
tutils.reset_seed()
trainer_options = dict(
Expand Down Expand Up @@ -90,7 +90,7 @@ def test_model_tpu_index(tmpdir, tpu_core):

@RunIf(tpu=True)
@pl_multi_process_test
def test_model_tpu_cores_8(tmpdir):
def test_model_devices_8(tmpdir):
"""Make sure model trains on TPU."""
tutils.reset_seed()
trainer_options = dict(
Expand All @@ -110,7 +110,7 @@ def test_model_tpu_cores_8(tmpdir):

@RunIf(tpu=True)
@pl_multi_process_test
def test_model_16bit_tpu_cores_1(tmpdir):
def test_model_16bit_devices_1(tmpdir):
"""Make sure model trains on TPU."""
tutils.reset_seed()
trainer_options = dict(
Expand Down Expand Up @@ -152,7 +152,7 @@ def test_model_16bit_tpu_index(tmpdir, tpu_core):

@RunIf(tpu=True)
@pl_multi_process_test
def test_model_16bit_tpu_cores_8(tmpdir):
def test_model_16bit_devices_8(tmpdir):
"""Make sure model trains on TPU."""
tutils.reset_seed()
trainer_options = dict(
Expand Down