Update accelerator connector messages after the addition of strategy #9937

Merged 7 commits on Oct 18, 2021
59 changes: 26 additions & 33 deletions pytorch_lightning/trainer/connectors/accelerator_connector.py
@@ -240,12 +240,13 @@ def _validate_accelerator_and_devices(self) -> None:
             raise MisconfigurationException(
                 f"You passed `devices={self.devices}` but haven't specified"
                 " `accelerator=('auto'|'tpu'|'gpu'|'ipu'|'cpu')` for the devices mapping,"
-                f" got `accelerator={self.distributed_backend}`."
+                f" got `accelerator={self.distributed_backend!r}`."
             )

     def _validate_accelerator_type(self) -> None:
         if self._accelerator_type and self._accelerator_type != self._device_type:
-            raise MisconfigurationException(
+            # internal error: should not happen.
+            raise ValueError(
                 f"Mismatch between the requested accelerator type ({self._accelerator_type})"
                 f" and assigned device type ({self._device_type})."
             )
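Note: most of the message changes in this file switch plain f-string interpolation to the `!r` (repr) conversion so that string values render with quotes. A minimal standalone sketch of the difference, not taken from the Lightning sources:

backend = "ddp"
print(f"got `accelerator={backend}`.")    # prints: got `accelerator=ddp`.
print(f"got `accelerator={backend!r}`.")  # prints: got `accelerator='ddp'`.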
@@ -255,25 +256,16 @@ def _warn_if_devices_flag_ignored(self) -> None:
         if self.devices is None:
             return
         devices_warning = f"The flag `devices={self.devices}` will be ignored, as you have set"
-        if self.distributed_backend == "auto":
+        if self.distributed_backend in ("auto", DeviceType.TPU):
             if self.tpu_cores is not None:
                 rank_zero_warn(f"{devices_warning} `tpu_cores={self.tpu_cores}`")
-            elif self.ipus is not None:
-                rank_zero_warn(f"{devices_warning} `ipus={self.ipus}`")
-            elif self.gpus is not None:
-                rank_zero_warn(f"{devices_warning} `gpus={self.gpus}`")
-            elif self.num_processes != 1:
-                rank_zero_warn(f"{devices_warning} `num_processes={self.num_processes}`")
-        elif self.distributed_backend == DeviceType.TPU:
-            if self.tpu_cores is not None:
-                rank_zero_warn(f"{devices_warning} `tpu_cores={self.tpu_cores}`")
-        elif self.distributed_backend == DeviceType.IPU:
+        elif self.distributed_backend in ("auto", DeviceType.IPU):
             if self.ipus is not None:
                 rank_zero_warn(f"{devices_warning} `ipus={self.ipus}`")
-        elif self.distributed_backend == DeviceType.GPU:
+        elif self.distributed_backend in ("auto", DeviceType.GPU):
             if self.gpus is not None:
                 rank_zero_warn(f"{devices_warning} `gpus={self.gpus}`")
-        elif self.distributed_backend == DeviceType.CPU:
+        elif self.distributed_backend in ("auto", DeviceType.CPU):
             if self.num_processes != 1:
                 rank_zero_warn(f"{devices_warning} `num_processes={self.num_processes}`")

@@ -294,26 +286,27 @@ def _handle_accelerator_and_distributed_backend(
     ) -> None:
         if distributed_backend is not None:
             rank_zero_deprecation(
-                f"`Trainer(distributed_backend={distributed_backend})` has been deprecated and will be removed in v1.5."
-                f" Use `Trainer(strategy={distributed_backend})` instead."
+                f"`Trainer(distributed_backend={distributed_backend!r})` "
+                "has been deprecated and will be removed in v1.5."
+                f" Use `Trainer(strategy={distributed_backend!r})` instead."
             )
             if self.strategy is not None:
                 raise MisconfigurationException(
-                    f"You have passed `Trainer(strategy={self.strategy})` but have"
-                    f" also passed `Trainer(distributed_backend={distributed_backend})`."
-                    f"HINT: Use just `Trainer(strategy={self.strategy})` instead."
+                    f"You have passed `Trainer(strategy={self.strategy!r})` but have"
+                    f" also passed `Trainer(distributed_backend={distributed_backend!r})`."
+                    f" HINT: Use just `Trainer(strategy={self.strategy!r})` instead."
                 )

         if accelerator is not None and accelerator in list(DistributedType):
             rank_zero_deprecation(
-                f"Passing {accelerator} `strategy` to the `accelerator` flag in Trainer has been deprecated"
-                f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={accelerator})` instead."
+                f"Passing `Trainer(accelerator={accelerator!r})` has been deprecated"
+                f" in v1.5 and will be removed in v1.7. Use `Trainer(strategy={accelerator!r})` instead."
             )
             if self.strategy is not None:
                 raise MisconfigurationException(
-                    f"You have passed `Trainer(strategy={self.strategy})` but have"
-                    f" also passed `Trainer(accelerator={accelerator})`."
-                    f"HINT: Use just `Trainer(strategy={self.strategy})` instead."
+                    f"You have passed `Trainer(strategy={self.strategy!r})` but have"
+                    f" also passed `Trainer(accelerator={accelerator!r})`."
+                    f" HINT: Use just `Trainer(strategy={self.strategy!r})` instead."
                 )

     def _set_training_type_plugin(self) -> None:
@@ -329,7 +322,7 @@ def handle_given_plugins(self) -> None:
         for plug in self.plugins:
             if self.strategy is not None and self._is_plugin_training_type(plug):
                 raise MisconfigurationException(
-                    f"You have passed `Trainer(strategy={self.strategy})`"
+                    f"You have passed `Trainer(strategy={self.strategy!r})`"
                     f" and you can only specify one training type plugin, but you have passed {plug} as a plugin."
                 )
             if self._is_plugin_training_type(plug):
@@ -503,7 +496,7 @@ def _map_devices_to_accelerator(self, accelerator: str) -> bool:
         if accelerator == DeviceType.CPU:
             if not isinstance(self.devices, int):
                 raise MisconfigurationException(
-                    "The flag `devices` only supports integer for `accelerator='cpu'`,"
+                    "The flag `devices` must be an int with `accelerator='cpu'`,"
                     f" got `devices={self.devices}` instead."
                 )
             self.num_processes = self.devices
@@ -816,7 +809,7 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None):
         elif self.num_gpus > 1 and not _use_cpu:
             rank_zero_warn(
                 "You requested multiple GPUs but did not specify a backend, e.g."
-                ' `Trainer(accelerator="dp"|"ddp"|"ddp2")`. Setting `accelerator="ddp_spawn"` for you.'
+                ' `Trainer(strategy="dp"|"ddp"|"ddp2")`. Setting `strategy="ddp_spawn"` for you.'
             )
             self.distributed_backend = DistributedType.DDP_SPAWN

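Note: the warning above fires when several GPUs are requested without an explicit strategy, in which case the connector falls back to DDP spawn. A hedged usage sketch, assuming a Lightning release from around this PR (~1.5) and a machine with two visible GPUs:

from pytorch_lightning import Trainer

# No strategy given with two GPUs: warns as above and selects DDP spawn for you.
trainer = Trainer(gpus=2)

# Choosing a strategy explicitly avoids the warning.
trainer = Trainer(strategy="ddp", gpus=2)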
@@ -833,7 +826,7 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None):
                 self._distrib_type = DistributedType.DDP_SPAWN
             if self.num_gpus > 0:
                 rank_zero_warn(
-                    "You requested one or more GPUs, but set the backend to `ddp_cpu`. Training will not use GPUs."
+                    "You requested one or more GPUs, but set `accelerator='ddp_cpu'`. Training will not use GPUs."
                 )
                 self.parallel_device_ids = None
             if self.num_processes is None:
@@ -859,7 +852,7 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None):
             if (self.num_nodes and self.num_nodes > 1) or (self.num_processes and self.num_processes > 1):
                 if self._distrib_type in (DistributedType.DP, DistributedType.DDP2):
                     rank_zero_warn(
-                        f"{self._distrib_type} is not supported on CPUs, hence setting the distributed type to `ddp`."
+                        f"{self._distrib_type.value!r} is not supported on CPUs, hence setting `strategy='ddp'`."
                     )
                     self._distrib_type = DistributedType.DDP
                 else:
@@ -887,8 +880,7 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None):
         if self.num_nodes > 1 and not using_valid_distributed:
             # throw error to force user to choose a supported distributed type such as ddp or ddp2
             raise MisconfigurationException(
-                "Your chosen distributed type does not support num_nodes > 1. "
-                "Please set accelerator=ddp or accelerator=ddp2."
+                "Your chosen strategy does not support `num_nodes > 1`. Please set `strategy=('ddp'|'ddp2')`."
             )

     def _set_horovod_backend(self):
@@ -910,7 +902,8 @@ def check_interactive_compatibility(self):

         if _IS_INTERACTIVE and self._distrib_type is not None and not self._distrib_type.is_interactive_compatible():
             raise MisconfigurationException(
-                f"Selected distributed backend {self._distrib_type} is not compatible with an interactive"
+                f"`Trainer(strategy={self._distrib_type.value!r})` or"
+                f" `Trainer(accelerator={self._distrib_type.value!r})` is not compatible with an interactive"
                 " environment. Run your code as a script, or choose one of the compatible backends:"
                 f" {', '.join(DistributedType.interactive_compatible_types())}."
                 " In case you are spawning processes yourself, make sure to include the Trainer"
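Taken together, the connector changes above shift the user-facing vocabulary from `accelerator`/`distributed_backend` toward `strategy`. A hedged sketch of the behaviour these messages describe, assuming Lightning ~1.5 on a multi-GPU machine (illustrative usage, not an excerpt from this PR):

from pytorch_lightning import Trainer

# Deprecated spelling: still accepted, but emits the deprecation message above.
Trainer(accelerator="ddp", gpus=2)  # suggests Trainer(strategy='ddp') instead

# Mixing the new flag with a deprecated one is rejected with the HINT message above:
# Trainer(strategy="ddp", accelerator="ddp")  # raises MisconfigurationException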
8 changes: 4 additions & 4 deletions tests/accelerators/test_accelerator_connector.py
@@ -447,10 +447,10 @@ def on_fit_start(self, trainer, pl_module):
 @mock.patch("pytorch_lightning.utilities._IS_INTERACTIVE", return_value=True)
 @mock.patch("torch.cuda.device_count", return_value=2)
 def test_ipython_incompatible_backend_error(*_):
-    with pytest.raises(MisconfigurationException, match="backend ddp is not compatible"):
+    with pytest.raises(MisconfigurationException, match=r"strategy='ddp'\)`.*is not compatible"):
         Trainer(accelerator="ddp", gpus=2)

-    with pytest.raises(MisconfigurationException, match="backend ddp2 is not compatible"):
+    with pytest.raises(MisconfigurationException, match=r"strategy='ddp2'\)`.*is not compatible"):
         Trainer(accelerator="ddp2", gpus=2)


@@ -615,14 +615,14 @@ def test_set_devices_if_none_gpu():

 def test_devices_with_cpu_only_supports_integer():

-    with pytest.raises(MisconfigurationException, match="The flag `devices` only supports integer"):
+    with pytest.raises(MisconfigurationException, match="The flag `devices` must be an int"):
         Trainer(accelerator="cpu", devices="1,3")


 @pytest.mark.parametrize("training_type", ["ddp2", "dp"])
 def test_unsupported_distrib_types_on_cpu(training_type):

-    with pytest.warns(UserWarning, match="is not supported on CPUs, hence setting the distributed type to `ddp`."):
+    with pytest.warns(UserWarning, match="is not supported on CPUs, hence setting `strategy='ddp"):
         trainer = Trainer(accelerator=training_type, num_processes=2)

     assert trainer._distrib_type == DistributedType.DDP
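The updated `match=` patterns above need regex escaping because `pytest.raises` and `pytest.warns` search `match` as a regular expression against the message, so the literal parenthesis in the new `Trainer(strategy=...)` wording must be written as `\)`. A small standalone sketch of this mechanism, unrelated to the Lightning test suite:

import pytest

def test_match_is_a_regex_search():
    # `match` uses re.search on the exception text, so `\)` matches the literal parenthesis.
    with pytest.raises(ValueError, match=r"strategy='ddp'\)`.*is not compatible"):
        raise ValueError("`Trainer(strategy='ddp')` is not compatible with an interactive environment.")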
1 change: 0 additions & 1 deletion tests/accelerators/test_tpu.py
@@ -222,7 +222,6 @@ def on_train_end(self, trainer, pl_module):

 @RunIf(tpu=True)
 def test_ddp_cpu_not_supported_on_tpus():
-
     with pytest.raises(MisconfigurationException, match="`accelerator='ddp_cpu'` is not supported on TPU machines"):
         Trainer(accelerator="ddp_cpu")
