diff --git a/CHANGELOG.md b/CHANGELOG.md
index 927d71bf9c53f..c189e6ce006c2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -547,6 +547,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Deprecated `Trainer.num_processes` in favor of `Trainer.num_devices` ([#12388](https://github.com/PyTorchLightning/pytorch-lightning/pull/12388))
 
 
+- Deprecated `Trainer.data_parallel_device_ids` in favor of `Trainer.device_ids` ([#12072](https://github.com/PyTorchLightning/pytorch-lightning/pull/12072))
+
+
 ### Removed
 
 - Removed deprecated parameter `method` in `pytorch_lightning.utilities.model_helpers.is_overridden` ([#10507](https://github.com/PyTorchLightning/pytorch-lightning/pull/10507))
@@ -749,6 +752,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Removed `AcceleratorConnector.num_processes` property ([#12388](https://github.com/PyTorchLightning/pytorch-lightning/pull/12388))
 
 
+- Removed `AcceleratorConnector.parallel_device_ids` property ([#12072](https://github.com/PyTorchLightning/pytorch-lightning/pull/12072))
+
+
 ### Fixed
 
 - Fixed an issue where `ModelCheckpoint` could delete older checkpoints when `dirpath` has changed during resumed training ([#12045](https://github.com/PyTorchLightning/pytorch-lightning/pull/12045))
diff --git a/pytorch_lightning/callbacks/gpu_stats_monitor.py b/pytorch_lightning/callbacks/gpu_stats_monitor.py
index 8fb92006708f7..607dc4cf0efbd 100644
--- a/pytorch_lightning/callbacks/gpu_stats_monitor.py
+++ b/pytorch_lightning/callbacks/gpu_stats_monitor.py
@@ -133,8 +133,7 @@ def setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage: O
             )
 
         # The logical device IDs for selected devices
-        # ignoring mypy check because `trainer.data_parallel_device_ids` is None when using CPU
-        self._device_ids = sorted(set(trainer.data_parallel_device_ids))  # type: ignore
+        self._device_ids = sorted(set(trainer.device_ids))
 
         # The unmasked real GPU IDs
         self._gpu_ids = self._get_gpu_ids(self._device_ids)
diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py
index aec44e4989d1a..0d2013c1606cf 100644
--- a/pytorch_lightning/trainer/connectors/accelerator_connector.py
+++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py
@@ -802,10 +802,6 @@ def tpu_cores(self) -> Optional[Union[List[int], int]]:
     def gpus(self) -> Optional[Union[List[int], str, int]]:
         return self._gpus
 
-    @property
-    def parallel_device_ids(self) -> List[int]:
-        return [i for i in range(len(self.parallel_devices))] if isinstance(self.accelerator, GPUAccelerator) else []
-
     @property
     def is_distributed(self) -> bool:
         # Used for custom plugins.
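For downstream code, the migration this patch implies mirrors the `GPUStatsMonitor` change above: anything that read `trainer.data_parallel_device_ids` (or the removed `AcceleratorConnector.parallel_device_ids`) should switch to `trainer.device_ids`, which is a plain list and no longer needs the `None` guard that the old property required on CPU. A minimal, hypothetical sketch of such a callback migration follows; the callback class and its print statement are illustrative only and are not part of this patch.

```python
import pytorch_lightning as pl
from pytorch_lightning.callbacks import Callback


class DeviceLoggingCallback(Callback):
    """Illustrative only: shows the property migration suggested by this patch."""

    def setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage=None) -> None:
        # Before (now emits a deprecation warning and is slated for removal in v1.8):
        #     device_ids = sorted(set(trainer.data_parallel_device_ids or []))
        # After: `Trainer.device_ids` is a list of logical indices for any accelerator.
        device_ids = sorted(set(trainer.device_ids))
        print(f"setup({stage}): running on logical device ids {device_ids}")
```

Passed via `Trainer(callbacks=[DeviceLoggingCallback()])`, the same code path works for CPU and GPU runs, which is what removing the `# type: ignore` in `gpu_stats_monitor.py` relies on.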
diff --git a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py
index 8d9f6e1210d41..6aa8b355fdb28 100644
--- a/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py
+++ b/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py
@@ -221,7 +221,7 @@ def _log_gpus_metrics(self) -> None:
                 self.trainer.lightning_module.log(key, mem, prog_bar=False, logger=True)
             else:
                 gpu_id = int(key.split("/")[0].split(":")[1])
-                if gpu_id in self.trainer._accelerator_connector.parallel_device_ids:
+                if gpu_id in self.trainer.device_ids:
                     self.trainer.lightning_module.log(
                         key, mem, prog_bar=False, logger=True, on_step=True, on_epoch=False
                     )
diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index 287ed2f4d1a3d..0a90521c4c545 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -2114,9 +2114,11 @@ def devices(self) -> int:
 
     @property
     def data_parallel_device_ids(self) -> Optional[List[int]]:
-        return (
-            self._accelerator_connector.parallel_device_ids if self._accelerator_connector.parallel_device_ids else None
+        rank_zero_deprecation(
+            "`Trainer.data_parallel_device_ids` was deprecated in v1.6 and will be removed in v1.8."
+            " Please use `Trainer.device_ids` instead."
         )
+        return self.device_ids if isinstance(self.accelerator, GPUAccelerator) else None
 
     @property
     def lightning_module(self) -> "pl.LightningModule":
diff --git a/tests/deprecated_api/test_remove_1-8.py b/tests/deprecated_api/test_remove_1-8.py
index 32e89f62c65d0..09a2973143337 100644
--- a/tests/deprecated_api/test_remove_1-8.py
+++ b/tests/deprecated_api/test_remove_1-8.py
@@ -1027,3 +1027,30 @@ def test_trainer_num_processes(monkeypatch, trainer_kwargs, expected_num_process
         "Please use `Trainer.num_devices` instead."
     ):
         trainer.num_processes == expected_num_processes
+
+
+@pytest.mark.parametrize(
+    ["trainer_kwargs", "expected_data_parallel_device_ids"],
+    [
+        ({}, None),
+        ({"devices": 1}, None),
+        ({"devices": "1"}, None),
+        ({"accelerator": "gpu", "devices": 1}, [0]),
+        ({"accelerator": "gpu", "devices": 2}, [0, 1]),
+        ({"accelerator": "gpu", "devices": [1]}, [1]),
+        ({"accelerator": "gpu", "devices": "0"}, None),
+        ({"accelerator": "gpu", "devices": "0,"}, [0]),
+    ],
+)
+def test_trainer_data_parallel_device_ids(monkeypatch, trainer_kwargs, expected_data_parallel_device_ids):
+    """Test that `Trainer.data_parallel_device_ids` is deprecated in favor of `Trainer.device_ids`."""
+    if trainer_kwargs.get("accelerator") == "gpu":
+        monkeypatch.setattr(torch.cuda, "is_available", lambda: True)
+        monkeypatch.setattr(torch.cuda, "device_count", lambda: 2)
+
+    trainer = Trainer(**trainer_kwargs)
+    with pytest.deprecated_call(
+        match="`Trainer.data_parallel_device_ids` was deprecated in v1.6 and will be removed in v1.8."
+        " Please use `Trainer.device_ids` instead."
+    ):
+        assert trainer.data_parallel_device_ids == expected_data_parallel_device_ids
diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index 719cfb43024b1..306604737db53 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -184,14 +184,16 @@ def test_parse_gpu_returns_none_when_no_devices_are_available(mocked_device_coun
 )
 @mock.patch("torch.cuda.device_count", return_value=1)
 @mock.patch("torch.cuda.is_available", return_value=True)
-@pytest.mark.parametrize("gpus", [[0, 1, 2], 2, "0"])
+@pytest.mark.parametrize("gpus", [[0, 1, 2], 2, "0", [0, 2]])
 def test_torchelastic_gpu_parsing(mocked_device_count, mocked_is_available, gpus):
     """Ensure when using torchelastic and nproc_per_node is set to the default of 1 per GPU device
     That we omit sanitizing the gpus as only one of the GPUs is visible."""
     trainer = Trainer(gpus=gpus)
     assert isinstance(trainer._accelerator_connector.cluster_environment, TorchElasticEnvironment)
     assert trainer.gpus == gpus
+    # only compare the device ids when GPUs are actually selected
+    if device_parser.parse_gpu_ids(gpus) is not None:
+        assert trainer.device_ids == device_parser.parse_gpu_ids(gpus)
 
 
 @RunIf(min_gpus=1)
diff --git a/tests/trainer/test_trainer_cli.py b/tests/trainer/test_trainer_cli.py
index 11b904870294b..37a02e895d560 100644
--- a/tests/trainer/test_trainer_cli.py
+++ b/tests/trainer/test_trainer_cli.py
@@ -161,10 +161,10 @@ def test_argparse_args_parsing_fast_dev_run(cli_args, expected):
 
 
 @pytest.mark.parametrize(
-    ["cli_args", "expected_parsed", "expected_device_ids"],
-    [("", None, None), ("--accelerator gpu --devices 1", "1", [0]), ("--accelerator gpu --devices 0,", "0,", [0])],
+    ["cli_args", "expected_parsed"],
+    [("", None), ("--accelerator gpu --devices 1", "1"), ("--accelerator gpu --devices 0,", "0,")],
 )
-def test_argparse_args_parsing_devices(cli_args, expected_parsed, expected_device_ids, monkeypatch):
+def test_argparse_args_parsing_devices(cli_args, expected_parsed, monkeypatch):
     """Test multi type argument with bool."""
 
     monkeypatch.setattr(torch.cuda, "is_available", lambda: True)
@@ -177,8 +177,7 @@ def test_argparse_args_parsing_devices(cli_args, expected_parsed, expected_devic
     args = Trainer.parse_argparser(parser)
 
     assert args.devices == expected_parsed
-    trainer = Trainer.from_argparse_args(args)
-    assert trainer.data_parallel_device_ids == expected_device_ids
+    assert Trainer.from_argparse_args(args)
 
 
 @pytest.mark.parametrize(
diff --git a/tests/utilities/test_cli.py b/tests/utilities/test_cli.py
index 5da16737fc2d7..8afd831901ba7 100644
--- a/tests/utilities/test_cli.py
+++ b/tests/utilities/test_cli.py
@@ -180,7 +180,9 @@ def test_parse_args_parsing_complex_types(cli_args, expected, instantiate):
     assert Trainer.from_argparse_args(args)
 
 
-@pytest.mark.parametrize(["cli_args", "expected_gpu"], [("--gpus 1", [0]), ("--gpus 0,", [0]), ("--gpus 0,1", [0, 1])])
+@pytest.mark.parametrize(
+    ["cli_args", "expected_gpu"], [("--gpus 1", [0]), ("--gpus 0,", [0]), ("--gpus 1,", [1]), ("--gpus 0,1", [0, 1])]
+)
 def test_parse_args_parsing_gpus(monkeypatch, cli_args, expected_gpu):
     """Test parsing of gpus and instantiation of Trainer."""
     monkeypatch.setattr("torch.cuda.device_count", lambda: 2)
@@ -192,7 +194,7 @@ def test_parse_args_parsing_gpus(monkeypatch, cli_args, expected_gpu):
     args = parser.parse_args()
 
     trainer = Trainer.from_argparse_args(args)
-    assert trainer.data_parallel_device_ids == expected_gpu
+    assert trainer.device_ids == expected_gpu
 
 
 @pytest.mark.skipif(
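A note on the string expectations exercised by the tests above: a bare numeric string such as "1" or "0" is treated as a device count (so "0" selects no GPUs), while a trailing comma turns it into a list of indices (so "0," means GPU 0 and "1," means GPU 1). The sketch below approximates that convention for illustration only; it is not the actual `pytorch_lightning.utilities.device_parser.parse_gpu_ids`, which additionally validates the requested ids against the GPUs that are actually visible.

```python
from typing import List, Optional, Union


def parse_gpu_spec(gpus: Optional[Union[int, str, List[int]]]) -> Optional[List[int]]:
    """Rough approximation of the gpus/devices string convention used by the tests above."""
    if gpus is None:
        return None
    if isinstance(gpus, str):
        if "," in gpus:
            # "0," or "0,1" -> explicit list of device indices
            return [int(x) for x in gpus.split(",") if x.strip()]
        # "2" -> a count of devices, same as the int form
        gpus = int(gpus)
    if isinstance(gpus, int):
        # 0 means "no GPUs"; n > 0 means the first n indices
        return list(range(gpus)) if gpus > 0 else None
    # already a list of indices (the -1 "use all" shorthand is omitted here)
    return list(gpus) if gpus else None


assert parse_gpu_spec("0") is None   # zero GPUs
assert parse_gpu_spec("0,") == [0]   # GPU index 0
assert parse_gpu_spec("1,") == [1]   # GPU index 1
assert parse_gpu_spec(2) == [0, 1]   # first two GPUs
assert parse_gpu_spec([1]) == [1]    # explicit index list
```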