From cffe53a370e0ca638795a40495fa10134c1200c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 24 Oct 2021 17:07:51 +0200 Subject: [PATCH 1/9] deprecate property and configure method --- .../connectors/accelerator_connector.py | 32 +++++++++++++------ .../test_accelerator_connector.py | 12 +++---- tests/models/test_restore.py | 2 +- 3 files changed, 29 insertions(+), 17 deletions(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 8369130778f5a..b28b2e7a982a4 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -135,7 +135,7 @@ def __init__( self.precision = precision self.amp_type = amp_type.lower() if isinstance(amp_type, str) else None self.amp_level = amp_level - self.is_slurm_managing_tasks = False + self._is_slurm_managing_tasks = False self._precision_plugin: Optional[PrecisionPlugin] = None self._training_type_plugin: Optional[TrainingTypePlugin] = None @@ -690,7 +690,7 @@ def select_training_type_plugin(self) -> TrainingTypePlugin: cluster_environment=self.select_cluster_environment(), parallel_devices=self.parallel_devices ) elif self.use_ddp: - use_slurm_ddp = self.use_ddp and self.is_slurm_managing_tasks + use_slurm_ddp = self.use_ddp and self._is_slurm_managing_tasks use_torchelastic_ddp = self.use_ddp and TorchElasticEnvironment.is_using_torchelastic() use_kubeflow_ddp = self.use_ddp and KubeflowEnvironment.is_using_kubeflow() use_ddp_spawn = self._distrib_type == DistributedType.DDP_SPAWN @@ -698,7 +698,7 @@ def select_training_type_plugin(self) -> TrainingTypePlugin: use_tpu_spawn = self.use_tpu and self._distrib_type == DistributedType.TPU_SPAWN use_ddp_cpu_torch_elastic = use_ddp_cpu_spawn and TorchElasticEnvironment.is_using_torchelastic() use_ddp_cpu_kubeflow = use_ddp_cpu_spawn and KubeflowEnvironment.is_using_kubeflow() - use_ddp_cpu_slurm = use_ddp_cpu_spawn and self.is_slurm_managing_tasks + use_ddp_cpu_slurm = use_ddp_cpu_spawn and self._is_slurm_managing_tasks use_ddp_sharded = self._distrib_type == DistributedType.DDP_SHARDED use_ddp_sharded_spawn = self._distrib_type == DistributedType.DDP_SHARDED_SPAWN use_ddp_fully_sharded = self._distrib_type == DistributedType.DDP_FULLY_SHARDED @@ -794,7 +794,7 @@ def select_accelerator(self) -> Accelerator: def select_cluster_environment(self) -> ClusterEnvironment: if self._cluster_environment is not None: return self._cluster_environment - if self.is_slurm_managing_tasks: + if self._is_slurm_managing_tasks: env = SLURMEnvironment() elif TorchElasticEnvironment.is_using_torchelastic(): env = TorchElasticEnvironment() @@ -977,7 +977,19 @@ def update_device_type_if_training_type_plugin_passed(self) -> None: elif self.has_gpu: self._device_type = DeviceType.GPU + @property + def is_slurm_managing_tasks(self) -> bool: + rank_zero_deprecation( + "`AcceleratorConnector.is_slurm_managing_tasks` was deprecated in v1.5 and will be removed in v1.6." + ) + return self._is_slurm_managing_tasks + def configure_slurm_ddp(self): + rank_zero_deprecation( + "`AcceleratorConnector.configure_slurm_ddp()` was deprecated in v1.5 and will be removed in v1.6." + ) + + def _configure_slurm_ddp(self): # extract SLURM flag vars # whenever we have the correct number of tasks, we let slurm manage processes # otherwise we launch the required number of processes @@ -986,29 +998,29 @@ def configure_slurm_ddp(self): num_slurm_tasks = 0 try: num_slurm_tasks = int(os.environ["SLURM_NTASKS"]) - self.is_slurm_managing_tasks = num_slurm_tasks == num_requested_gpus + self._is_slurm_managing_tasks = num_slurm_tasks == num_requested_gpus # enable slurm cpu if num_requested_gpus == 0: - self.is_slurm_managing_tasks = num_slurm_tasks == self.num_processes + self._is_slurm_managing_tasks = num_slurm_tasks == self.num_processes # in interactive mode we don't manage tasks job_name = os.environ["SLURM_JOB_NAME"] if job_name == "bash": - self.is_slurm_managing_tasks = False + self._is_slurm_managing_tasks = False except Exception: # likely not on slurm, so set the slurm managed flag to false - self.is_slurm_managing_tasks = False + self._is_slurm_managing_tasks = False # used for tests only, set this flag to simulate slurm managing a task try: should_fake = int(os.environ["FAKE_SLURM_MANAGING_TASKS"]) if should_fake: - self.is_slurm_managing_tasks = True + self._is_slurm_managing_tasks = True except Exception: pass # notify user the that slurm is managing tasks - if self.is_slurm_managing_tasks: + if self._is_slurm_managing_tasks: rank_zero_info("Multi-processing is handled by Slurm.") diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index ca91b61fa121d..78189def69ad5 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -100,7 +100,7 @@ def test_accelerator_choice_ddp_spawn(cuda_available_mock, device_count_mock): def test_accelerator_choice_ddp_slurm(setup_distributed_mock): class CB(Callback): def on_fit_start(self, trainer, pl_module): - assert trainer.accelerator_connector.is_slurm_managing_tasks + assert trainer.accelerator_connector._is_slurm_managing_tasks assert isinstance(trainer.accelerator, GPUAccelerator) assert isinstance(trainer.training_type_plugin, DDPPlugin) assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment) @@ -132,7 +132,7 @@ def on_fit_start(self, trainer, pl_module): def test_accelerator_choice_ddp2_slurm(device_count_mock, setup_distributed_mock): class CB(Callback): def on_fit_start(self, trainer, pl_module): - assert trainer.accelerator_connector.is_slurm_managing_tasks + assert trainer.accelerator_connector._is_slurm_managing_tasks assert isinstance(trainer.accelerator, GPUAccelerator) assert isinstance(trainer.training_type_plugin, DDP2Plugin) assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment) @@ -307,7 +307,7 @@ def on_fit_start(self, trainer, pl_module): def test_accelerator_choice_ddp_cpu_slurm(device_count_mock, setup_distributed_mock): class CB(Callback): def on_fit_start(self, trainer, pl_module): - assert trainer.accelerator_connector.is_slurm_managing_tasks + assert trainer.accelerator_connector._is_slurm_managing_tasks assert isinstance(trainer.accelerator, CPUAccelerator) assert isinstance(trainer.training_type_plugin, DDPPlugin) assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment) @@ -756,7 +756,7 @@ def test_strategy_choice_ddp_spawn(cuda_available_mock, device_count_mock): def test_strategy_choice_ddp_slurm(setup_distributed_mock): class CB(Callback): def on_fit_start(self, trainer, pl_module): - assert trainer.accelerator_connector.is_slurm_managing_tasks + assert trainer.accelerator_connector._is_slurm_managing_tasks assert isinstance(trainer.accelerator, GPUAccelerator) assert isinstance(trainer.training_type_plugin, DDPPlugin) assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment) @@ -788,7 +788,7 @@ def on_fit_start(self, trainer, pl_module): def test_strategy_choice_ddp2_slurm(device_count_mock, setup_distributed_mock): class CB(Callback): def on_fit_start(self, trainer, pl_module): - assert trainer.accelerator_connector.is_slurm_managing_tasks + assert trainer.accelerator_connector._is_slurm_managing_tasks assert isinstance(trainer.accelerator, GPUAccelerator) assert isinstance(trainer.training_type_plugin, DDP2Plugin) assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment) @@ -963,7 +963,7 @@ def on_fit_start(self, trainer, pl_module): def test_strategy_choice_ddp_cpu_slurm(device_count_mock, setup_distributed_mock): class CB(Callback): def on_fit_start(self, trainer, pl_module): - assert trainer.accelerator_connector.is_slurm_managing_tasks + assert trainer.accelerator_connector._is_slurm_managing_tasks assert isinstance(trainer.accelerator, CPUAccelerator) assert isinstance(trainer.training_type_plugin, DDPPlugin) assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment) diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py index 569c861eb32f0..ba886a9459d87 100644 --- a/tests/models/test_restore.py +++ b/tests/models/test_restore.py @@ -502,7 +502,7 @@ def test_dp_resume(tmpdir): # fit model trainer = Trainer(**trainer_options) - trainer.is_slurm_managing_tasks = True + trainer._is_slurm_managing_tasks = True trainer.fit(model, datamodule=dm) # track epoch before saving. Increment since we finished the current epoch, don't want to rerun From 3fadaf64757bef3d29651cb03a84eeaa8ba498e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 24 Oct 2021 17:09:00 +0200 Subject: [PATCH 2/9] test deprecation --- tests/deprecated_api/test_remove_1-7.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/deprecated_api/test_remove_1-7.py b/tests/deprecated_api/test_remove_1-7.py index edf85c11766e3..27eb8288d8df6 100644 --- a/tests/deprecated_api/test_remove_1-7.py +++ b/tests/deprecated_api/test_remove_1-7.py @@ -387,3 +387,15 @@ def test_v1_7_0_deprecate_gpu_stats_monitor(tmpdir): def test_v1_7_0_deprecate_xla_stats_monitor(tmpdir): with pytest.deprecated_call(match="The `XLAStatsMonitor` callback was deprecated in v1.5"): _ = XLAStatsMonitor() + + +def test_v1_7_0_configure_slurm_ddp(): + trainer = Trainer() + with pytest.deprecated_call(match=r"`AcceleratorConnector.configure_slurm_ddp\(\)` was deprecated in v1.6"): + trainer.accelerator_connector.configure_slurm_ddp() + + +def test_v1_7_0_is_slurm_managing_tasks(): + trainer = Trainer() + with pytest.deprecated_call(match=r"`AcceleratorConnector.is_slurm_managing_tasks` was deprecated in v1.6"): + _ = trainer.accelerator_connector.is_slurm_managing_tasks From ef93ba058bf24dd846bc39f5aa149152f8bab6df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 24 Oct 2021 17:15:33 +0200 Subject: [PATCH 3/9] update changelog --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 655484292ee59..409c7bb341f17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -408,6 +408,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Deprecated `GPUStatsMonitor` and `XLAStatsMonitor` in favor of `DeviceStatsMonitor` callback ([#9924](https://github.com/PyTorchLightning/pytorch-lightning/pull/9924)) + +- Deprecated access to the `AcceleratorConnector.is_slurm_managing_tasks` attribute and marked it as protected ([#10101](https://github.com/PyTorchLightning/pytorch-lightning/pull/10101)) + + +- Deprecated access to the `AcceleratorConnector.configure_slurm_ddp` method and marked it as protected ([#10101](https://github.com/PyTorchLightning/pytorch-lightning/pull/10101)) + + ### Removed - Removed deprecated `metrics` ([#8586](https://github.com/PyTorchLightning/pytorch-lightning/pull/8586/)) From 4c3358ba861eacf9dbc5d2ccac35c13a615ec133 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 24 Oct 2021 17:21:41 +0200 Subject: [PATCH 4/9] return value --- pytorch_lightning/trainer/connectors/accelerator_connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index b28b2e7a982a4..1b1118733e42c 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -984,7 +984,7 @@ def is_slurm_managing_tasks(self) -> bool: ) return self._is_slurm_managing_tasks - def configure_slurm_ddp(self): + def configure_slurm_ddp(self) -> None: rank_zero_deprecation( "`AcceleratorConnector.configure_slurm_ddp()` was deprecated in v1.5 and will be removed in v1.6." ) From 3140fa466b6096c0d96b286cb610965e98a7c90d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 24 Oct 2021 17:23:43 +0200 Subject: [PATCH 5/9] update test --- tests/deprecated_api/test_remove_1-7.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/deprecated_api/test_remove_1-7.py b/tests/deprecated_api/test_remove_1-7.py index 27eb8288d8df6..2c36d95f1629b 100644 --- a/tests/deprecated_api/test_remove_1-7.py +++ b/tests/deprecated_api/test_remove_1-7.py @@ -391,11 +391,11 @@ def test_v1_7_0_deprecate_xla_stats_monitor(tmpdir): def test_v1_7_0_configure_slurm_ddp(): trainer = Trainer() - with pytest.deprecated_call(match=r"`AcceleratorConnector.configure_slurm_ddp\(\)` was deprecated in v1.6"): + with pytest.deprecated_call(match=r"`AcceleratorConnector.configure_slurm_ddp\(\)` was deprecated in v1.5"): trainer.accelerator_connector.configure_slurm_ddp() def test_v1_7_0_is_slurm_managing_tasks(): trainer = Trainer() - with pytest.deprecated_call(match=r"`AcceleratorConnector.is_slurm_managing_tasks` was deprecated in v1.6"): + with pytest.deprecated_call(match=r"`AcceleratorConnector.is_slurm_managing_tasks` was deprecated in v1.5"): _ = trainer.accelerator_connector.is_slurm_managing_tasks From 176f9ac05400ecd5697c60adf60e2649faed601d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 24 Oct 2021 18:27:57 +0200 Subject: [PATCH 6/9] rename --- pytorch_lightning/trainer/connectors/accelerator_connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 1b1118733e42c..a743faa12848a 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -164,7 +164,7 @@ def __init__( self._set_training_type_plugin() else: self.set_distributed_mode() - self.configure_slurm_ddp() + self._configure_slurm_ddp() self.handle_given_plugins() self.update_device_type_if_ipu_plugin() From f831775628e52ce388cfaa8c105196ff20074d67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sun, 24 Oct 2021 22:34:21 +0200 Subject: [PATCH 7/9] move --- tests/deprecated_api/test_remove_1-6.py | 12 ++++++++++++ tests/deprecated_api/test_remove_1-7.py | 12 ------------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index a5101d3311bf3..7a2235c71554f 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -406,3 +406,15 @@ def test_v1_6_0_deprecated_accelerator_pass_through_functions(): with pytest.deprecated_call(match="will be removed in v1.6"): accelerator.on_train_batch_start(batch=None, batch_idx=0) + + +def test_v1_6_0_configure_slurm_ddp(): + trainer = Trainer() + with pytest.deprecated_call(match=r"`AcceleratorConnector.configure_slurm_ddp\(\)` was deprecated in v1.5"): + trainer.accelerator_connector.configure_slurm_ddp() + + +def test_v1_6_0_is_slurm_managing_tasks(): + trainer = Trainer() + with pytest.deprecated_call(match=r"`AcceleratorConnector.is_slurm_managing_tasks` was deprecated in v1.5"): + _ = trainer.accelerator_connector.is_slurm_managing_tasks diff --git a/tests/deprecated_api/test_remove_1-7.py b/tests/deprecated_api/test_remove_1-7.py index 2c36d95f1629b..edf85c11766e3 100644 --- a/tests/deprecated_api/test_remove_1-7.py +++ b/tests/deprecated_api/test_remove_1-7.py @@ -387,15 +387,3 @@ def test_v1_7_0_deprecate_gpu_stats_monitor(tmpdir): def test_v1_7_0_deprecate_xla_stats_monitor(tmpdir): with pytest.deprecated_call(match="The `XLAStatsMonitor` callback was deprecated in v1.5"): _ = XLAStatsMonitor() - - -def test_v1_7_0_configure_slurm_ddp(): - trainer = Trainer() - with pytest.deprecated_call(match=r"`AcceleratorConnector.configure_slurm_ddp\(\)` was deprecated in v1.5"): - trainer.accelerator_connector.configure_slurm_ddp() - - -def test_v1_7_0_is_slurm_managing_tasks(): - trainer = Trainer() - with pytest.deprecated_call(match=r"`AcceleratorConnector.is_slurm_managing_tasks` was deprecated in v1.5"): - _ = trainer.accelerator_connector.is_slurm_managing_tasks From dc09be2beab3e51899ad2effa73cfa04a80378e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Mon, 25 Oct 2021 13:58:22 +0200 Subject: [PATCH 8/9] Update pytorch_lightning/trainer/connectors/accelerator_connector.py Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> --- pytorch_lightning/trainer/connectors/accelerator_connector.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index a743faa12848a..17c865ad47192 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -988,6 +988,7 @@ def configure_slurm_ddp(self) -> None: rank_zero_deprecation( "`AcceleratorConnector.configure_slurm_ddp()` was deprecated in v1.5 and will be removed in v1.6." ) + self._configure_slurm_ddp() def _configure_slurm_ddp(self): # extract SLURM flag vars From 5244d0349c3fba449907a961ee1995df9c655c63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Mon, 25 Oct 2021 14:01:11 +0200 Subject: [PATCH 9/9] add setter for deprecation --- .../trainer/connectors/accelerator_connector.py | 7 +++++++ tests/deprecated_api/test_remove_1-6.py | 3 +++ 2 files changed, 10 insertions(+) diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 17c865ad47192..1d2bc1dc680a2 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -984,6 +984,13 @@ def is_slurm_managing_tasks(self) -> bool: ) return self._is_slurm_managing_tasks + @is_slurm_managing_tasks.setter + def is_slurm_managing_tasks(self, value: bool) -> bool: + rank_zero_deprecation( + "`AcceleratorConnector.is_slurm_managing_tasks` was deprecated in v1.5 and will be removed in v1.6." + ) + self._is_slurm_managing_tasks = value + def configure_slurm_ddp(self) -> None: rank_zero_deprecation( "`AcceleratorConnector.configure_slurm_ddp()` was deprecated in v1.5 and will be removed in v1.6." diff --git a/tests/deprecated_api/test_remove_1-6.py b/tests/deprecated_api/test_remove_1-6.py index 7a2235c71554f..fd0984d671b3e 100644 --- a/tests/deprecated_api/test_remove_1-6.py +++ b/tests/deprecated_api/test_remove_1-6.py @@ -418,3 +418,6 @@ def test_v1_6_0_is_slurm_managing_tasks(): trainer = Trainer() with pytest.deprecated_call(match=r"`AcceleratorConnector.is_slurm_managing_tasks` was deprecated in v1.5"): _ = trainer.accelerator_connector.is_slurm_managing_tasks + + with pytest.deprecated_call(match=r"`AcceleratorConnector.is_slurm_managing_tasks` was deprecated in v1.5"): + trainer.accelerator_connector.is_slurm_managing_tasks = False