@@ -135,7 +135,7 @@ def __init__(
         self.precision = precision
         self.amp_type = amp_type.lower() if isinstance(amp_type, str) else None
         self.amp_level = amp_level
-        self.is_slurm_managing_tasks = False
+        self._is_slurm_managing_tasks = False

         self._precision_plugin: Optional[PrecisionPlugin] = None
         self._training_type_plugin: Optional[TrainingTypePlugin] = None
@@ -164,7 +164,7 @@ def __init__(
             self._set_training_type_plugin()
         else:
             self.set_distributed_mode()
-            self.configure_slurm_ddp()
+            self._configure_slurm_ddp()

         self.handle_given_plugins()
         self.update_device_type_if_ipu_plugin()
@@ -685,15 +685,15 @@ def select_training_type_plugin(self) -> TrainingTypePlugin:
                 cluster_environment=self.select_cluster_environment(), parallel_devices=self.parallel_devices
             )
         elif self.use_ddp:
-            use_slurm_ddp = self.use_ddp and self.is_slurm_managing_tasks
+            use_slurm_ddp = self.use_ddp and self._is_slurm_managing_tasks
             use_torchelastic_ddp = self.use_ddp and TorchElasticEnvironment.is_using_torchelastic()
             use_kubeflow_ddp = self.use_ddp and KubeflowEnvironment.is_using_kubeflow()
             use_ddp_spawn = self._distrib_type == DistributedType.DDP_SPAWN
             use_ddp_cpu_spawn = use_ddp_spawn and self.use_cpu
             use_tpu_spawn = self.use_tpu and self._distrib_type == DistributedType.TPU_SPAWN
             use_ddp_cpu_torch_elastic = use_ddp_cpu_spawn and TorchElasticEnvironment.is_using_torchelastic()
             use_ddp_cpu_kubeflow = use_ddp_cpu_spawn and KubeflowEnvironment.is_using_kubeflow()
-            use_ddp_cpu_slurm = use_ddp_cpu_spawn and self.is_slurm_managing_tasks
+            use_ddp_cpu_slurm = use_ddp_cpu_spawn and self._is_slurm_managing_tasks
             use_ddp_sharded = self._distrib_type == DistributedType.DDP_SHARDED
             use_ddp_sharded_spawn = self._distrib_type == DistributedType.DDP_SHARDED_SPAWN
             use_ddp_fully_sharded = self._distrib_type == DistributedType.DDP_FULLY_SHARDED
@@ -789,7 +789,7 @@ def select_accelerator(self) -> Accelerator:
     def select_cluster_environment(self) -> ClusterEnvironment:
         if self._cluster_environment is not None:
             return self._cluster_environment
-        if self.is_slurm_managing_tasks:
+        if self._is_slurm_managing_tasks:
             env = SLURMEnvironment()
         elif TorchElasticEnvironment.is_using_torchelastic():
             env = TorchElasticEnvironment()
@@ -972,7 +972,27 @@ def update_device_type_if_training_type_plugin_passed(self) -> None:
         elif self.has_gpu:
             self._device_type = DeviceType.GPU

-    def configure_slurm_ddp(self):
+    @property
+    def is_slurm_managing_tasks(self) -> bool:
+        rank_zero_deprecation(
+            "`AcceleratorConnector.is_slurm_managing_tasks` was deprecated in v1.5 and will be removed in v1.6."
+        )
+        return self._is_slurm_managing_tasks
+
+    @is_slurm_managing_tasks.setter
+    def is_slurm_managing_tasks(self, value: bool) -> None:
+        rank_zero_deprecation(
+            "`AcceleratorConnector.is_slurm_managing_tasks` was deprecated in v1.5 and will be removed in v1.6."
+        )
+        self._is_slurm_managing_tasks = value
+
+    def configure_slurm_ddp(self) -> None:
+        rank_zero_deprecation(
+            "`AcceleratorConnector.configure_slurm_ddp()` was deprecated in v1.5 and will be removed in v1.6."
+        )
+        self._configure_slurm_ddp()
+
+    def _configure_slurm_ddp(self):
         # extract SLURM flag vars
         # whenever we have the correct number of tasks, we let slurm manage processes
         # otherwise we launch the required number of processes
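The property/setter pair added in this hunk follows the usual pattern for deprecating a public attribute while keeping it readable and writable: both accessors emit the deprecation message and delegate to the new private field. A minimal, self-contained sketch of that pattern, using a generic warnings-based helper rather than Lightning's rank_zero_deprecation (the class and attribute names below are illustrative only):

    import warnings

    def warn_deprecated(message: str) -> None:
        # Illustrative stand-in for a project-specific deprecation helper.
        warnings.warn(message, DeprecationWarning)

    class Connector:
        def __init__(self) -> None:
            # The private field holds the real state.
            self._is_managing_tasks = False

        @property
        def is_managing_tasks(self) -> bool:
            # Reading through the old public name still works, but warns.
            warn_deprecated("`is_managing_tasks` is deprecated and will be removed in a future release.")
            return self._is_managing_tasks

        @is_managing_tasks.setter
        def is_managing_tasks(self, value: bool) -> None:
            # Writing through the old public name warns as well.
            warn_deprecated("`is_managing_tasks` is deprecated and will be removed in a future release.")
            self._is_managing_tasks = value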
@@ -981,21 +1001,21 @@ def configure_slurm_ddp(self):
         num_slurm_tasks = 0
         try:
             num_slurm_tasks = int(os.environ["SLURM_NTASKS"])
-            self.is_slurm_managing_tasks = num_slurm_tasks == num_requested_gpus
+            self._is_slurm_managing_tasks = num_slurm_tasks == num_requested_gpus

             # enable slurm cpu
             if num_requested_gpus == 0:
-                self.is_slurm_managing_tasks = num_slurm_tasks == self.num_processes
+                self._is_slurm_managing_tasks = num_slurm_tasks == self.num_processes

             # in interactive mode we don't manage tasks
             job_name = os.environ["SLURM_JOB_NAME"]
             if job_name == "bash":
-                self.is_slurm_managing_tasks = False
+                self._is_slurm_managing_tasks = False

         except Exception:
             # likely not on slurm, so set the slurm managed flag to false
-            self.is_slurm_managing_tasks = False
+            self._is_slurm_managing_tasks = False

         # notify user that slurm is managing tasks
-        if self.is_slurm_managing_tasks:
+        if self._is_slurm_managing_tasks:
             rank_zero_info("Multi-processing is handled by Slurm.")
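As a reading aid, the decision this hunk renames can be restated as a standalone check against the SLURM environment. The function name and its parameters below are illustrative, not part of the change:

    import os

    def slurm_manages_tasks(num_requested_gpus: int, num_processes: int) -> bool:
        # Restates the logic of _configure_slurm_ddp: SLURM manages the processes
        # only when it launched exactly as many tasks as requested devices
        # (GPUs, or CPU processes when no GPUs are requested), and only outside
        # interactive ("bash") jobs.
        try:
            num_slurm_tasks = int(os.environ["SLURM_NTASKS"])
            job_name = os.environ["SLURM_JOB_NAME"]
        except (KeyError, ValueError):
            # Missing or malformed SLURM variables: most likely not running on SLURM.
            return False
        if job_name == "bash":
            # Interactive session: the trainer launches the processes itself.
            return False
        expected = num_requested_gpus if num_requested_gpus > 0 else num_processes
        return num_slurm_tasks == expected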