Skip to content

Mark trainer.accelerator_connector as protected #10032

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 25, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pytorch_lightning/loops/optimization/optimizer_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,7 @@ def _track_and_norm_grad(self, optimizer: torch.optim.Optimizer, opt_idx: int) -
)

# clip gradients
if not self.trainer.accelerator_connector.use_deepspeed:
if not self.trainer._accelerator_connector.use_deepspeed:
self.trainer.lightning_module.configure_gradient_clipping(
optimizer,
opt_idx,
Expand Down
6 changes: 3 additions & 3 deletions pytorch_lightning/plugins/training_type/deepspeed.py
Original file line number Diff line number Diff line change
Expand Up @@ -631,9 +631,9 @@ def _auto_select_batch_size(self):
return batch_size

def _format_precision_config(self):
amp_type = self.lightning_module.trainer.accelerator_connector.amp_type
amp_level = self.lightning_module.trainer.accelerator_connector.amp_level
precision = self.lightning_module.trainer.accelerator_connector.precision
amp_type = self.lightning_module.trainer._accelerator_connector.amp_type
amp_level = self.lightning_module.trainer._accelerator_connector.amp_level
precision = self.lightning_module.trainer._accelerator_connector.precision
if precision in (16, "mixed"):
if "fp16" not in self.config and amp_type == AMPType.NATIVE:
# FP16 is a DeepSpeed standalone AMP implementation
Expand Down
2 changes: 1 addition & 1 deletion pytorch_lightning/trainer/configuration_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def __verify_dp_batch_transfer_support(trainer: "pl.Trainer", model: "pl.Lightni
# TODO: Remove this blocker once batch transfer to device is integrated in Lightning for DP mode.
batch_transfer_hooks = ("on_before_batch_transfer", "transfer_batch_to_device", "on_after_batch_transfer")
for hook in batch_transfer_hooks:
if trainer.accelerator_connector.use_dp and is_overridden(hook, model):
if trainer._accelerator_connector.use_dp and is_overridden(hook, model):
raise MisconfigurationException(f"Overriding `{hook}` is not supported in DP mode.")


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ def _log_gpus_metrics(self) -> None:
self.trainer.lightning_module.log(key, mem, prog_bar=False, logger=True)
else:
gpu_id = int(key.split("/")[0].split(":")[1])
if gpu_id in self.trainer.accelerator_connector.parallel_device_ids:
if gpu_id in self.trainer._accelerator_connector.parallel_device_ids:
self.trainer.lightning_module.log(
key, mem, prog_bar=False, logger=True, on_step=True, on_epoch=False
)
Expand Down
8 changes: 4 additions & 4 deletions pytorch_lightning/trainer/data_loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def _worker_check(self, dataloader: DataLoader, name: str) -> None:
if not isinstance(dataloader, DataLoader):
return

using_spawn = self.accelerator_connector._distrib_type == DistributedType.DDP_SPAWN
using_spawn = self._accelerator_connector._distrib_type == DistributedType.DDP_SPAWN
num_cpus = multiprocessing.cpu_count()

# ddp_spawn + num_workers > 0 don't mix! tell the user
Expand Down Expand Up @@ -120,8 +120,8 @@ def auto_add_worker_init_fn(self, dataloader: DataLoader) -> None:

def _requires_distributed_sampler(self, dataloader) -> bool:
return (
self.accelerator_connector.replace_sampler_ddp
and self.accelerator_connector.is_distributed
self._accelerator_connector.replace_sampler_ddp
and self._accelerator_connector.is_distributed
and not isinstance(dataloader.sampler, DistributedSampler)
and not has_iterable_dataset(dataloader)
)
Expand All @@ -147,7 +147,7 @@ def prepare_dataloader(self, dataloader: Any, shuffle: bool, mode: Optional[Runn
_fault_tolerant_training() # injects components to track the state
or self._requires_distributed_sampler(dataloader) # sets the distributed sampler
or mode == RunningStage.PREDICTING # to track indices for the predictions
or self.accelerator_connector.use_ipu # IPUs use a custom `DataLoader`
or self._accelerator_connector.use_ipu # IPUs use a custom `DataLoader`
):
sampler = self._resolve_sampler(dataloader, shuffle=shuffle, mode=mode)
dataloader = self._update_dataloader(dataloader, sampler, mode=mode)
Expand Down
26 changes: 13 additions & 13 deletions pytorch_lightning/trainer/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ def __init__(
self._data_connector = DataConnector(self, multiple_trainloader_mode)
self.optimizer_connector = OptimizerConnector(self)

self.accelerator_connector = AcceleratorConnector(
self._accelerator_connector = AcceleratorConnector(
num_processes,
devices,
tpu_cores,
Expand Down Expand Up @@ -1507,7 +1507,7 @@ def _on_exception(self):

@property
def accelerator(self) -> Accelerator:
return self.accelerator_connector.accelerator
return self._accelerator_connector.accelerator

@property
def training_type_plugin(self) -> TrainingTypePlugin:
Expand Down Expand Up @@ -1542,43 +1542,43 @@ def should_rank_save_checkpoint(self) -> bool:

@property
def _distrib_type(self) -> DistributedType:
return self.accelerator_connector._distrib_type
return self._accelerator_connector._distrib_type

@property
def _device_type(self) -> DeviceType:
return self.accelerator_connector._device_type
return self._accelerator_connector._device_type

@property
def num_nodes(self) -> int:
return self.accelerator_connector.num_nodes
return self._accelerator_connector.num_nodes

@property
def num_processes(self) -> int:
return self.accelerator_connector.num_processes
return self._accelerator_connector.num_processes

@property
def root_gpu(self) -> Optional[int]:
return self.accelerator_connector.root_gpu
return self._accelerator_connector.root_gpu

@property
def tpu_cores(self) -> int:
return self.accelerator_connector.tpu_cores
return self._accelerator_connector.tpu_cores

@property
def ipus(self) -> int:
return self.accelerator_connector.num_ipus
return self._accelerator_connector.num_ipus

@property
def num_gpus(self) -> int:
return self.accelerator_connector.num_gpus
return self._accelerator_connector.num_gpus

@property
def devices(self) -> Optional[Union[List[int], str, int]]:
return self.accelerator_connector.devices
return self._accelerator_connector.devices

@property
def data_parallel_device_ids(self) -> Optional[List[int]]:
return self.accelerator_connector.parallel_device_ids
return self._accelerator_connector.parallel_device_ids

@property
def lightning_module(self) -> "pl.LightningModule":
Expand Down Expand Up @@ -1627,7 +1627,7 @@ def scaler(self):

@property
def gpus(self) -> Optional[Union[List[int], str, int]]:
return self.accelerator_connector.gpus
return self._accelerator_connector.gpus

@property
def model(self) -> torch.nn.Module:
Expand Down
16 changes: 8 additions & 8 deletions tests/accelerators/test_accelerator_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def test_accelerator_choice_ddp_spawn(cuda_available_mock, device_count_mock):
def test_accelerator_choice_ddp_slurm(setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert trainer.accelerator_connector.is_slurm_managing_tasks
assert trainer._accelerator_connector.is_slurm_managing_tasks
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment)
Expand Down Expand Up @@ -132,7 +132,7 @@ def on_fit_start(self, trainer, pl_module):
def test_accelerator_choice_ddp2_slurm(device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert trainer.accelerator_connector.is_slurm_managing_tasks
assert trainer._accelerator_connector.is_slurm_managing_tasks
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDP2Plugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment)
Expand Down Expand Up @@ -307,7 +307,7 @@ def on_fit_start(self, trainer, pl_module):
def test_accelerator_choice_ddp_cpu_slurm(device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert trainer.accelerator_connector.is_slurm_managing_tasks
assert trainer._accelerator_connector.is_slurm_managing_tasks
assert isinstance(trainer.accelerator, CPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment)
Expand Down Expand Up @@ -402,7 +402,7 @@ class TrainTypePlugin(SingleDevicePlugin):
assert isinstance(trainer.accelerator, Accel)
assert isinstance(trainer.training_type_plugin, TrainTypePlugin)
assert isinstance(trainer.precision_plugin, Prec)
assert trainer.accelerator_connector.training_type_plugin is ttp
assert trainer._accelerator_connector.training_type_plugin is ttp

class DistributedPlugin(DDPPlugin):
pass
Expand All @@ -413,7 +413,7 @@ class DistributedPlugin(DDPPlugin):
assert isinstance(trainer.accelerator, Accel)
assert isinstance(trainer.training_type_plugin, DistributedPlugin)
assert isinstance(trainer.precision_plugin, Prec)
assert trainer.accelerator_connector.training_type_plugin is ttp
assert trainer._accelerator_connector.training_type_plugin is ttp


@mock.patch.dict(
Expand Down Expand Up @@ -756,7 +756,7 @@ def test_strategy_choice_ddp_spawn(cuda_available_mock, device_count_mock):
def test_strategy_choice_ddp_slurm(setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert trainer.accelerator_connector.is_slurm_managing_tasks
assert trainer._accelerator_connector.is_slurm_managing_tasks
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment)
Expand Down Expand Up @@ -788,7 +788,7 @@ def on_fit_start(self, trainer, pl_module):
def test_strategy_choice_ddp2_slurm(device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert trainer.accelerator_connector.is_slurm_managing_tasks
assert trainer._accelerator_connector.is_slurm_managing_tasks
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDP2Plugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment)
Expand Down Expand Up @@ -963,7 +963,7 @@ def on_fit_start(self, trainer, pl_module):
def test_strategy_choice_ddp_cpu_slurm(device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert trainer.accelerator_connector.is_slurm_managing_tasks
assert trainer._accelerator_connector.is_slurm_managing_tasks
assert isinstance(trainer.accelerator, CPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment)
Expand Down
4 changes: 2 additions & 2 deletions tests/models/test_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,8 +239,8 @@ def test_torchelastic_gpu_parsing(mocked_device_count, gpus):
"""Ensure when using torchelastic and nproc_per_node is set to the default of 1 per GPU device That we omit
sanitizing the gpus as only one of the GPUs is visible."""
trainer = Trainer(gpus=gpus)
assert isinstance(trainer.accelerator_connector.cluster_environment, TorchElasticEnvironment)
assert trainer.accelerator_connector.parallel_device_ids == device_parser.parse_gpu_ids(gpus)
assert isinstance(trainer._accelerator_connector.cluster_environment, TorchElasticEnvironment)
assert trainer._accelerator_connector.parallel_device_ids == device_parser.parse_gpu_ids(gpus)
assert trainer.gpus == gpus


Expand Down
2 changes: 1 addition & 1 deletion tests/models/test_tpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ def test_tpu_choice(tmpdir, tpu_cores, expected_tpu_id, error_expected):
Trainer(default_root_dir=tmpdir, tpu_cores=tpu_cores)
else:
trainer = Trainer(default_root_dir=tmpdir, tpu_cores=tpu_cores)
assert trainer.accelerator_connector.tpu_id == expected_tpu_id
assert trainer._accelerator_connector.tpu_id == expected_tpu_id


@pytest.mark.parametrize(
Expand Down
4 changes: 2 additions & 2 deletions tests/overrides/test_data_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def training_step(self, batch, batch_idx):
model = TestModel()
trainer = MagicMock()
trainer.state.stage = RunningStage.TRAINING
trainer.accelerator_connector._init_deterministic(False)
trainer._accelerator_connector._init_deterministic(False)

model.trainer = trainer
batch = torch.rand(2, 32).cuda()
Expand Down Expand Up @@ -128,7 +128,7 @@ def training_step(self, batch, batch_idx):
model = TestModel().to(device)
trainer = MagicMock()
trainer.state.stage = RunningStage.TRAINING
trainer.accelerator_connector._init_deterministic(False)
trainer._accelerator_connector._init_deterministic(False)
model.trainer = trainer
batch = torch.rand(2, 32).to(device)
batch_idx = 0
Expand Down
2 changes: 1 addition & 1 deletion tests/trainer/test_data_loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def _get_warning_msg():
@pytest.mark.parametrize("num_workers", [0, 1])
def test_dataloader_warnings(tmpdir, num_workers):
trainer = Trainer(default_root_dir=tmpdir, strategy="ddp_spawn", num_processes=2, fast_dev_run=4)
assert trainer.accelerator_connector._distrib_type == DistributedType.DDP_SPAWN
assert trainer._accelerator_connector._distrib_type == DistributedType.DDP_SPAWN
trainer.fit(TestSpawnBoringModel(num_workers))


Expand Down
4 changes: 2 additions & 2 deletions tests/trainer/test_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1352,15 +1352,15 @@ def write_on_batch_end(self, trainer, pl_module, prediction, batch_indices, *arg
self.write_on_batch_end_called = True

def write_on_epoch_end(self, trainer, pl_module, predictions, batch_indices):
expected = 1 if trainer.accelerator_connector.is_distributed else 2
expected = 1 if trainer._accelerator_connector.is_distributed else 2
assert len(predictions) == 2
assert len(predictions[0]) == expected
assert len(batch_indices) == 2
assert len(batch_indices[0]) == expected
self.write_on_epoch_end_called = True

def on_predict_epoch_end(self, trainer, pl_module, outputs):
if trainer.accelerator_connector.is_distributed:
if trainer._accelerator_connector.is_distributed:
for idx in range(2):
assert isinstance(trainer.predict_dataloaders[idx].batch_sampler.sampler, UnrepeatedDistributedSampler)
assert isinstance(trainer.predict_dataloaders[idx].batch_sampler, IndexBatchSamplerWrapper)
Expand Down
2 changes: 1 addition & 1 deletion tests/utilities/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ def test_lightning_cli_args_cluster_environments(tmpdir):
class TestModel(BoringModel):
def on_fit_start(self):
# Ensure SLURMEnvironment is set, instead of default LightningEnvironment
assert isinstance(self.trainer.accelerator_connector._cluster_environment, SLURMEnvironment)
assert isinstance(self.trainer._accelerator_connector._cluster_environment, SLURMEnvironment)
self.trainer.ran_asserts = True

with mock.patch("sys.argv", ["any.py", "fit", f"--trainer.plugins={json.dumps(plugins)}"]):
Expand Down