Skip to content

Commit e23368c

Browse files
committed
Merge branch 'master' into bugfix/lite-device
2 parents b021310 + af4af3d commit e23368c

File tree

4 files changed

+71
-44
lines changed

4 files changed

+71
-44
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -148,7 +148,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
148148
- Fixed `CombinedLoader` and `max_size_cycle` didn't receive a `DistributedSampler` ([#10374](https://github.com/PyTorchLightning/pytorch-lightning/issues/10374))
149149

150150

151-
- Fixed `to_torchscript()` causing false positive deprecation warnings ([#10470](https://github.com/PyTorchLightning/pytorch-lightning/issues/10470))
151+
- Fixed scripting causing false positive deprecation warnings ([#10470](https://github.com/PyTorchLightning/pytorch-lightning/pull/10470), [#10555](https://github.com/PyTorchLightning/pytorch-lightning/pull/10555))
152152

153153

154154
- Fixed `isinstance` not working with `init_meta_context`, materialized model not being moved to the device ([#10493](https://github.com/PyTorchLightning/metrics/pull/10493))

pytorch_lightning/loggers/tensorboard.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -240,7 +240,9 @@ def log_graph(self, model: "pl.LightningModule", input_array=None):
240240

241241
if input_array is not None:
242242
input_array = model._apply_batch_transfer_handler(input_array)
243+
model._running_torchscript = True
243244
self.experiment.add_graph(model, input_array)
245+
model._running_torchscript = False
244246
else:
245247
rank_zero_warn(
246248
"Could not log computational graph since the"

pytorch_lightning/plugins/training_type/ipu.py

Lines changed: 12 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -237,21 +237,25 @@ def to_tensor(x):
237237
args = apply_to_collection(args, dtype=(int, float), function=to_tensor)
238238
return args
239239

240-
def training_step(self, *args, **kwargs):
240+
def _step(self, stage: RunningStage, *args: Any, **kwargs: Any):
241241
args = self._prepare_input(args)
242-
return self.poptorch_models[RunningStage.TRAINING](*args, **kwargs)
242+
poptorch_model = self.poptorch_models[stage]
243+
self.lightning_module._running_torchscript = True
244+
out = poptorch_model(*args, **kwargs)
245+
self.lightning_module._running_torchscript = False
246+
return out
247+
248+
def training_step(self, *args, **kwargs):
249+
return self._step(RunningStage.TRAINING, *args, **kwargs)
243250

244251
def validation_step(self, *args, **kwargs):
245-
args = self._prepare_input(args)
246-
return self.poptorch_models[RunningStage.VALIDATING](*args, **kwargs)
252+
return self._step(RunningStage.VALIDATING, *args, **kwargs)
247253

248254
def test_step(self, *args, **kwargs):
249-
args = self._prepare_input(args)
250-
return self.poptorch_models[RunningStage.TESTING](*args, **kwargs)
255+
return self._step(RunningStage.TESTING, *args, **kwargs)
251256

252257
def predict_step(self, *args, **kwargs):
253-
args = self._prepare_input(args)
254-
return self.poptorch_models[RunningStage.PREDICTING](*args, **kwargs)
258+
return self._step(RunningStage.PREDICTING, *args, **kwargs)
255259

256260
def teardown(self) -> None:
257261
# undo dataloader patching

tests/accelerators/test_accelerator_connector.py

Lines changed: 56 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,6 @@ def test_accelerator_choice_ddp_spawn(cuda_available_mock, device_count_mock):
8686
assert isinstance(trainer.training_type_plugin.cluster_environment, LightningEnvironment)
8787

8888

89-
@RunIf(min_gpus=2)
9089
@mock.patch.dict(
9190
os.environ,
9291
{
@@ -98,8 +97,10 @@ def test_accelerator_choice_ddp_spawn(cuda_available_mock, device_count_mock):
9897
"SLURM_LOCALID": "1",
9998
},
10099
)
100+
@mock.patch("torch.cuda.set_device")
101+
@mock.patch("torch.cuda.device_count", return_value=2)
101102
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
102-
def test_accelerator_choice_ddp_slurm(setup_distributed_mock):
103+
def test_accelerator_choice_ddp_slurm(set_device_mock, device_count_mock, setup_distributed_mock):
103104
class CB(Callback):
104105
def on_fit_start(self, trainer, pl_module):
105106
assert trainer._accelerator_connector._is_slurm_managing_tasks
@@ -111,13 +112,13 @@ def on_fit_start(self, trainer, pl_module):
111112
raise SystemExit()
112113

113114
model = BoringModel()
114-
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2, callbacks=[CB()])
115+
with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"):
116+
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2, callbacks=[CB()])
115117

116118
with pytest.raises(SystemExit):
117119
trainer.fit(model)
118120

119121

120-
@RunIf(min_gpus=2)
121122
@mock.patch.dict(
122123
os.environ,
123124
{
@@ -129,9 +130,10 @@ def on_fit_start(self, trainer, pl_module):
129130
"SLURM_LOCALID": "1",
130131
},
131132
)
133+
@mock.patch("torch.cuda.set_device")
132134
@mock.patch("torch.cuda.device_count", return_value=2)
133135
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
134-
def test_accelerator_choice_ddp2_slurm(device_count_mock, setup_distributed_mock):
136+
def test_accelerator_choice_ddp2_slurm(set_device_mock, device_count_mock, setup_distributed_mock):
135137
class CB(Callback):
136138
def on_fit_start(self, trainer, pl_module):
137139
assert trainer._accelerator_connector._is_slurm_managing_tasks
@@ -143,13 +145,15 @@ def on_fit_start(self, trainer, pl_module):
143145
raise SystemExit()
144146

145147
model = BoringModel()
146-
trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2, callbacks=[CB()])
148+
with pytest.deprecated_call(match=r"accelerator='ddp2'\)` has been deprecated in v1.5"):
149+
trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2, callbacks=[CB()])
147150

148151
with pytest.raises(SystemExit):
149152
trainer.fit(model)
150153

154+
set_device_mock.assert_called_once()
155+
151156

152-
@RunIf(min_gpus=1)
153157
@mock.patch.dict(
154158
os.environ,
155159
{
@@ -161,9 +165,10 @@ def on_fit_start(self, trainer, pl_module):
161165
"GROUP_RANK": "0",
162166
},
163167
)
164-
@mock.patch("torch.cuda.device_count", return_value=2)
168+
@mock.patch("torch.cuda.set_device")
169+
@mock.patch("torch.cuda.device_count", return_value=1)
165170
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
166-
def test_accelerator_choice_ddp_te(device_count_mock, setup_distributed_mock):
171+
def test_accelerator_choice_ddp_te(set_device_mock, device_count_mock, setup_distributed_mock):
167172
class CB(Callback):
168173
def on_fit_start(self, trainer, pl_module):
169174
assert isinstance(trainer.accelerator, GPUAccelerator)
@@ -174,13 +179,15 @@ def on_fit_start(self, trainer, pl_module):
174179
raise SystemExit()
175180

176181
model = BoringModel()
177-
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2, callbacks=[CB()])
182+
with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"):
183+
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2, callbacks=[CB()])
178184

179185
with pytest.raises(SystemExit):
180186
trainer.fit(model)
181187

188+
set_device_mock.assert_called_once()
189+
182190

183-
@RunIf(min_gpus=1)
184191
@mock.patch.dict(
185192
os.environ,
186193
{
@@ -192,9 +199,10 @@ def on_fit_start(self, trainer, pl_module):
192199
"GROUP_RANK": "0",
193200
},
194201
)
195-
@mock.patch("torch.cuda.device_count", return_value=2)
202+
@mock.patch("torch.cuda.set_device")
203+
@mock.patch("torch.cuda.device_count", return_value=1)
196204
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
197-
def test_accelerator_choice_ddp2_te(device_count_mock, setup_distributed_mock):
205+
def test_accelerator_choice_ddp2_te(set_device_mock, device_count_mock, setup_distributed_mock):
198206
class CB(Callback):
199207
def on_fit_start(self, trainer, pl_module):
200208
assert isinstance(trainer.accelerator, GPUAccelerator)
@@ -205,11 +213,14 @@ def on_fit_start(self, trainer, pl_module):
205213
raise SystemExit()
206214

207215
model = BoringModel()
208-
trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2, callbacks=[CB()])
216+
with pytest.deprecated_call(match=r"accelerator='ddp2'\)` has been deprecated in v1.5"):
217+
trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2, callbacks=[CB()])
209218

210219
with pytest.raises(SystemExit):
211220
trainer.fit(model)
212221

222+
set_device_mock.assert_called_once()
223+
213224

214225
@mock.patch.dict(
215226
os.environ, {"WORLD_SIZE": "2", "LOCAL_WORLD_SIZE": "2", "RANK": "1", "LOCAL_RANK": "1", "GROUP_RANK": "0"}
@@ -233,7 +244,6 @@ def on_fit_start(self, trainer, pl_module):
233244
trainer.fit(model)
234245

235246

236-
@RunIf(min_gpus=1)
237247
@mock.patch.dict(
238248
os.environ,
239249
{
@@ -245,9 +255,10 @@ def on_fit_start(self, trainer, pl_module):
245255
"RANK": "1",
246256
},
247257
)
258+
@mock.patch("torch.cuda.set_device")
248259
@mock.patch("torch.cuda.device_count", return_value=1)
249260
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
250-
def test_accelerator_choice_ddp_kubeflow(device_count_mock, setup_distributed_mock):
261+
def test_accelerator_choice_ddp_kubeflow(set_device_mock, device_count_mock, setup_distributed_mock):
251262
class CB(Callback):
252263
def on_fit_start(self, trainer, pl_module):
253264
assert isinstance(trainer.accelerator, GPUAccelerator)
@@ -258,11 +269,14 @@ def on_fit_start(self, trainer, pl_module):
258269
raise SystemExit()
259270

260271
model = BoringModel()
261-
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=1, callbacks=[CB()])
272+
with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"):
273+
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=1, callbacks=[CB()])
262274

263275
with pytest.raises(SystemExit):
264276
trainer.fit(model)
265277

278+
set_device_mock.assert_called_once()
279+
266280

267281
@mock.patch.dict(
268282
os.environ,
@@ -323,29 +337,28 @@ def on_fit_start(self, trainer, pl_module):
323337
trainer.fit(model)
324338

325339

326-
@RunIf(special=True)
327-
def test_accelerator_choice_ddp_cpu_and_plugin(tmpdir):
340+
@RunIf(skip_windows=True, special=True)
341+
def test_accelerator_choice_ddp_cpu_and_strategy(tmpdir):
328342
"""Test that accelerator="ddp_cpu" can work together with an instance of DDPPlugin."""
329-
_test_accelerator_choice_ddp_cpu_and_plugin(tmpdir, ddp_plugin_class=DDPPlugin)
343+
_test_accelerator_choice_ddp_cpu_and_strategy(tmpdir, ddp_strategy_class=DDPPlugin)
330344

331345

332-
@RunIf(special=True)
333-
def test_accelerator_choice_ddp_cpu_and_plugin_spawn(tmpdir):
346+
@RunIf(skip_windows=True)
347+
def test_accelerator_choice_ddp_cpu_and_strategy_spawn(tmpdir):
334348
"""Test that accelerator="ddp_cpu" can work together with an instance of DDPPSpawnPlugin."""
335-
_test_accelerator_choice_ddp_cpu_and_plugin(tmpdir, ddp_plugin_class=DDPSpawnPlugin)
336-
349+
_test_accelerator_choice_ddp_cpu_and_strategy(tmpdir, ddp_strategy_class=DDPSpawnPlugin)
337350

338-
def _test_accelerator_choice_ddp_cpu_and_plugin(tmpdir, ddp_plugin_class):
339351

352+
def _test_accelerator_choice_ddp_cpu_and_strategy(tmpdir, ddp_strategy_class):
340353
model = BoringModel()
341354
trainer = Trainer(
342355
default_root_dir=tmpdir,
343-
plugins=[ddp_plugin_class(find_unused_parameters=True)],
356+
strategy=ddp_strategy_class(find_unused_parameters=True),
344357
fast_dev_run=True,
345358
accelerator="ddp_cpu",
346359
num_processes=2,
347360
)
348-
assert isinstance(trainer.training_type_plugin, ddp_plugin_class)
361+
assert isinstance(trainer.training_type_plugin, ddp_strategy_class)
349362
assert isinstance(trainer.accelerator, CPUAccelerator)
350363
assert trainer.training_type_plugin.num_processes == 2
351364
assert trainer.training_type_plugin.parallel_devices == [torch.device("cpu")] * 2
@@ -793,7 +806,6 @@ def on_fit_start(self, trainer, pl_module):
793806
trainer.fit(model)
794807

795808

796-
@RunIf(min_gpus=2)
797809
@mock.patch.dict(
798810
os.environ,
799811
{
@@ -805,10 +817,11 @@ def on_fit_start(self, trainer, pl_module):
805817
"SLURM_LOCALID": "1",
806818
},
807819
)
820+
@mock.patch("torch.cuda.set_device")
808821
@mock.patch("torch.cuda.device_count", return_value=2)
809822
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
810823
@pytest.mark.parametrize("strategy", ["ddp2", DDP2Plugin()])
811-
def test_strategy_choice_ddp2_slurm(device_count_mock, setup_distributed_mock, strategy):
824+
def test_strategy_choice_ddp2_slurm(set_device_mock, device_count_mock, setup_distributed_mock, strategy):
812825
class CB(Callback):
813826
def on_fit_start(self, trainer, pl_module):
814827
assert trainer._accelerator_connector._is_slurm_managing_tasks
@@ -825,8 +838,9 @@ def on_fit_start(self, trainer, pl_module):
825838
with pytest.raises(SystemExit):
826839
trainer.fit(model)
827840

841+
set_device_mock.assert_called_once()
842+
828843

829-
@RunIf(min_gpus=1)
830844
@mock.patch.dict(
831845
os.environ,
832846
{
@@ -838,9 +852,10 @@ def on_fit_start(self, trainer, pl_module):
838852
"GROUP_RANK": "0",
839853
},
840854
)
855+
@mock.patch("torch.cuda.set_device")
841856
@mock.patch("torch.cuda.device_count", return_value=2)
842857
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
843-
def test_strategy_choice_ddp_te(device_count_mock, setup_distributed_mock):
858+
def test_strategy_choice_ddp_te(set_device_mock, device_count_mock, setup_distributed_mock):
844859
class CB(Callback):
845860
def on_fit_start(self, trainer, pl_module):
846861
assert isinstance(trainer.accelerator, GPUAccelerator)
@@ -856,8 +871,9 @@ def on_fit_start(self, trainer, pl_module):
856871
with pytest.raises(SystemExit):
857872
trainer.fit(model)
858873

874+
set_device_mock.assert_called_once()
875+
859876

860-
@RunIf(min_gpus=1)
861877
@mock.patch.dict(
862878
os.environ,
863879
{
@@ -869,9 +885,10 @@ def on_fit_start(self, trainer, pl_module):
869885
"GROUP_RANK": "0",
870886
},
871887
)
888+
@mock.patch("torch.cuda.set_device")
872889
@mock.patch("torch.cuda.device_count", return_value=2)
873890
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
874-
def test_strategy_choice_ddp2_te(device_count_mock, setup_distributed_mock):
891+
def test_strategy_choice_ddp2_te(set_device_mock, device_count_mock, setup_distributed_mock):
875892
class CB(Callback):
876893
def on_fit_start(self, trainer, pl_module):
877894
assert isinstance(trainer.accelerator, GPUAccelerator)
@@ -887,6 +904,8 @@ def on_fit_start(self, trainer, pl_module):
887904
with pytest.raises(SystemExit):
888905
trainer.fit(model)
889906

907+
set_device_mock.assert_called_once()
908+
890909

891910
@mock.patch.dict(
892911
os.environ, {"WORLD_SIZE": "2", "LOCAL_WORLD_SIZE": "2", "RANK": "1", "LOCAL_RANK": "1", "GROUP_RANK": "0"}
@@ -910,7 +929,6 @@ def on_fit_start(self, trainer, pl_module):
910929
trainer.fit(model)
911930

912931

913-
@RunIf(min_gpus=1)
914932
@mock.patch.dict(
915933
os.environ,
916934
{
@@ -922,9 +940,10 @@ def on_fit_start(self, trainer, pl_module):
922940
"RANK": "1",
923941
},
924942
)
943+
@mock.patch("torch.cuda.set_device")
925944
@mock.patch("torch.cuda.device_count", return_value=1)
926945
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
927-
def test_strategy_choice_ddp_kubeflow(device_count_mock, setup_distributed_mock):
946+
def test_strategy_choice_ddp_kubeflow(set_device_mock, device_count_mock, setup_distributed_mock):
928947
class CB(Callback):
929948
def on_fit_start(self, trainer, pl_module):
930949
assert isinstance(trainer.accelerator, GPUAccelerator)
@@ -940,6 +959,8 @@ def on_fit_start(self, trainer, pl_module):
940959
with pytest.raises(SystemExit):
941960
trainer.fit(model)
942961

962+
set_device_mock.assert_called_once()
963+
943964

944965
@mock.patch.dict(
945966
os.environ,

0 commit comments

Comments (0)