
Commit 1149a74

Fix for deepspeed
1 parent b2d4c7e commit 1149a74

File tree: 3 files changed (+10 -7 lines)


pytorch_lightning/core/lightning.py

Lines changed: 1 addition & 1 deletion
@@ -632,7 +632,7 @@ def training_step(self, *args, **kwargs) -> STEP_OUTPUT:
             - ``None`` - Training will skip to the next batch
 
         Note:
-            Returning ``None`` is currently not supported for multi-GPU or TPU.
+            Returning ``None`` is currently not supported for multi-GPU or TPU, or using `DeepSpeed`.
 
         In this step you'd normally do the forward pass and calculate the loss for a batch.
         You can also do fancier things like multiple forward passes or something model specific.
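For context, the docstring change above documents the pattern sketched below: a ``training_step`` that returns ``None`` to skip a batch. This is a minimal, hypothetical sketch (the model, loss, and skip condition are illustrative, not part of the commit); with the DeepSpeed plugin under automatic optimization, the ``return None`` path is exactly what this commit now rejects with an explicit error.

import torch
import pytorch_lightning as pl


class SkipBatchModel(pl.LightningModule):
    """Hypothetical module showing the `return None` skip pattern."""

    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(32, 1)

    def forward(self, x):
        return self.layer(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        loss = torch.nn.functional.mse_loss(self(x), y)
        if not torch.isfinite(loss):
            # Returning ``None`` asks Lightning to skip this batch.
            # With the DeepSpeed plugin, this now raises a
            # MisconfigurationException instead of stepping the engine anyway.
            return None
        return loss

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.1)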

pytorch_lightning/plugins/precision/deepspeed_precision.py

Lines changed: 8 additions & 5 deletions
@@ -20,6 +20,7 @@
 import pytorch_lightning as pl
 from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin
 from pytorch_lightning.utilities import GradClipAlgorithmType
+from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from pytorch_lightning.utilities.model_helpers import is_overridden
 from pytorch_lightning.utilities.warnings import WarningCache

@@ -44,12 +45,14 @@ def pre_optimizer_step(
         """Hook to do something before each optimizer step."""
         result = lambda_closure()  # DeepSpeed does not support closures
         super().pre_optimizer_step(model, optimizer, optimizer_idx, lambda_closure, **kwargs)
-        skipped_backward = result is None
         # in manual optimization, the closure does not return a value
-        if not model.automatic_optimization or not skipped_backward:
-            # the following should be in a `optimizer_step` hook but we don't have one in the precision plugin.
-            deepspeed_engine = model.trainer.model
-            deepspeed_engine.step()
+        if model.automatic_optimization and result is None:
+            raise MisconfigurationException(
+                "Skipping backward by returning `None` from your `training_step` is not supported by `DeepSpeed`"
+            )
+        # the following should be in a `optimizer_step` hook but we don't have one in the precision plugin.
+        deepspeed_engine = model.trainer.model
+        deepspeed_engine.step()
         return False

     def backward(self, model: "pl.LightningModule", closure_loss: Tensor, *args: Any, **kwargs: Any) -> None:
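Worth noting about the new guard: it only fires when model.automatic_optimization is True, because under manual optimization the closure returns ``None`` by design and the DeepSpeed engine must still be stepped. A hedged sketch of that manual-optimization path follows (module and names are illustrative, not from this commit):

import torch
import pytorch_lightning as pl


class ManualOptModel(pl.LightningModule):
    """Hypothetical manual-optimization module; the closure result is None by design."""

    def __init__(self):
        super().__init__()
        self.automatic_optimization = False
        self.layer = torch.nn.Linear(32, 1)

    def training_step(self, batch, batch_idx):
        opt = self.optimizers()
        x, y = batch
        loss = torch.nn.functional.mse_loss(self.layer(x), y)
        opt.zero_grad()
        self.manual_backward(loss)
        opt.step()
        # No return value: the precision plugin's closure result is ``None``,
        # but automatic_optimization is False, so no exception is raised and
        # the DeepSpeed engine is still stepped.

    def configure_optimizers(self):
        return torch.optim.SGD(self.layer.parameters(), lr=0.1)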

pytorch_lightning/plugins/precision/native_amp.py

Lines changed: 1 addition & 1 deletion
@@ -96,7 +96,7 @@ def pre_optimizer_step(
                 f"native PyTorch amp and lbfgs are not compatible (optimizer {optimizer_idx})."
                 " To request, please file a Github issue in PyTorch and tag @mcarilli"
             )
-        result = lambda_closure()
+        result = lambda_closure()  # native amp does not support closures
         self.scaler.unscale_(optimizer)
         super().pre_optimizer_step(model, optimizer, optimizer_idx, lambda_closure, **kwargs)
         skipped_backward = result is None
