Skip to content

Commit 16213b1

Browse files
daniellepintzkaushikb11rohitgr7
authored
Deprecate log_gpu_memory, gpu_metrics, and util funcs in favor of DeviceStatsMonitor callback (#9921)
Co-authored-by: Kaushik B <[email protected]> Co-authored-by: Rohit Gupta <[email protected]>
1 parent afbf703 commit 16213b1

File tree

6 files changed

+52
-35
lines changed

6 files changed

+52
-35
lines changed

CHANGELOG.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
345345
- Deprecated passing `progress_bar_refresh_rate` to the `Trainer` constructor in favor of adding the `ProgressBar` callback with `refresh_rate` directly to the list of callbacks, or passing `enable_progress_bar=False` to disable the progress bar ([#9616](https://github.com/PyTorchLightning/pytorch-lightning/pull/9616))
346346

347347

348-
- Deprecate `LightningDistributed` and move the broadcast logic to `DDPPlugin` and `DDPSpawnPlugin` directly ([#9691](https://github.com/PyTorchLightning/pytorch-lightning/pull/9691))
348+
- Deprecated `LightningDistributed` and moved the broadcast logic to `DDPPlugin` and `DDPSpawnPlugin` directly ([#9691](https://github.com/PyTorchLightning/pytorch-lightning/pull/9691))
349349

350350

351351
- Deprecated passing `stochastic_weight_avg` from the `Trainer` constructor in favor of adding the `StochasticWeightAveraging` callback directly to the list of callbacks ([#8989](https://github.com/PyTorchLightning/pytorch-lightning/pull/8989))
@@ -366,6 +366,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
366366
- Deprecated passing `weights_summary` to the `Trainer` constructor in favor of adding the `ModelSummary` callback with `max_depth` directly to the list of callbacks ([#9699](https://github.com/PyTorchLightning/pytorch-lightning/pull/9699))
367367

368368

369+
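- Deprecated the `log_gpu_memory` Trainer flag, the `LoggerConnector.gpus_metrics` property, and the `get_memory_profile` and `get_gpu_memory_map` utility functions in favor of the `DeviceStatsMonitor` callback ([#9921](https://github.com/PyTorchLightning/pytorch-lightning/pull/9921))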
370+
371+
369372
- Deprecated `GPUStatsMonitor` and `XLAStatsMonitor` in favor of `DeviceStatsMonitor` callback ([#9924](https://github.com/PyTorchLightning/pytorch-lightning/pull/9924))
370373

371374
### Removed

docs/source/common/trainer.rst

Lines changed: 1 addition & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -528,7 +528,7 @@ Example::
528528
checkpoint_callback
529529
^^^^^^^^^^^^^^^^^^^
530530

531-
Deprecated: This has been deprecated in v1.5 and will be removed in v.17. Please use ``enable_checkpointing`` instead.
531+
Deprecated: This has been deprecated in v1.5 and will be removed in v1.7. Please use ``enable_checkpointing`` instead.
532532

533533
default_root_dir
534534
^^^^^^^^^^^^^^^^
@@ -838,36 +838,6 @@ How often to add logging rows (does not write to disk)
838838
See Also:
839839
- :doc:`logging <../extensions/logging>`
840840

841-
log_gpu_memory
842-
^^^^^^^^^^^^^^
843-
844-
.. raw:: html
845-
846-
<video width="50%" max-width="400px" controls
847-
poster="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/pl_docs/trainer_flags/thumb/log_gpu_memory.jpg"
848-
src="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/pl_docs/trainer_flags/log_gpu_memory.mp4"></video>
849-
850-
|
851-
852-
Options:
853-
854-
- None
855-
- 'min_max'
856-
- 'all'
857-
858-
.. testcode::
859-
860-
# default used by the Trainer
861-
trainer = Trainer(log_gpu_memory=None)
862-
863-
# log all the GPUs (on master node only)
864-
trainer = Trainer(log_gpu_memory="all")
865-
866-
# log only the min and max memory on the master node
867-
trainer = Trainer(log_gpu_memory="min_max")
868-
869-
.. note:: Might slow performance because it uses the output of ``nvidia-smi``.
870-
871841
logger
872842
^^^^^^
873843

pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@
2929
class LoggerConnector:
3030
def __init__(self, trainer: "pl.Trainer", log_gpu_memory: Optional[str] = None) -> None:
3131
self.trainer = trainer
32+
if log_gpu_memory is not None:
33+
rank_zero_deprecation(
34+
"Setting `log_gpu_memory` with the trainer flag is deprecated in v1.5 and will be removed in v1.7. "
35+
"Please monitor GPU stats with the `DeviceStatsMonitor` callback directly instead."
36+
)
3237
self.log_gpu_memory = log_gpu_memory
3338
self.eval_loop_results: List[_OUT_DICT] = []
3439
self._val_log_step: int = 0
@@ -222,6 +227,7 @@ def update_train_step_metrics(self) -> None:
222227
if self.trainer.fit_loop._should_accumulate() and self.trainer.lightning_module.automatic_optimization:
223228
return
224229

230+
# TODO: remove this call in v1.7
225231
self._log_gpus_metrics()
226232

227233
# when metrics should be logged
@@ -239,6 +245,11 @@ def update_train_epoch_metrics(self) -> None:
239245
self.trainer._results.reset(metrics=True)
240246

241247
def _log_gpus_metrics(self) -> None:
248+
"""
249+
.. deprecated:: v1.5
250+
This function was deprecated in v1.5 in favor of
251+
`pytorch_lightning.accelerators.gpu._get_nvidia_gpu_stats` and will be removed in v1.7.
252+
"""
242253
for key, mem in self.gpus_metrics.items():
243254
if self.log_gpu_memory == "min_max":
244255
self.trainer.lightning_module.log(key, mem, prog_bar=False, logger=True)
@@ -309,6 +320,14 @@ def metrics(self) -> _METRICS:
309320

310321
@property
311322
def gpus_metrics(self) -> Dict[str, float]:
323+
"""
324+
.. deprecated:: v1.5
325+
Will be removed in v1.7.
326+
"""
327+
rank_zero_deprecation(
328+
"The property `LoggerConnector.gpus_metrics` was deprecated in v1.5"
329+
" and will be removed in 1.7. Use the `DeviceStatsMonitor` callback instead."
330+
)
312331
if self.trainer._device_type == DeviceType.GPU and self.log_gpu_memory:
313332
mem_map = memory.get_memory_profile(self.log_gpu_memory)
314333
self._gpus_metrics.update(mem_map)

pytorch_lightning/trainer/trainer.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ def __init__(
134134
auto_select_gpus: bool = False,
135135
tpu_cores: Optional[Union[List[int], str, int]] = None,
136136
ipus: Optional[int] = None,
137-
log_gpu_memory: Optional[str] = None,
137+
log_gpu_memory: Optional[str] = None, # TODO: Remove in 1.7
138138
progress_bar_refresh_rate: Optional[int] = None, # TODO: remove in v1.7
139139
enable_progress_bar: bool = True,
140140
overfit_batches: Union[int, float] = 0.0,
@@ -277,6 +277,10 @@ def __init__(
277277
278278
log_gpu_memory: None, 'min_max', 'all'. Might slow performance.
279279
280+
.. deprecated:: v1.5
281+
Deprecated in v1.5.0 and will be removed in v1.7.0.
282+
Please use the ``DeviceStatsMonitor`` callback directly instead.
283+
280284
log_every_n_steps: How often to log within steps (defaults to every 50 steps).
281285
282286
prepare_data_per_node: If True, each LOCAL_RANK=0 will call prepare data.

pytorch_lightning/utilities/memory.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,12 @@ def garbage_collection_cuda() -> None:
9696

9797

9898
def get_memory_profile(mode: str) -> Dict[str, float]:
99-
"""Get a profile of the current memory usage.
99+
r"""
100+
.. deprecated:: v1.5
101+
This function was deprecated in v1.5 in favor of
102+
`pytorch_lightning.accelerators.gpu._get_nvidia_gpu_stats` and will be removed in v1.7.
103+
104+
Get a profile of the current memory usage.
100105
101106
Args:
102107
mode: There are two modes:
@@ -124,7 +129,12 @@ def get_memory_profile(mode: str) -> Dict[str, float]:
124129

125130

126131
def get_gpu_memory_map() -> Dict[str, float]:
127-
"""Get the current gpu usage.
132+
r"""
133+
.. deprecated:: v1.5
134+
This function was deprecated in v1.5 in favor of
135+
`pytorch_lightning.accelerators.gpu._get_nvidia_gpu_stats` and will be removed in v1.7.
136+
137+
Get the current gpu usage.
128138
129139
Return:
130140
A dictionary in which the keys are device ids as integers and

tests/deprecated_api/test_remove_1-7.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from pytorch_lightning.callbacks.gpu_stats_monitor import GPUStatsMonitor
2222
from pytorch_lightning.callbacks.xla_stats_monitor import XLAStatsMonitor
2323
from pytorch_lightning.loggers import LoggerCollection, TestTubeLogger
24+
from pytorch_lightning.trainer.connectors.logger_connector import LoggerConnector
2425
from tests.deprecated_api import _soft_unimport_module
2526
from tests.helpers import BoringModel
2627
from tests.helpers.datamodules import MNISTDataModule
@@ -370,6 +371,16 @@ def test_v1_7_0_weights_summary_trainer(tmpdir):
370371
t.weights_summary = "blah"
371372

372373

374+
def test_v1_7_0_trainer_log_gpu_memory(tmpdir):
375+
with pytest.deprecated_call(
376+
match="Setting `log_gpu_memory` with the trainer flag is deprecated in v1.5 and will be removed"
377+
):
378+
trainer = Trainer(log_gpu_memory="min_max")
379+
with pytest.deprecated_call(match="The property `LoggerConnector.gpus_metrics` was deprecated in v1.5"):
380+
lg = LoggerConnector(trainer)
381+
_ = lg.gpus_metrics
382+
383+
373384
@RunIf(min_gpus=1)
374385
def test_v1_7_0_deprecate_gpu_stats_monitor(tmpdir):
375386
with pytest.deprecated_call(match="The `GPUStatsMonitor` callback was deprecated in v1.5"):

0 commit comments

Comments
 (0)