
Commit 81145ca

Fix logging with log_gpu_memory='min_max' (#9013)
1 parent 33ffd67 commit 81145ca

File tree

3 files changed: +18 −7 lines changed


CHANGELOG.md

Lines changed: 2 additions & 0 deletions

@@ -242,6 +242,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fixed `accelerator=ddp` choice for CPU ([#8645](https://github.com/PyTorchLightning/pytorch-lightning/pull/8645))


+- Fixed a bug where logging with `log_gpu_memory='min_max'` was not working ([#9013](https://github.com/PyTorchLightning/pytorch-lightning/pull/9013))
+
 ## [1.4.0] - 2021-07-27

 ### Added

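For context, the configuration the changelog entry refers to looks like the sketch below. This is a minimal, hypothetical example, not code from this commit: the `TinyModel` module and all hyperparameters are placeholders. It simply passes `log_gpu_memory="min_max"` to the `Trainer`, the setting that previously produced no GPU-memory metrics.

import torch
from torch.utils.data import DataLoader, TensorDataset
import pytorch_lightning as pl


class TinyModel(pl.LightningModule):
    # Placeholder module, only here to make the sketch self-contained.
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(32, 2)

    def training_step(self, batch, batch_idx):
        (x,) = batch
        return self.layer(x).sum()  # dummy loss

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.1)

    def train_dataloader(self):
        return DataLoader(TensorDataset(torch.randn(64, 32)), batch_size=8)


trainer = pl.Trainer(
    max_epochs=1,
    gpus=1,                    # requires a CUDA device
    log_gpu_memory="min_max",  # the mode this commit fixes
    log_every_n_steps=1,
)
trainer.fit(TinyModel())
# With the fix, "min_gpu_mem" and "max_gpu_mem" appear in trainer.logged_metrics.

With `log_gpu_memory="all"`, per-device keys such as `gpu_id: 0/memory.used (MB)` are logged instead.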
pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py

Lines changed: 8 additions & 3 deletions

@@ -224,9 +224,14 @@ def update_train_epoch_metrics(self) -> None:

     def _log_gpus_metrics(self):
         for key, mem in self.gpus_metrics.items():
-            gpu_id = int(key.split("/")[0].split(":")[1])
-            if gpu_id in self.trainer.accelerator_connector.parallel_device_ids:
-                self.trainer.lightning_module.log(key, mem, prog_bar=False, logger=True, on_step=True, on_epoch=False)
+            if self.log_gpu_memory == "min_max":
+                self.trainer.lightning_module.log(key, mem, prog_bar=False, logger=True)
+            else:
+                gpu_id = int(key.split("/")[0].split(":")[1])
+                if gpu_id in self.trainer.accelerator_connector.parallel_device_ids:
+                    self.trainer.lightning_module.log(
+                        key, mem, prog_bar=False, logger=True, on_step=True, on_epoch=False
+                    )

     """
     Utilities and properties

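The reason for the new branch above: the metric keys depend on the `log_gpu_memory` mode. With "all", each key embeds a device id (`gpu_id: N/...`), which the old code parsed in order to filter by `parallel_device_ids`; with "min_max", the keys are the aggregates `min_gpu_mem` and `max_gpu_mem` (see the test assertions below), so there is no device id to parse. A small standalone illustration of that parsing, not library code:

# Key format used by log_gpu_memory="all": the device id can be parsed out.
all_mode_key = "gpu_id: 1/memory.used (MB)"
print(int(all_mode_key.split("/")[0].split(":")[1]))  # -> 1

# Key format used by log_gpu_memory="min_max": there is no "gpu_id: N" prefix,
# so the same expression has no element at index 1 and raises IndexError.
min_max_key = "min_gpu_mem"
print(min_max_key.split("/")[0].split(":"))  # -> ['min_gpu_mem']

Hence the fixed method logs the min/max aggregates directly and only applies the per-device filtering on the "all" path.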
tests/trainer/logging_/test_train_loop_logging.py

Lines changed: 8 additions & 4 deletions

@@ -645,18 +645,22 @@ def training_step(self, batch, batch_idx):


 @RunIf(min_gpus=2)
-def test_log_gpu_memory_without_logging_on_step(tmpdir):
+@pytest.mark.parametrize("log_gpu_memory", ["all", "min_max"])
+def test_log_gpu_memory_without_logging_on_step(tmpdir, log_gpu_memory):

     model = BoringModel()
     trainer = Trainer(
         default_root_dir=tmpdir,
         max_epochs=1,
         limit_train_batches=1,
         limit_val_batches=0,
-        log_gpu_memory="all",
+        log_gpu_memory=log_gpu_memory,
         log_every_n_steps=1,
         gpus=[1],
     )
     trainer.fit(model)
-
-    assert "gpu_id: 1/memory.used (MB)" in trainer.logged_metrics
+    if log_gpu_memory == "min_max":
+        assert "min_gpu_mem" in trainer.logged_metrics
+        assert "max_gpu_mem" in trainer.logged_metrics
+    else:
+        assert "gpu_id: 1/memory.used (MB)" in trainer.logged_metrics
