|
1 | 1 | import os
|
| 2 | +from time import sleep |
2 | 3 |
|
3 | 4 | import torch
|
4 | 5 | from torch.utils.data import DataLoader, Dataset
|
@@ -29,37 +30,31 @@ def forward(self, x):
|
29 | 30 | def training_step(self, batch, batch_idx):
|
30 | 31 | loss = self(batch).sum()
|
31 | 32 | self.log("train_loss", loss)
|
| 33 | + for i in range(9999999999): |
| 34 | + print(f"[rank {self.local_rank}]", "zzz", i, os.getpid(), self.trainer.received_sigterm) |
| 35 | + sleep(3) |
32 | 36 | return {"loss": loss}
|
33 | 37 |
|
34 |
| - def validation_step(self, batch, batch_idx): |
35 |
| - loss = self(batch).sum() |
36 |
| - self.log("valid_loss", loss) |
37 |
| - |
38 |
| - def test_step(self, batch, batch_idx): |
39 |
| - loss = self(batch).sum() |
40 |
| - self.log("test_loss", loss) |
41 |
| - |
42 | 38 | def configure_optimizers(self):
|
43 | 39 | return torch.optim.SGD(self.layer.parameters(), lr=0.1)
|
44 | 40 |
|
45 | 41 |
|
46 | 42 | def run():
|
47 | 43 | train_data = DataLoader(RandomDataset(32, 64), batch_size=2)
|
48 |
| - val_data = DataLoader(RandomDataset(32, 64), batch_size=2) |
49 |
| - test_data = DataLoader(RandomDataset(32, 64), batch_size=2) |
50 | 44 |
|
51 | 45 | model = BoringModel()
|
52 | 46 | trainer = Trainer(
|
53 | 47 | default_root_dir=os.getcwd(),
|
54 | 48 | limit_train_batches=1,
|
55 |
| - limit_val_batches=1, |
56 |
| - limit_test_batches=1, |
57 |
| - num_sanity_val_steps=0, |
58 |
| - max_epochs=1, |
| 49 | + accelerator="cpu", |
| 50 | + strategy="ddp", |
| 51 | + devices=2, |
59 | 52 | enable_model_summary=False,
|
| 53 | + logger=False, |
| 54 | + enable_checkpointing=False, |
| 55 | + enable_progress_bar=False, |
60 | 56 | )
|
61 |
| - trainer.fit(model, train_dataloaders=train_data, val_dataloaders=val_data) |
62 |
| - trainer.test(model, dataloaders=test_data) |
| 57 | + trainer.fit(model, train_dataloaders=train_data) |
63 | 58 |
|
64 | 59 |
|
65 | 60 | if __name__ == "__main__":
|
|
0 commit comments