Commit 33a3ef0

Merge branch 'master' into collective1
2 parents: 573db3e + fabb364

405 files changed: 17,055 additions and 7,798 deletions


.azure-pipelines/gpu-benchmark.yml

Lines changed: 2 additions & 2 deletions

@@ -28,8 +28,8 @@ jobs:
     cancelTimeoutInMinutes: "2"
     pool: gridai-spot-pool
     container:
-      # base ML image: mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04
-      image: "pytorchlightning/pytorch_lightning:base-cuda-py3.8-torch1.8"
+      # should match the one in '.azure-pipelines/gpu-benchmark.yml'
+      image: "pytorchlightning/pytorch_lightning:base-cuda-py3.7-torch1.8"
       options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=32g"
     workspace:
       clean: all

.azure-pipelines/gpu-tests.yml

Lines changed: 5 additions & 7 deletions

@@ -23,7 +23,7 @@ jobs:
     # how much time to give 'run always even if cancelled tasks' before stopping them
     cancelTimeoutInMinutes: "2"

-    pool: gridai-spot-pool
+    pool: azure-gpus-spot

     # ToDo: this need to have installed docker in the base image...
     container:

@@ -50,8 +50,8 @@ jobs:

   - bash: |
       python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)"
-      pip install fairscale>=0.3.4
-      pip install "deepspeed==0.4.3" # FIXME: bug with >= 0.4.4
+      pip install fairscale==0.4.0
+      pip install deepspeed==0.5.4
       pip install . --requirement requirements/devel.txt
       pip list
     displayName: 'Install dependencies'

@@ -106,10 +106,8 @@ jobs:
       set -e
       python -m pytest pl_examples -v --maxfail=2 --durations=0
       bash pl_examples/run_examples.sh --trainer.gpus=1
-      bash pl_examples/run_examples.sh --trainer.gpus=2 --trainer.accelerator=ddp
-      bash pl_examples/run_examples.sh --trainer.gpus=2 --trainer.accelerator=ddp --trainer.precision=16
-      bash pl_examples/run_examples.sh --trainer.gpus=2 --trainer.accelerator=dp
-      bash pl_examples/run_examples.sh --trainer.gpus=2 --trainer.accelerator=dp --trainer.precision=16
+      bash pl_examples/run_examples.sh --trainer.gpus=2 --trainer.strategy=ddp
+      bash pl_examples/run_examples.sh --trainer.gpus=2 --trainer.strategy=ddp --trainer.precision=16
     env:
       PL_USE_MOCKED_MNIST: "1"
     displayName: 'Testing: examples'
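
The install step above strips horovod from requirements/extra.txt with a single `python -c` command before pinning fairscale and deepspeed. Expanded for readability, that one-liner corresponds roughly to the following sketch (illustration only, not part of the commit):

    # Sketch: the horovod-filter one-liner from the install step, written out.
    # It keeps every requirement that does not mention horovod and rewrites
    # requirements/extra.txt in place before 'pip install' runs.
    fname = "requirements/extra.txt"

    with open(fname) as fp:
        lines = [line for line in fp.readlines() if "horovod" not in line]

    with open(fname, "w") as fp:
        fp.writelines(lines)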

.codecov.yml

Lines changed: 1 addition & 1 deletion

@@ -45,7 +45,7 @@ coverage:
     patch:
       default:
         target: 50% # specify the target "X%" coverage to hit
-        # threshold: 50% # allow this much decrease on patch
+        threshold: 5% # allow this much decrease on patch
     changes: false

 # https://docs.codecov.com/docs/github-checks#disabling-github-checks-patch-annotations

.deepsource.toml

Lines changed: 0 additions & 26 deletions
This file was deleted.

.github/CODEOWNERS

Lines changed: 2 additions & 1 deletion

@@ -5,7 +5,7 @@
 # the repo. Unless a later match takes precedence,
 # @global-owner1 and @global-owner2 will be requested for
 # review when someone opens a pull request.
-* @williamfalcon @borda @tchaton @SeanNaren @carmocca @awaelchli @justusschock @kaushikb11
+* @williamfalcon @borda @tchaton @SeanNaren @carmocca @awaelchli @justusschock @kaushikb11 @rohitgr7

 # CI/CD and configs
 /.github/ @borda @tchaton @carmocca

@@ -23,6 +23,7 @@
 /pytorch_lightning/callbacks @williamfalcon @tchaton @carmocca @borda @kaushikb11
 /pytorch_lightning/core @tchaton @SeanNaren @borda @carmocca @justusschock @kaushikb11
 /pytorch_lightning/distributed @williamfalcon @tchaton @awaelchli @kaushikb11
+/pytorch_lightning/lite @tchaton @awaelchli @carmocca
 /pytorch_lightning/loggers @tchaton @awaelchli @borda
 /pytorch_lightning/loggers/wandb.py @borisdayma
 /pytorch_lightning/loggers/neptune.py @shnela @HubertJaworski @pkasprzyk @pitercl @Raalsky @aniezurawski @kamil-kaczmarek

.github/CONTRIBUTING.md

Lines changed: 0 additions & 2 deletions

@@ -316,8 +316,6 @@ def test_explain_what_is_being_tested(tmpdir):
     Test description about text reason to be
     """

-    # os.environ["PL_DEV_DEBUG"] = '1' # [OPTIONAL] When activated, you can use internal trainer.dev_debugger
-
     class ExtendedModel(BoringModel):
         ...

.github/ISSUE_TEMPLATE/code_improvement.md

Lines changed: 4 additions & 6 deletions

@@ -26,14 +26,12 @@ ______________________________________________________________________

 #### If you enjoy Lightning, check out our other projects! ⚡

-<sub>
-
 - [**Metrics**](https://github.com/PyTorchLightning/metrics): Machine learning metrics for distributed, scalable PyTorch applications.

-- [**Flash**](https://github.com/PyTorchLightning/lightning-flash): The fastest way to get a Lightning baseline! A collection of tasks for fast prototyping, baselining, finetuning and solving problems with deep learning
+- [**Lite**](https://pytorch-lightning.readthedocs.io/en/latest/starter/lightning_lite.html): enables pure PyTorch users to scale their existing code on any kind of device while retaining full control over their own loops and optimization logic.

-- [**Bolts**](https://github.com/PyTorchLightning/lightning-bolts): Pretrained SOTA Deep Learning models, callbacks and more for research and production with PyTorch Lightning and PyTorch
+- [**Flash**](https://github.com/PyTorchLightning/lightning-flash): The fastest way to get a Lightning baseline! A collection of tasks for fast prototyping, baselining, fine-tuning, and solving problems with deep learning.

-- [**Lightning Transformers**](https://github.com/PyTorchLightning/lightning-transformers): Flexible interface for high performance research using SOTA Transformers leveraging Pytorch Lightning, Transformers, and Hydra.
+- [**Bolts**](https://github.com/PyTorchLightning/lightning-bolts): Pretrained SOTA Deep Learning models, callbacks, and more for research and production with PyTorch Lightning and PyTorch.

-</sub>
+- [**Lightning Transformers**](https://github.com/PyTorchLightning/lightning-transformers): Flexible interface for high-performance research using SOTA Transformers leveraging Pytorch Lightning, Transformers, and Hydra.

.github/ISSUE_TEMPLATE/documentation.md

Lines changed: 14 additions & 0 deletions

@@ -17,3 +17,17 @@ For typos and doc fixes, please go ahead and:
 For very simple fixes, you can submit a PR without a linked issue.

 Thanks!
+
+______________________________________________________________________
+
+#### If you enjoy Lightning, check out our other projects! ⚡
+
+- [**Metrics**](https://github.com/PyTorchLightning/metrics): Machine learning metrics for distributed, scalable PyTorch applications.
+
+- [**Lite**](https://pytorch-lightning.readthedocs.io/en/latest/starter/lightning_lite.html): enables pure PyTorch users to scale their existing code on any kind of device while retaining full control over their own loops and optimization logic.
+
+- [**Flash**](https://github.com/PyTorchLightning/lightning-flash): The fastest way to get a Lightning baseline! A collection of tasks for fast prototyping, baselining, fine-tuning, and solving problems with deep learning.
+
+- [**Bolts**](https://github.com/PyTorchLightning/lightning-bolts): Pretrained SOTA Deep Learning models, callbacks, and more for research and production with PyTorch Lightning and PyTorch.
+
+- [**Lightning Transformers**](https://github.com/PyTorchLightning/lightning-transformers): Flexible interface for high-performance research using SOTA Transformers leveraging Pytorch Lightning, Transformers, and Hydra.

.github/ISSUE_TEMPLATE/feature_request.md

Lines changed: 4 additions & 6 deletions

@@ -30,14 +30,12 @@ ______________________________________________________________________

 #### If you enjoy Lightning, check out our other projects! ⚡

-<sub>
-
 - [**Metrics**](https://github.com/PyTorchLightning/metrics): Machine learning metrics for distributed, scalable PyTorch applications.

-- [**Flash**](https://github.com/PyTorchLightning/lightning-flash): The fastest way to get a Lightning baseline! A collection of tasks for fast prototyping, baselining, finetuning and solving problems with deep learning
+- [**Lite**](https://pytorch-lightning.readthedocs.io/en/latest/starter/lightning_lite.html): enables pure PyTorch users to scale their existing code on any kind of device while retaining full control over their own loops and optimization logic.

-- [**Bolts**](https://github.com/PyTorchLightning/lightning-bolts): Pretrained SOTA Deep Learning models, callbacks and more for research and production with PyTorch Lightning and PyTorch
+- [**Flash**](https://github.com/PyTorchLightning/lightning-flash): The fastest way to get a Lightning baseline! A collection of tasks for fast prototyping, baselining, fine-tuning, and solving problems with deep learning.

-- [**Lightning Transformers**](https://github.com/PyTorchLightning/lightning-transformers): Flexible interface for high performance research using SOTA Transformers leveraging Pytorch Lightning, Transformers, and Hydra.
+- [**Bolts**](https://github.com/PyTorchLightning/lightning-bolts): Pretrained SOTA Deep Learning models, callbacks, and more for research and production with PyTorch Lightning and PyTorch.

-</sub>
+- [**Lightning Transformers**](https://github.com/PyTorchLightning/lightning-transformers): Flexible interface for high-performance research using SOTA Transformers leveraging Pytorch Lightning, Transformers, and Hydra.

.github/workflows/ci_dockers.yml

Lines changed: 22 additions & 30 deletions

@@ -1,4 +1,4 @@
-name: CI build Docker
+name: Docker
 # https://www.docker.com/blog/first-docker-github-action-is-here
 # https://github.com/docker/build-push-action
 # see: https://help.github.com/en/actions/reference/events-that-trigger-workflows

@@ -23,8 +23,9 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python_version: ["3.8"]
-        pytorch_version: ["1.6", "1.8"]
+        # the config used in '.azure-pipelines/gpu-tests.yml' since the Dockerfile uses the cuda image
+        python_version: ["3.7"]
+        pytorch_version: ["1.8"]
     steps:
       - name: Checkout
         uses: actions/checkout@v2

@@ -45,8 +46,9 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
+        # the config used in '.circleci/config.yml`'
         python_version: ["3.7"]
-        xla_version: ["1.6", "1.8", "nightly"]
+        xla_version: ["1.8"]
     steps:
       - name: Checkout
         uses: actions/checkout@v2

@@ -60,58 +62,46 @@ jobs:
           XLA_VERSION=${{ matrix.xla_version }}
         file: dockers/base-xla/Dockerfile
         push: false
-    timeout-minutes: 50
+    timeout-minutes: 60

   build-CUDA:
     runs-on: ubuntu-20.04
     strategy:
       fail-fast: false
       matrix:
-        include:
-          # todo: see notes in Dockerfile
-          - python_version: "3.7"
-            pytorch_version: "1.6"
-          - python_version: "3.9"
-            pytorch_version: "1.9"
+        # the config used in '.azure-pipelines/gpu-tests.yml'
+        python_version: ["3.7"]
+        pytorch_version: ["1.8"]
     steps:
       - name: Checkout
         uses: actions/checkout@v2
-
       - name: Build CUDA Docker
         # publish master/release
         uses: docker/build-push-action@v2
         with:
           build-args: |
             PYTHON_VERSION=${{ matrix.python_version }}
             PYTORCH_VERSION=${{ matrix.pytorch_version }}
-            CUDA_VERSION=10.2
           file: dockers/base-cuda/Dockerfile
           push: false
-    timeout-minutes: 50
+    timeout-minutes: 75

   build-Conda:
     runs-on: ubuntu-20.04
     strategy:
       fail-fast: false
       matrix:
-        include:
-          - python_version: "3.7"
-            pytorch_version: "1.6"
-          - python_version: "3.8"
-            pytorch_version: "1.9"
-          - python_version: "3.9"
-            pytorch_version: "1.10"
+        # the config used in '.github/workflows/ci_test-conda.yml'
+        python_version: ["3.8"]
+        pytorch_version: ["1.7", "1.8", "1.9", "1.10"]
     steps:
       - name: Checkout
         uses: actions/checkout@v2
-
-      # see: https://pytorch.org/get-started/previous-versions/
       - run: |
           cuda=$(python -c "from distutils.version import LooseVersion as LVer ; print(11.1 if LVer('${{matrix.pytorch_version}}') > LVer('1.7') else 10.2)" 2>&1)
           echo "::set-output name=CUDA::$cuda"
        id: extend
-
-      - name: Build CUDA Docker
+      - name: Build Conda Docker
        # publish master/release
        uses: docker/build-push-action@v2
        with:

@@ -121,16 +111,18 @@ jobs:
           CUDA_VERSION=${{ steps.extend.outputs.CUDA }}
         file: dockers/base-conda/Dockerfile
         push: false
-    timeout-minutes: 50
+    timeout-minutes: 75

   build-ipu:
     runs-on: ubuntu-20.04
     strategy:
       fail-fast: false
       matrix:
-        include:
-          - python_version: "3.8"
-            pytorch_version: "1.7"
+        # the config used in 'dockers/ipu-ci-runner/Dockerfile'
+        python_version: ["3.9"] # latest
+        # TODO: upgrade - PopTorch 2.2 uses torch 1.9, see:
+        # https://docs.graphcore.ai/projects/poptorch-user-guide/en/latest/installation.html#version-compatibility
+        pytorch_version: ["1.7"]
     steps:
       - name: Checkout
         uses: actions/checkout@v2

@@ -154,4 +146,4 @@ jobs:
           PYTORCH_VERSION=${{ matrix.pytorch_version }}
         file: dockers/ipu-ci-runner/Dockerfile
         push: false
-    timeout-minutes: 50
+    timeout-minutes: 60
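
The build-Conda job above picks its CUDA version from the PyTorch version inside a `python -c` one-liner. Written out, the selection logic amounts to roughly the following sketch (illustration only, with the same 1.7 cutoff as in the workflow):

    # Sketch: CUDA selection used by the build-Conda job above.
    # PyTorch versions newer than 1.7 map to CUDA 11.1, older ones to CUDA 10.2.
    from distutils.version import LooseVersion as LVer

    def pick_cuda(pytorch_version: str) -> str:
        # LooseVersion compares "1.10" > "1.7" numerically, where a plain
        # string comparison would get it wrong.
        return "11.1" if LVer(pytorch_version) > LVer("1.7") else "10.2"

    for pt in ("1.7", "1.8", "1.9", "1.10"):
        print(pt, "->", pick_cuda(pt))  # 1.7 -> 10.2, the rest -> 11.1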

.github/workflows/ci_pkg-install.yml

Lines changed: 2 additions & 2 deletions

@@ -1,4 +1,4 @@
-name: Install pkg
+name: Package

 # see: https://help.github.com/en/actions/reference/events-that-trigger-workflows
 on: # Trigger the workflow on push or pull request, but only for the master branch

@@ -9,7 +9,7 @@ on: # Trigger the workflow on push or pull request, but only for the master bra

 jobs:

-  pkg-install:
+  install:
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false

.github/workflows/ci_schema.yml

Lines changed: 2 additions & 2 deletions

@@ -1,11 +1,11 @@
-name: CI action schema
+name: Schema
 on: # Trigger the workflow on push or pull request, but only for the master branch
   push: {}
   pull_request:
     branches: [master, "release/*"]

 jobs:
-  validate-schema:
+  check:
     runs-on: ubuntu-20.04
     steps:
       - name: Checkout

.github/workflows/ci_test-base.yml

Lines changed: 10 additions & 22 deletions

@@ -1,4 +1,6 @@
-name: CI basic testing
+# this jobs runs `pytest` over the source directory. It does not install any extra dependencies.
+# this is useful to catch errors where an import has been added which is not part of the basic dependencies.
+name: Test

 # see: https://help.github.com/en/actions/reference/events-that-trigger-workflows
 on: # Trigger the workflow on push or pull request, but only for the master branch

@@ -8,15 +10,14 @@ on: # Trigger the workflow on push or pull request, but only for the master bra
     branches: [master, "release/*"]

 jobs:
-  doctest:
-
+  source:
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
-      # max-parallel: 6
       matrix:
-        os: [ubuntu-20.04, windows-2019, macOS-10.15]
-        python-version: [3.8]
+        os: [ubuntu-20.04]
+        # this will install stable torch
+        python-version: [3.9]

     # Timeout: https://stackoverflow.com/a/59076067/4521646
     timeout-minutes: 20

@@ -27,12 +28,6 @@ jobs:
       with:
         python-version: ${{ matrix.python-version }}

-      # Github Actions: Run step on specific OS: https://stackoverflow.com/a/57948488/4521646
-      - name: Setup macOS
-        if: runner.os == 'macOS'
-        run: |
-          brew install libomp # https://github.com/pytorch/pytorch/issues/20030
-
       - name: Weekly reset caching
         run: echo "::set-output name=period::$(python -c 'import time ; days = time.time() / 60 / 60 / 24 ; print(int(days / 7))' 2>&1)"
         id: times

@@ -54,21 +49,14 @@ jobs:

       - name: Install dependencies
         run: |
-          python -m pip install --upgrade --user pip
-          pip install --requirement ./requirements.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --upgrade
-          pip install "pytest>6.0" "pytest-cov>2.10" --upgrade-strategy only-if-needed
           python --version
+          python -m pip install --upgrade --user pip
           pip --version
+          pip install --requirement requirements.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --upgrade
+          pip install --requirement requirements/test.txt
           pip list
         shell: bash

-      - name: Cache datasets
-        uses: actions/cache@v2
-        with:
-          path: Datasets # This path is specific to Ubuntu
-          # Look to see if there is a cache hit for the corresponding requirements file
-          key: PL-dataset
-
       - name: Test Package [only]
         run: |
           # NOTE: run coverage on tests does not propagate failure status for Win, https://github.com/nedbat/coveragepy/issues/1003
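
The 'Weekly reset caching' step kept above derives its `period` output from the number of whole weeks since the Unix epoch, so the cache key rolls over once a week. The computation embedded in its `python -c` call is roughly this sketch:

    # Sketch: the weekly cache-busting value from the 'Weekly reset caching' step.
    # It counts whole weeks since the Unix epoch; the value changes once per week,
    # which invalidates the cached pip downloads on a weekly schedule.
    import time

    days = time.time() / 60 / 60 / 24  # seconds since epoch -> days
    period = int(days / 7)             # whole weeks since epoch
    print(period)                      # exposed as the step output "period"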
