Skip to content

Commit 80cfbff

Browse files
author
Shuying Sun
committed
2 parents 89f284d + 51b10f7 commit 80cfbff

File tree

118 files changed

+1907
-5159
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

118 files changed

+1907
-5159
lines changed

.github/workflows/ci_dockers.yml

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,6 @@ jobs:
2929
- name: Checkout
3030
uses: actions/checkout@v2
3131

32-
# https://github.com/docker/setup-buildx-action
33-
# Set up Docker Buildx - to use cache-from and cache-to argument of buildx command
34-
- uses: docker/setup-buildx-action@v1
3532
- name: Build PL Docker
3633
# publish master/release
3734
uses: docker/build-push-action@v2
@@ -54,9 +51,6 @@ jobs:
5451
- name: Checkout
5552
uses: actions/checkout@v2
5653

57-
# https://github.com/docker/setup-buildx-action
58-
# Set up Docker Buildx - to use cache-from and cache-to argument of buildx command
59-
- uses: docker/setup-buildx-action@v1
6054
- name: Build XLA Docker
6155
# publish master/release
6256
uses: docker/build-push-action@v2
@@ -93,9 +87,6 @@ jobs:
9387
echo "::set-output name=CUDA::$cuda"
9488
id: extend
9589
96-
# https://github.com/docker/setup-buildx-action
97-
# Set up Docker Buildx - to use cache-from and cache-to argument of buildx command
98-
- uses: docker/setup-buildx-action@v1
9990
- name: Build CUDA Docker
10091
# publish master/release
10192
uses: docker/build-push-action@v2
@@ -130,9 +121,6 @@ jobs:
130121
echo "::set-output name=CUDA::$cuda"
131122
id: extend
132123
133-
# https://github.com/docker/setup-buildx-action
134-
# Set up Docker Buildx - to use cache-from and cache-to argument of buildx command
135-
- uses: docker/setup-buildx-action@v1
136124
- name: Build CUDA Docker
137125
# publish master/release
138126
uses: docker/build-push-action@v2
@@ -150,10 +138,8 @@ jobs:
150138
steps:
151139
- name: Checkout
152140
uses: actions/checkout@v2
153-
# https://github.com/docker/setup-buildx-action
154-
# Set up Docker Buildx - to use cache-from and cache-to argument of buildx command
155-
- uses: docker/setup-buildx-action@v1
156-
- name: Build CUDA Docker
141+
142+
- name: Build NVIDIA Docker
157143
uses: docker/build-push-action@v2
158144
with:
159145
file: dockers/nvidia/Dockerfile

.github/workflows/docs-checks.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ jobs:
9898
# First run the same pipeline as Read-The-Docs
9999
cd docs
100100
make clean
101-
make html --debug --jobs $(nproc) SPHINXOPTS="-W"
101+
make html --debug --jobs $(nproc) SPHINXOPTS="-W --keep-going"
102102
103103
- name: Upload built docs
104104
uses: actions/upload-artifact@v2

.github/workflows/events-nightly.yml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,3 +126,26 @@ jobs:
126126
push: true
127127
tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}
128128
timeout-minutes: 55
129+
130+
# docker-nvidia:
131+
# runs-on: ubuntu-20.04
132+
# steps:
133+
# - name: Checkout
134+
# uses: actions/checkout@v2
135+
#
136+
# # https://github.com/docker/setup-buildx-action
137+
# # Set up Docker Buildx - to use cache-from and cache-to argument of buildx command
138+
# - uses: docker/setup-buildx-action@v1
139+
# - name: Login to DockerHub
140+
# uses: docker/login-action@v1
141+
# with:
142+
# username: ${{ secrets.DOCKER_USERNAME }}
143+
# password: ${{ secrets.DOCKER_PASSWORD }}
144+
#
145+
# - name: Publish NVIDIA to Docker Hub
146+
# uses: docker/build-push-action@v2
147+
# with:
148+
# file: dockers/nvidia/Dockerfile
149+
# push: true
150+
# tags: nvcr.io/pytorchlightning/pytorch_lightning:nvidia
151+
# timeout-minutes: 55

.github/workflows/release-docker.yml

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ on:
88
types: [created]
99

1010
jobs:
11-
build-PL:
11+
cuda-PL:
1212
runs-on: ubuntu-20.04
1313
strategy:
1414
fail-fast: false
@@ -36,3 +36,27 @@ jobs:
3636
build_args: PYTHON_VERSION=${{ matrix.python_version }},PYTORCH_VERSION=${{ matrix.pytorch_version }},LIGHTNING_VERSION=${{ steps.get_version.outputs.RELEASE_VERSION }}
3737
tags: "${{ steps.get_version.outputs.RELEASE_VERSION }}-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }},latest-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}"
3838
timeout-minutes: 55
39+
40+
# nvidia-PL:
41+
# runs-on: ubuntu-20.04
42+
# steps:
43+
# - name: Checkout
44+
# uses: actions/checkout@v2
45+
#
46+
# - name: Get release version
47+
# if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'release'
48+
# id: get_version
49+
# run: echo "::set-output name=RELEASE_VERSION::$(echo ${GITHUB_REF##*/})"
50+
#
51+
# - name: Publish Releases to Docker
52+
# # only on releases
53+
# uses: docker/build-push-action@v1.1.0
54+
# if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'release'
55+
# with:
56+
# repository: nvcr.io/pytorchlightning/pytorch_lightning
57+
# username: ${{ secrets.DOCKER_USERNAME }}
58+
# password: ${{ secrets.DOCKER_PASSWORD }}
59+
# dockerfile: dockers/nvidia/Dockerfile
60+
# build_args: LIGHTNING_VERSION=${{ steps.get_version.outputs.RELEASE_VERSION }}
61+
# tags: "${{ steps.get_version.outputs.RELEASE_VERSION }}-nvidia"
62+
# timeout-minutes: 55

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,3 +157,4 @@ tags
157157
data
158158
MNIST
159159
runs
160+
*trace*

.pre-commit-config.yaml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,3 @@ repos:
3333
hooks:
3434
- id: yapf
3535
args: [--parallel, --in-place]
36-
37-
- repo: https://github.com/pre-commit/mirrors-mypy
38-
rev: v0.790
39-
hooks:
40-
- id: mypy

CHANGELOG.md

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
99

1010
### Added
1111

12-
- Added `RetrievalMAP` metric, the corresponding functional version `retrieval_average_precision` and a generic superclass for retrieval metrics `RetrievalMetric` ([#5032](https://github.com/PyTorchLightning/pytorch-lightning/pull/5032))
13-
1412

1513
- Added a way to print to terminal without breaking up the progress bar ([#5470](https://github.com/PyTorchLightning/pytorch-lightning/pull/5470))
1614

15+
1716
- Added support to checkpoint after training steps in `ModelCheckpoint` callback ([#6146](https://github.com/PyTorchLightning/pytorch-lightning/pull/6146))
1817

18+
1919
- Added `checkpoint` parameter to callback's `on_save_checkpoint` hook ([#6072](https://github.com/PyTorchLightning/pytorch-lightning/pull/6072))
2020

2121

@@ -37,11 +37,25 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
3737
- Added arg to `self.log` that enables users to give custom names when dealing with multiple dataloaders ([#6274](https://github.com/PyTorchLightning/pytorch-lightning/pull/6274))
3838

3939

40+
- Added `teardown` method to `BaseProfiler` so that subclasses can define post-profiling steps outside of `__del__` ([#6370](https://github.com/PyTorchLightning/pytorch-lightning/pull/6370))
41+
42+
43+
- Added `setup` method to `BaseProfiler` so that subclasses can define pre-profiling steps for every process ([#6633](https://github.com/PyTorchLightning/pytorch-lightning/pull/6633))
44+
45+
4046
- Added no return warning to predict ([#6139](https://github.com/PyTorchLightning/pytorch-lightning/pull/6139))
4147

4248

43-
- Added `outputs` parameter to callback's `on_validation_epoch_end` & `on_test_epoch_end` hooks ([#6120](https://github.com/PyTorchLightning/pytorch-lightning/pull/6120))
49+
- Added `Trainer.predict` config validation ([#6543](https://github.com/PyTorchLightning/pytorch-lightning/pull/6543))
50+
4451

52+
- Added `AbstractProfiler` interface ([#6621](https://github.com/PyTorchLightning/pytorch-lightning/pull/6621))
53+
54+
55+
- Added support for including module names for forward in the autograd trace of `PyTorchProfiler` ([#6349](https://github.com/PyTorchLightning/pytorch-lightning/pull/6349))
56+
57+
58+
- Added `outputs` parameter to callback's `on_validation_epoch_end` & `on_test_epoch_end` hooks ([#6120](https://github.com/PyTorchLightning/pytorch-lightning/pull/6120))
4559

4660

4761
### Changed
@@ -58,6 +72,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
5872
- Changed `setup()` and `teardown()` stage argument to take any of `{fit,validate,test,predict}` ([#6386](https://github.com/PyTorchLightning/pytorch-lightning/pull/6386))
5973

6074

75+
- Changed profilers to save separate report files per state and rank ([#6621](https://github.com/PyTorchLightning/pytorch-lightning/pull/6621))
76+
77+
78+
- Changed `PyTorchProfiler` to use `torch.autograd.profiler.record_function` to record functions ([#6349](https://github.com/PyTorchLightning/pytorch-lightning/pull/6349))
79+
80+
6181
### Deprecated
6282

6383
- `period` has been deprecated in favor of `every_n_val_epochs` in the `ModelCheckpoint` callback ([#6146](https://github.com/PyTorchLightning/pytorch-lightning/pull/6146))
@@ -66,6 +86,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
6686
- Deprecated `trainer.running_sanity_check` in favor of `trainer.sanity_checking` ([#4945](https://github.com/PyTorchLightning/pytorch-lightning/pull/4945))
6787

6888

89+
- Deprecated `Profiler(output_filename)` in favor of `dirpath` and `filename` ([#6621](https://github.com/PyTorchLightning/pytorch-lightning/pull/6621))
90+
91+
92+
- Deprecated `PyTorchProfiler(profiled_functions)` in favor of `record_functions` ([#6349](https://github.com/PyTorchLightning/pytorch-lightning/pull/6349))
93+
94+
6995
- Deprecated metrics in favor of `torchmetrics` ([#6505](https://github.com/PyTorchLightning/pytorch-lightning/pull/6505),
7096

7197
[#6530](https://github.com/PyTorchLightning/pytorch-lightning/pull/6530),
@@ -78,6 +104,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
78104

79105
[#6573](https://github.com/PyTorchLightning/pytorch-lightning/pull/6573),
80106

107+
[#6584](https://github.com/PyTorchLightning/pytorch-lightning/pull/6584),
108+
109+
[#6636](https://github.com/PyTorchLightning/pytorch-lightning/pull/6636),
110+
111+
[#6637](https://github.com/PyTorchLightning/pytorch-lightning/pull/6637),
112+
81113
)
82114

83115

@@ -114,6 +146,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
114146

115147
### Fixed
116148

149+
- Added Autocast in validation, test and predict modes for Native AMP ([#6565](https://github.com/PyTorchLightning/pytorch-lightning/pull/6565))
150+
151+
117152
- Made the `Plugin.reduce` method more consistent across all Plugins to reflect a mean-reduction by default ([#6011](https://github.com/PyTorchLightning/pytorch-lightning/pull/6011))
118153

119154

@@ -141,9 +176,18 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
141176
- Fixed LightningModule `all_gather` on cpu tensors ([#6416](https://github.com/PyTorchLightning/pytorch-lightning/pull/6416))
142177

143178

179+
- Fixed a bug where `all_gather` would not work correctly with `tpu_cores=8` ([#6587](https://github.com/PyTorchLightning/pytorch-lightning/pull/6587))
180+
181+
182+
- Updated Gradient Clipping for the TPU Accelerator ([#6576](https://github.com/PyTorchLightning/pytorch-lightning/pull/6576))
183+
184+
144185
- Fixed torch distributed not available in setup hook for DDP ([#6506](https://github.com/PyTorchLightning/pytorch-lightning/pull/6506))
145186

146187

188+
- Fixed comparing required versions ([#6434](https://github.com/PyTorchLightning/pytorch-lightning/pull/6434))
189+
190+
147191
## [1.2.4] - 2021-03-16
148192

149193
### Changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,4 @@ test: clean
2929

3030
docs: clean
3131
pip install --quiet -r requirements/docs.txt
32-
python -m sphinx -b html -W docs/source docs/build
32+
python -m sphinx -b html -W --keep-going docs/source docs/build

azure-pipelines.yml

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -113,12 +113,14 @@ jobs:
113113
python -m pytest benchmarks -v --maxfail=2 --durations=0
114114
displayName: 'Testing: benchmarks'
115115
116-
- bash: |
116+
- script: |
117+
set -e
117118
python -m pytest pl_examples -v --maxfail=2 --durations=0
118119
python setup.py install --user --quiet
119120
bash pl_examples/run_ddp-example.sh
120-
cd pl_examples/basic_examples
121-
bash submit_ddp_job.sh
122-
bash submit_ddp2_job.sh
123-
pip uninstall -y pytorch-lightning
121+
# cd pl_examples/basic_examples
122+
# bash submit_ddp_job.sh
123+
# bash submit_ddp2_job.sh
124+
env:
125+
PL_USE_MOCKED_MNIST: "1"
124126
displayName: 'Examples'

dockers/nvidia/Dockerfile

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
FROM nvcr.io/nvidia/pytorch:20.12-py3
15+
FROM nvcr.io/nvidia/pytorch:21.02-py3
1616

1717
MAINTAINER PyTorchLightning <https://github.com/PyTorchLightning>
1818

@@ -22,16 +22,17 @@ COPY ./ ./pytorch-lightning/
2222

2323
# install dependencies
2424
RUN \
25-
# Disable cache
2625
#conda install "pip>20.1" && \
27-
#pip config set global.cache-dir false && \
28-
if [ -z $LIGHTNING_VERSION ] ; then \
29-
pip install ./pytorch-lightning --no-cache-dir ; \
26+
pip list | grep torch && \
27+
if [ ! -z "$LIGHTNING_VERSION" ] ; then \
3028
rm -rf pytorch-lightning ; \
31-
else \
32-
rm -rf pytorch-lightning ; \
33-
pip install https://github.com/PyTorchLightning/pytorch-lightning/archive/${LIGHTNING_VERSION}.zip --no-cache-dir ; \
34-
fi
29+
wget https://github.com/PyTorchLightning/pytorch-lightning/archive/${LIGHTNING_VERSION}.zip --progress=bar:force:noscroll ; \
30+
unzip ${LIGHTNING_VERSION}.zip ; \
31+
mv pytorch-lightning-*/ pytorch-lightning ; \
32+
rm *.zip ; \
33+
fi && \
34+
pip install ./pytorch-lightning["extra"] --no-cache-dir && \
35+
rm -rf pytorch-lightning
3536

3637
RUN python --version && \
3738
pip --version && \

dockers/release/Dockerfile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ COPY ./ ./pytorch-lightning/
2525

2626
# install dependencies
2727
RUN \
28-
# Disable cache
2928
#conda install "pip>20.1" && \
3029
if [ ! -z "$LIGHTNING_VERSION" ] ; then \
3130
rm -rf pytorch-lightning ; \

docs/source/advanced/multi_gpu.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ Lightning allows multiple ways of training
267267
- TPUs (``tpu_cores=8|x``) (tpu or TPU pod)
268268

269269
.. note::
270-
If you request multiple GPUs or nodes without setting a mode, DDP will be automatically used.
270+
If you request multiple GPUs or nodes without setting a mode, DDP Spawn will be automatically used.
271271

272272
For a deeper understanding of what Lightning is doing, feel free to read this
273273
`guide <https://medium.com/@_willfalcon/9-tips-for-training-lightning-fast-neural-networks-in-pytorch-8e63a502f565>`_.

docs/source/conf.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
# documentation root, use os.path.abspath to make it absolute, like shown here.
1414

1515
# import m2r
16-
import builtins
1716
import glob
1817
import os
1918
import shutil
@@ -27,10 +26,13 @@
2726

2827
FOLDER_GENERATED = 'generated'
2928
SPHINX_MOCK_REQUIREMENTS = int(os.environ.get('SPHINX_MOCK_REQUIREMENTS', True))
30-
if SPHINX_MOCK_REQUIREMENTS:
31-
builtins.__LIGHTNING_SETUP__ = True
3229

33-
import pytorch_lightning # noqa: E402
30+
try:
31+
from pytorch_lightning import info
32+
except ImportError:
33+
# alternative https://stackoverflow.com/a/67692/4521646
34+
sys.path.append(os.path.join(PATH_ROOT, "pytorch_lightning"))
35+
import info
3436

3537
# -- Project documents -------------------------------------------------------
3638

@@ -79,13 +81,13 @@ def _transform_changelog(path_in: str, path_out: str) -> None:
7981
# -- Project information -----------------------------------------------------
8082

8183
project = 'PyTorch Lightning'
82-
copyright = pytorch_lightning.__copyright__
83-
author = pytorch_lightning.__author__
84+
copyright = info.__copyright__
85+
author = info.__author__
8486

8587
# The short X.Y version
86-
version = pytorch_lightning.__version__
88+
version = info.__version__
8789
# The full version, including alpha/beta/rc tags
88-
release = pytorch_lightning.__version__
90+
release = info.__version__
8991

9092
# -- General configuration ---------------------------------------------------
9193

@@ -176,8 +178,8 @@ def _transform_changelog(path_in: str, path_out: str) -> None:
176178
# documentation.
177179

178180
html_theme_options = {
179-
'pytorch_project': pytorch_lightning.__homepage__,
180-
'canonical_url': pytorch_lightning.__homepage__,
181+
'pytorch_project': info.__homepage__,
182+
'canonical_url': info.__homepage__,
181183
'collapse_navigation': False,
182184
'display_version': True,
183185
'logo_only': False,
@@ -279,6 +281,7 @@ def _transform_changelog(path_in: str, path_out: str) -> None:
279281
'torch': ('https://pytorch.org/docs/stable/', None),
280282
'numpy': ('https://numpy.org/doc/stable/', None),
281283
'PIL': ('https://pillow.readthedocs.io/en/stable/', None),
284+
'torchmetrics': ('https://torchmetrics.readthedocs.io/en/stable/', None),
282285
}
283286

284287
# -- Options for todo extension ----------------------------------------------
@@ -331,6 +334,7 @@ def package_list_from_file(file):
331334
}
332335
MOCK_PACKAGES = []
333336
if SPHINX_MOCK_REQUIREMENTS:
337+
MOCK_PACKAGES += ['fairscale']
334338
# mock also base packages when we are on RTD since we don't install them there
335339
MOCK_PACKAGES += package_list_from_file(os.path.join(PATH_ROOT, 'requirements.txt'))
336340
MOCK_PACKAGES += package_list_from_file(os.path.join(PATH_ROOT, 'requirements', 'extra.txt'))

0 commit comments

Comments
 (0)