Commit 33a3ef0

Merge branch 'master' into collective1
2 parents: 573db3e + fabb364

405 files changed: 17,055 additions and 7,798 deletions


.azure-pipelines/gpu-benchmark.yml

Lines changed: 2 additions & 2 deletions

@@ -28,8 +28,8 @@ jobs:
     cancelTimeoutInMinutes: "2"
     pool: gridai-spot-pool
     container:
-      # base ML image: mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04
-      image: "pytorchlightning/pytorch_lightning:base-cuda-py3.8-torch1.8"
+      # should match the one in '.azure-pipelines/gpu-benchmark.yml'
+      image: "pytorchlightning/pytorch_lightning:base-cuda-py3.7-torch1.8"
       options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=32g"
     workspace:
       clean: all

.azure-pipelines/gpu-tests.yml

Lines changed: 5 additions & 7 deletions

@@ -23,7 +23,7 @@ jobs:
     # how much time to give 'run always even if cancelled tasks' before stopping them
     cancelTimeoutInMinutes: "2"

-    pool: gridai-spot-pool
+    pool: azure-gpus-spot

     # ToDo: this need to have installed docker in the base image...
     container:

@@ -50,8 +50,8 @@ jobs:

   - bash: |
       python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)"
-      pip install fairscale>=0.3.4
-      pip install "deepspeed==0.4.3" # FIXME: bug with >= 0.4.4
+      pip install fairscale==0.4.0
+      pip install deepspeed==0.5.4
       pip install . --requirement requirements/devel.txt
       pip list
     displayName: 'Install dependencies'

@@ -106,10 +106,8 @@ jobs:
       set -e
       python -m pytest pl_examples -v --maxfail=2 --durations=0
       bash pl_examples/run_examples.sh --trainer.gpus=1
-      bash pl_examples/run_examples.sh --trainer.gpus=2 --trainer.accelerator=ddp
-      bash pl_examples/run_examples.sh --trainer.gpus=2 --trainer.accelerator=ddp --trainer.precision=16
-      bash pl_examples/run_examples.sh --trainer.gpus=2 --trainer.accelerator=dp
-      bash pl_examples/run_examples.sh --trainer.gpus=2 --trainer.accelerator=dp --trainer.precision=16
+      bash pl_examples/run_examples.sh --trainer.gpus=2 --trainer.strategy=ddp
+      bash pl_examples/run_examples.sh --trainer.gpus=2 --trainer.strategy=ddp --trainer.precision=16
     env:
       PL_USE_MOCKED_MNIST: "1"
     displayName: 'Testing: examples'
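
The install step above strips horovod from requirements/extra.txt with a single `python -c` command before pinning fairscale and deepspeed. Expanded for readability, that one-liner corresponds roughly to the following sketch (illustration only, not part of the commit):

    # Sketch: the horovod-filter one-liner from the install step, written out.
    # It keeps every requirement that does not mention horovod and rewrites
    # requirements/extra.txt in place before 'pip install' runs.
    fname = "requirements/extra.txt"

    with open(fname) as fp:
        lines = [line for line in fp.readlines() if "horovod" not in line]

    with open(fname, "w") as fp:
        fp.writelines(lines)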

.codecov.yml

Lines changed: 1 addition & 1 deletion

@@ -45,7 +45,7 @@ coverage:
     patch:
       default:
         target: 50% # specify the target "X%" coverage to hit
-        # threshold: 50% # allow this much decrease on patch
+        threshold: 5% # allow this much decrease on patch
     changes: false

 # https://docs.codecov.com/docs/github-checks#disabling-github-checks-patch-annotations

.deepsource.toml

Lines changed: 0 additions & 26 deletions
This file was deleted.

.github/CODEOWNERS

Lines changed: 2 additions & 1 deletion

@@ -5,7 +5,7 @@
 # the repo. Unless a later match takes precedence,
 # @global-owner1 and @global-owner2 will be requested for
 # review when someone opens a pull request.
-* @williamfalcon @borda @tchaton @SeanNaren @carmocca @awaelchli @justusschock @kaushikb11
+* @williamfalcon @borda @tchaton @SeanNaren @carmocca @awaelchli @justusschock @kaushikb11 @rohitgr7

 # CI/CD and configs
 /.github/ @borda @tchaton @carmocca

@@ -23,6 +23,7 @@
 /pytorch_lightning/callbacks @williamfalcon @tchaton @carmocca @borda @kaushikb11
 /pytorch_lightning/core @tchaton @SeanNaren @borda @carmocca @justusschock @kaushikb11
 /pytorch_lightning/distributed @williamfalcon @tchaton @awaelchli @kaushikb11
+/pytorch_lightning/lite @tchaton @awaelchli @carmocca
 /pytorch_lightning/loggers @tchaton @awaelchli @borda
 /pytorch_lightning/loggers/wandb.py @borisdayma
 /pytorch_lightning/loggers/neptune.py @shnela @HubertJaworski @pkasprzyk @pitercl @Raalsky @aniezurawski @kamil-kaczmarek

.github/CONTRIBUTING.md

Lines changed: 0 additions & 2 deletions

@@ -316,8 +316,6 @@ def test_explain_what_is_being_tested(tmpdir):
     Test description about text reason to be
     """

-    # os.environ["PL_DEV_DEBUG"] = '1' # [OPTIONAL] When activated, you can use internal trainer.dev_debugger
-
     class ExtendedModel(BoringModel):
         ...

.github/ISSUE_TEMPLATE/code_improvement.md

Lines changed: 4 additions & 6 deletions

@@ -26,14 +26,12 @@ ______________________________________________________________________

 #### If you enjoy Lightning, check out our other projects! ⚡

-<sub>
-
 - [**Metrics**](https://github.com/PyTorchLightning/metrics): Machine learning metrics for distributed, scalable PyTorch applications.

-- [**Flash**](https://github.com/PyTorchLightning/lightning-flash): The fastest way to get a Lightning baseline! A collection of tasks for fast prototyping, baselining, finetuning and solving problems with deep learning
+- [**Lite**](https://pytorch-lightning.readthedocs.io/en/latest/starter/lightning_lite.html): enables pure PyTorch users to scale their existing code on any kind of device while retaining full control over their own loops and optimization logic.

-- [**Bolts**](https://github.com/PyTorchLightning/lightning-bolts): Pretrained SOTA Deep Learning models, callbacks and more for research and production with PyTorch Lightning and PyTorch
+- [**Flash**](https://github.com/PyTorchLightning/lightning-flash): The fastest way to get a Lightning baseline! A collection of tasks for fast prototyping, baselining, fine-tuning, and solving problems with deep learning.

-- [**Lightning Transformers**](https://github.com/PyTorchLightning/lightning-transformers): Flexible interface for high performance research using SOTA Transformers leveraging Pytorch Lightning, Transformers, and Hydra.
+- [**Bolts**](https://github.com/PyTorchLightning/lightning-bolts): Pretrained SOTA Deep Learning models, callbacks, and more for research and production with PyTorch Lightning and PyTorch.

-</sub>
+- [**Lightning Transformers**](https://github.com/PyTorchLightning/lightning-transformers): Flexible interface for high-performance research using SOTA Transformers leveraging Pytorch Lightning, Transformers, and Hydra.

.github/ISSUE_TEMPLATE/documentation.md

Lines changed: 14 additions & 0 deletions

@@ -17,3 +17,17 @@ For typos and doc fixes, please go ahead and:
 For very simple fixes, you can submit a PR without a linked issue.

 Thanks!
+
+______________________________________________________________________
+
+#### If you enjoy Lightning, check out our other projects! ⚡
+
+- [**Metrics**](https://github.com/PyTorchLightning/metrics): Machine learning metrics for distributed, scalable PyTorch applications.
+
+- [**Lite**](https://pytorch-lightning.readthedocs.io/en/latest/starter/lightning_lite.html): enables pure PyTorch users to scale their existing code on any kind of device while retaining full control over their own loops and optimization logic.
+
+- [**Flash**](https://github.com/PyTorchLightning/lightning-flash): The fastest way to get a Lightning baseline! A collection of tasks for fast prototyping, baselining, fine-tuning, and solving problems with deep learning.
+
+- [**Bolts**](https://github.com/PyTorchLightning/lightning-bolts): Pretrained SOTA Deep Learning models, callbacks, and more for research and production with PyTorch Lightning and PyTorch.
+
+- [**Lightning Transformers**](https://github.com/PyTorchLightning/lightning-transformers): Flexible interface for high-performance research using SOTA Transformers leveraging Pytorch Lightning, Transformers, and Hydra.

.github/ISSUE_TEMPLATE/feature_request.md

Lines changed: 4 additions & 6 deletions

@@ -30,14 +30,12 @@ ______________________________________________________________________

 #### If you enjoy Lightning, check out our other projects! ⚡

-<sub>
-
 - [**Metrics**](https://github.com/PyTorchLightning/metrics): Machine learning metrics for distributed, scalable PyTorch applications.

-- [**Flash**](https://github.com/PyTorchLightning/lightning-flash): The fastest way to get a Lightning baseline! A collection of tasks for fast prototyping, baselining, finetuning and solving problems with deep learning
+- [**Lite**](https://pytorch-lightning.readthedocs.io/en/latest/starter/lightning_lite.html): enables pure PyTorch users to scale their existing code on any kind of device while retaining full control over their own loops and optimization logic.

-- [**Bolts**](https://github.com/PyTorchLightning/lightning-bolts): Pretrained SOTA Deep Learning models, callbacks and more for research and production with PyTorch Lightning and PyTorch
+- [**Flash**](https://github.com/PyTorchLightning/lightning-flash): The fastest way to get a Lightning baseline! A collection of tasks for fast prototyping, baselining, fine-tuning, and solving problems with deep learning.

-- [**Lightning Transformers**](https://github.com/PyTorchLightning/lightning-transformers): Flexible interface for high performance research using SOTA Transformers leveraging Pytorch Lightning, Transformers, and Hydra.
+- [**Bolts**](https://github.com/PyTorchLightning/lightning-bolts): Pretrained SOTA Deep Learning models, callbacks, and more for research and production with PyTorch Lightning and PyTorch.

-</sub>
+- [**Lightning Transformers**](https://github.com/PyTorchLightning/lightning-transformers): Flexible interface for high-performance research using SOTA Transformers leveraging Pytorch Lightning, Transformers, and Hydra.

.github/workflows/ci_dockers.yml

Lines changed: 22 additions & 30 deletions

@@ -1,4 +1,4 @@
-name: CI build Docker
+name: Docker
 # https://www.docker.com/blog/first-docker-github-action-is-here
 # https://github.com/docker/build-push-action
 # see: https://help.github.com/en/actions/reference/events-that-trigger-workflows

@@ -23,8 +23,9 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python_version: ["3.8"]
-        pytorch_version: ["1.6", "1.8"]
+        # the config used in '.azure-pipelines/gpu-tests.yml' since the Dockerfile uses the cuda image
+        python_version: ["3.7"]
+        pytorch_version: ["1.8"]
     steps:
       - name: Checkout
         uses: actions/checkout@v2

@@ -45,8 +46,9 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
+        # the config used in '.circleci/config.yml`'
         python_version: ["3.7"]
-        xla_version: ["1.6", "1.8", "nightly"]
+        xla_version: ["1.8"]
     steps:
       - name: Checkout
         uses: actions/checkout@v2

@@ -60,58 +62,46 @@ jobs:
           XLA_VERSION=${{ matrix.xla_version }}
         file: dockers/base-xla/Dockerfile
         push: false
-    timeout-minutes: 50
+    timeout-minutes: 60

   build-CUDA:
     runs-on: ubuntu-20.04
     strategy:
       fail-fast: false
       matrix:
-        include:
-          # todo: see notes in Dockerfile
-          - python_version: "3.7"
-            pytorch_version: "1.6"
-          - python_version: "3.9"
-            pytorch_version: "1.9"
+        # the config used in '.azure-pipelines/gpu-tests.yml'
+        python_version: ["3.7"]
+        pytorch_version: ["1.8"]
     steps:
       - name: Checkout
         uses: actions/checkout@v2
-
       - name: Build CUDA Docker
         # publish master/release
         uses: docker/build-push-action@v2
         with:
           build-args: |
             PYTHON_VERSION=${{ matrix.python_version }}
             PYTORCH_VERSION=${{ matrix.pytorch_version }}
-            CUDA_VERSION=10.2
           file: dockers/base-cuda/Dockerfile
           push: false
-    timeout-minutes: 50
+    timeout-minutes: 75

   build-Conda:
     runs-on: ubuntu-20.04
     strategy:
       fail-fast: false
       matrix:
-        include:
-          - python_version: "3.7"
-            pytorch_version: "1.6"
-          - python_version: "3.8"
-            pytorch_version: "1.9"
-          - python_version: "3.9"
-            pytorch_version: "1.10"
+        # the config used in '.github/workflows/ci_test-conda.yml'
+        python_version: ["3.8"]
+        pytorch_version: ["1.7", "1.8", "1.9", "1.10"]
     steps:
       - name: Checkout
         uses: actions/checkout@v2
-
-      # see: https://pytorch.org/get-started/previous-versions/
       - run: |
           cuda=$(python -c "from distutils.version import LooseVersion as LVer ; print(11.1 if LVer('${{matrix.pytorch_version}}') > LVer('1.7') else 10.2)" 2>&1)
           echo "::set-output name=CUDA::$cuda"
        id: extend
-
-      - name: Build CUDA Docker
+      - name: Build Conda Docker
        # publish master/release
        uses: docker/build-push-action@v2
        with:

@@ -121,16 +111,18 @@ jobs:
           CUDA_VERSION=${{ steps.extend.outputs.CUDA }}
         file: dockers/base-conda/Dockerfile
         push: false
-    timeout-minutes: 50
+    timeout-minutes: 75

   build-ipu:
     runs-on: ubuntu-20.04
     strategy:
       fail-fast: false
       matrix:
-        include:
-          - python_version: "3.8"
-            pytorch_version: "1.7"
+        # the config used in 'dockers/ipu-ci-runner/Dockerfile'
+        python_version: ["3.9"] # latest
+        # TODO: upgrade - PopTorch 2.2 uses torch 1.9, see:
+        # https://docs.graphcore.ai/projects/poptorch-user-guide/en/latest/installation.html#version-compatibility
+        pytorch_version: ["1.7"]
     steps:
       - name: Checkout
         uses: actions/checkout@v2

@@ -154,4 +146,4 @@ jobs:
           PYTORCH_VERSION=${{ matrix.pytorch_version }}
         file: dockers/ipu-ci-runner/Dockerfile
         push: false
-    timeout-minutes: 50
+    timeout-minutes: 60
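
The build-Conda job above picks its CUDA version from the PyTorch version inside a `python -c` one-liner. Written out, the selection logic amounts to roughly the following sketch (illustration only, with the same 1.7 cutoff as in the workflow):

    # Sketch: CUDA selection used by the build-Conda job above.
    # PyTorch versions newer than 1.7 map to CUDA 11.1, older ones to CUDA 10.2.
    from distutils.version import LooseVersion as LVer

    def pick_cuda(pytorch_version: str) -> str:
        # LooseVersion compares "1.10" > "1.7" numerically, where a plain
        # string comparison would get it wrong.
        return "11.1" if LVer(pytorch_version) > LVer("1.7") else "10.2"

    for pt in ("1.7", "1.8", "1.9", "1.10"):
        print(pt, "->", pick_cuda(pt))  # 1.7 -> 10.2, the rest -> 11.1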

.github/workflows/ci_pkg-install.yml

Lines changed: 2 additions & 2 deletions

@@ -1,4 +1,4 @@
-name: Install pkg
+name: Package

 # see: https://help.github.com/en/actions/reference/events-that-trigger-workflows
 on: # Trigger the workflow on push or pull request, but only for the master branch

@@ -9,7 +9,7 @@ on: # Trigger the workflow on push or pull request, but only for the master bra

 jobs:

-  pkg-install:
+  install:
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false

.github/workflows/ci_schema.yml

Lines changed: 2 additions & 2 deletions

@@ -1,11 +1,11 @@
-name: CI action schema
+name: Schema
 on: # Trigger the workflow on push or pull request, but only for the master branch
   push: {}
   pull_request:
     branches: [master, "release/*"]

 jobs:
-  validate-schema:
+  check:
     runs-on: ubuntu-20.04
     steps:
       - name: Checkout

.github/workflows/ci_test-base.yml

Lines changed: 10 additions & 22 deletions

@@ -1,4 +1,6 @@
-name: CI basic testing
+# this jobs runs `pytest` over the source directory. It does not install any extra dependencies.
+# this is useful to catch errors where an import has been added which is not part of the basic dependencies.
+name: Test

 # see: https://help.github.com/en/actions/reference/events-that-trigger-workflows
 on: # Trigger the workflow on push or pull request, but only for the master branch

@@ -8,15 +10,14 @@ on: # Trigger the workflow on push or pull request, but only for the master bra
     branches: [master, "release/*"]

 jobs:
-  doctest:
-
+  source:
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
-      # max-parallel: 6
       matrix:
-        os: [ubuntu-20.04, windows-2019, macOS-10.15]
-        python-version: [3.8]
+        os: [ubuntu-20.04]
+        # this will install stable torch
+        python-version: [3.9]

     # Timeout: https://stackoverflow.com/a/59076067/4521646
     timeout-minutes: 20

@@ -27,12 +28,6 @@ jobs:
       with:
         python-version: ${{ matrix.python-version }}

-      # Github Actions: Run step on specific OS: https://stackoverflow.com/a/57948488/4521646
-      - name: Setup macOS
-        if: runner.os == 'macOS'
-        run: |
-          brew install libomp # https://github.com/pytorch/pytorch/issues/20030
-
       - name: Weekly reset caching
         run: echo "::set-output name=period::$(python -c 'import time ; days = time.time() / 60 / 60 / 24 ; print(int(days / 7))' 2>&1)"
         id: times

@@ -54,21 +49,14 @@ jobs:

       - name: Install dependencies
         run: |
-          python -m pip install --upgrade --user pip
-          pip install --requirement ./requirements.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --upgrade
-          pip install "pytest>6.0" "pytest-cov>2.10" --upgrade-strategy only-if-needed
           python --version
+          python -m pip install --upgrade --user pip
           pip --version
+          pip install --requirement requirements.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --upgrade
+          pip install --requirement requirements/test.txt
           pip list
         shell: bash

-      - name: Cache datasets
-        uses: actions/cache@v2
-        with:
-          path: Datasets # This path is specific to Ubuntu
-          # Look to see if there is a cache hit for the corresponding requirements file
-          key: PL-dataset
-
       - name: Test Package [only]
         run: |
           # NOTE: run coverage on tests does not propagate failure status for Win, https://github.com/nedbat/coveragepy/issues/1003
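
The 'Weekly reset caching' step kept above derives its `period` output from the number of whole weeks since the Unix epoch, so the cache key rolls over once a week. The computation embedded in its `python -c` call is roughly this sketch:

    # Sketch: the weekly cache-busting value from the 'Weekly reset caching' step.
    # It counts whole weeks since the Unix epoch; the value changes once per week,
    # which invalidates the cached pip downloads on a weekly schedule.
    import time

    days = time.time() / 60 / 60 / 24  # seconds since epoch -> days
    period = int(days / 7)             # whole weeks since epoch
    print(period)                      # exposed as the step output "period"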
