Skip to content

Commit 7893eb2

Browse files
authored
Prepare CI to run on 3090s (#14910)
1 parent 4c53eae commit 7893eb2

File tree

6 files changed

+24
-12
lines changed

6 files changed

+24
-12
lines changed

.azure/gpu-tests-lite.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,13 @@ jobs:
6363
6464
- bash: |
6565
set -e
66-
TORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])")
67-
CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
66+
PYTORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])")
6867
python ./requirements/pytorch/adjust-versions.py requirements/lite/base.txt ${PYTORCH_VERSION}
68+
69+
CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
6970
pip install -e .[strategies] --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html
7071
pip install --requirement requirements/pytorch/devel.txt --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html
72+
7173
pip list
7274
env:
7375
PACKAGE_NAME: pytorch

.azure/gpu-tests.yml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,15 +91,19 @@ jobs:
9191
set -e
9292
python -c "fname = 'requirements/pytorch/strategies.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)"
9393
python -c "fname = 'requirements/pytorch/strategies.txt' ; lines = [line for line in open(fname).readlines() if 'bagua' not in line] ; open(fname, 'w').writelines(lines)"
94-
TORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])")
95-
CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
96-
CUDA_VERSION_BAGUA=$(python -c "print([ver for ver in [116,113,111,102] if $CUDA_VERSION_MM >= ver][0])")
94+
95+
PYTORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])")
9796
python ./requirements/pytorch/adjust-versions.py requirements/pytorch/base.txt ${PYTORCH_VERSION}
9897
python ./requirements/pytorch/adjust-versions.py requirements/pytorch/extra.txt ${PYTORCH_VERSION}
9998
python ./requirements/pytorch/adjust-versions.py requirements/pytorch/examples.txt ${PYTORCH_VERSION}
100-
pip install "bagua-cuda$CUDA_VERSION_BAGUA"
99+
100+
CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
101101
pip install -e .[strategies] --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html
102102
pip install --requirement requirements/pytorch/devel.txt --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html
103+
104+
CUDA_VERSION_BAGUA=$(python -c "print([ver for ver in [116,113,111,102] if $CUDA_VERSION_MM >= ver][0])")
105+
pip install "bagua-cuda$CUDA_VERSION_BAGUA"
106+
103107
pip list
104108
env:
105109
PACKAGE_NAME: pytorch

tests/tests_pytorch/profilers/test_profiler.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -474,10 +474,9 @@ def look_for_trace(trace_dir):
474474

475475

476476
@RunIf(min_cuda_gpus=1, standalone=True)
477-
def test_pytorch_profiler_nested_emit_nvtx(tmpdir):
477+
def test_pytorch_profiler_nested_emit_nvtx():
478478
"""This test check emit_nvtx is correctly supported."""
479479
profiler = PyTorchProfiler(use_cuda=True, emit_nvtx=True)
480-
481480
model = BoringModel()
482481
trainer = Trainer(
483482
fast_dev_run=True,

tests/tests_pytorch/run_standalone_tasks.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,11 @@
1515
set -e
1616
# THIS FILE ASSUMES IT IS RUN INSIDE THE tests/tests_pytorch DIRECTORY
1717

18-
if nvcc --version; then
18+
# this environment variable allows special tests to run
19+
export PL_RUN_STANDALONE_TESTS=1
20+
21+
can_run_nvprof=$(python -c "import torch; print(torch.cuda.is_available() and torch.cuda.get_device_capability()[0] < 8)")
22+
if [[ $can_run_nvprof == "True" ]]; then
1923
echo "Running profilers/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx"
2024
nvprof --profile-from-start off -o trace_name.prof -- python -m coverage run --source pytorch_lightning --append -m pytest --no-header profilers/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx
2125
fi

tests/tests_pytorch/run_standalone_tests.sh

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,6 @@ path_suffix=$(basename "$(dirname "$(pwd)")")/$(basename "$(pwd)")"/" # https:/
4343
parametrizations=${parametrizations//$path_suffix/}
4444
parametrizations_arr=($parametrizations)
4545

46-
# tests to skip - space separated
47-
blocklist='profilers/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx utilities/test_warnings.py'
4846
report=''
4947

5048
rm -f standalone_test_output.txt # in case it exists, remove it
@@ -60,7 +58,8 @@ for i in "${!parametrizations_arr[@]}"; do
6058
parametrization=${parametrizations_arr[$i]}
6159

6260
# check blocklist
63-
if echo $blocklist | grep -F "${parametrization}"; then
61+
if [[ "${parametrization}" == *"test_pytorch_profiler_nested_emit_nvtx"* ]]; then
62+
echo "Skipping $parametrization"
6463
report+="Skipped\t$parametrization\n"
6564
# do not continue the loop because we might need to wait for batched jobs
6665
else

tests/tests_pytorch/strategies/test_bagua_strategy.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ def test_bagua_default(tmpdir):
4545
assert isinstance(trainer.strategy, BaguaStrategy)
4646

4747

48+
@pytest.mark.skipif(
49+
torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8,
50+
reason="Async does not support this CUDA architecture",
51+
)
4852
@RunIf(min_cuda_gpus=2, standalone=True, bagua=True)
4953
def test_async_algorithm(tmpdir):
5054
model = BoringModel()

0 commit comments

Comments
 (0)