diff --git a/.azure/gpu-tests-lite.yml b/.azure/gpu-tests-lite.yml
index 66fc3951b9ce1..bf0ed0a0b9e2f 100644
--- a/.azure/gpu-tests-lite.yml
+++ b/.azure/gpu-tests-lite.yml
@@ -63,11 +63,13 @@ jobs:
 
   - bash: |
       set -e
-      TORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])")
-      CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
+      PYTORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])")
       python ./requirements/pytorch/adjust-versions.py requirements/lite/base.txt ${PYTORCH_VERSION}
+
+      CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
       pip install -e .[strategies] --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html
       pip install --requirement requirements/pytorch/devel.txt --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html
+
       pip list
     env:
       PACKAGE_NAME: pytorch
diff --git a/.azure/gpu-tests.yml b/.azure/gpu-tests.yml
index 1e589e708cb39..42c153a5b50ae 100644
--- a/.azure/gpu-tests.yml
+++ b/.azure/gpu-tests.yml
@@ -91,15 +91,19 @@ jobs:
       set -e
       python -c "fname = 'requirements/pytorch/strategies.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)"
       python -c "fname = 'requirements/pytorch/strategies.txt' ; lines = [line for line in open(fname).readlines() if 'bagua' not in line] ; open(fname, 'w').writelines(lines)"
-      TORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])")
-      CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
-      CUDA_VERSION_BAGUA=$(python -c "print([ver for ver in [116,113,111,102] if $CUDA_VERSION_MM >= ver][0])")
+
+      PYTORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])")
       python ./requirements/pytorch/adjust-versions.py requirements/pytorch/base.txt ${PYTORCH_VERSION}
       python ./requirements/pytorch/adjust-versions.py requirements/pytorch/extra.txt ${PYTORCH_VERSION}
       python ./requirements/pytorch/adjust-versions.py requirements/pytorch/examples.txt ${PYTORCH_VERSION}
-      pip install "bagua-cuda$CUDA_VERSION_BAGUA"
+
+      CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
       pip install -e .[strategies] --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html
       pip install --requirement requirements/pytorch/devel.txt --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html
+
+      CUDA_VERSION_BAGUA=$(python -c "print([ver for ver in [116,113,111,102] if $CUDA_VERSION_MM >= ver][0])")
+      pip install "bagua-cuda$CUDA_VERSION_BAGUA"
+
       pip list
     env:
       PACKAGE_NAME: pytorch
diff --git a/tests/tests_pytorch/profilers/test_profiler.py b/tests/tests_pytorch/profilers/test_profiler.py
index 2e3b868407d7f..1ed1212840234 100644
--- a/tests/tests_pytorch/profilers/test_profiler.py
+++ b/tests/tests_pytorch/profilers/test_profiler.py
@@ -474,10 +474,9 @@ def look_for_trace(trace_dir):
 
 
 @RunIf(min_cuda_gpus=1, standalone=True)
-def test_pytorch_profiler_nested_emit_nvtx(tmpdir):
+def test_pytorch_profiler_nested_emit_nvtx():
     """This test check emit_nvtx is correctly supported."""
     profiler = PyTorchProfiler(use_cuda=True, emit_nvtx=True)
-
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
diff --git a/tests/tests_pytorch/run_standalone_tasks.sh b/tests/tests_pytorch/run_standalone_tasks.sh
index 0abe25d76c638..9c9971dad01b7 100644
--- a/tests/tests_pytorch/run_standalone_tasks.sh
+++ b/tests/tests_pytorch/run_standalone_tasks.sh
@@ -15,7 +15,11 @@ set -e
 
 # THIS FILE ASSUMES IT IS RUN INSIDE THE tests/tests_pytorch DIRECTORY
 
-if nvcc --version; then
+# this environment variable allows special tests to run
+export PL_RUN_STANDALONE_TESTS=1
+
+can_run_nvprof=$(python -c "import torch; print(torch.cuda.is_available() and torch.cuda.get_device_capability()[0] < 8)")
+if [[ $can_run_nvprof == "True" ]]; then
   echo "Running profilers/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx"
   nvprof --profile-from-start off -o trace_name.prof -- python -m coverage run --source pytorch_lightning --append -m pytest --no-header profilers/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx
 fi
diff --git a/tests/tests_pytorch/run_standalone_tests.sh b/tests/tests_pytorch/run_standalone_tests.sh
index fa6bda6706bc8..43021ddbf7d14 100644
--- a/tests/tests_pytorch/run_standalone_tests.sh
+++ b/tests/tests_pytorch/run_standalone_tests.sh
@@ -43,8 +43,6 @@ path_suffix=$(basename "$(dirname "$(pwd)")")/$(basename "$(pwd)")"/" # https:/
 parametrizations=${parametrizations//$path_suffix/}
 parametrizations_arr=($parametrizations)
 
-# tests to skip - space separated
-blocklist='profilers/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx utilities/test_warnings.py'
 report=''
 
 rm -f standalone_test_output.txt # in case it exists, remove it
@@ -60,7 +58,8 @@ for i in "${!parametrizations_arr[@]}"; do
   parametrization=${parametrizations_arr[$i]}
 
   # check blocklist
-  if echo $blocklist | grep -F "${parametrization}"; then
+  if [[ "${parametrization}" == *"test_pytorch_profiler_nested_emit_nvtx"* ]]; then
+    echo "Skipping $parametrization"
     report+="Skipped\t$parametrization\n"
     # do not continue the loop because we might need to wait for batched jobs
   else
diff --git a/tests/tests_pytorch/strategies/test_bagua_strategy.py b/tests/tests_pytorch/strategies/test_bagua_strategy.py
index 9c36552789615..4a9912ca00c90 100644
--- a/tests/tests_pytorch/strategies/test_bagua_strategy.py
+++ b/tests/tests_pytorch/strategies/test_bagua_strategy.py
@@ -45,6 +45,10 @@ def test_bagua_default(tmpdir):
     assert isinstance(trainer.strategy, BaguaStrategy)
 
 
+@pytest.mark.skipif(
+    torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8,
+    reason="Async does not support this CUDA architecture",
+)
 @RunIf(min_cuda_gpus=2, standalone=True, bagua=True)
 def test_async_algorithm(tmpdir):
     model = BoringModel()
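Aside (illustrative, not part of the patch): both the can_run_nvprof gate added to run_standalone_tasks.sh and the skipif marker added to test_bagua_strategy.py key off the compute-capability major version reported by torch.cuda.get_device_capability(). A minimal standalone sketch of that check, with the helper name borrowed from the shell variable:

import torch


def can_run_nvprof() -> bool:
    # Same condition the patch uses in run_standalone_tasks.sh: legacy nvprof
    # profiling only works on GPUs whose compute-capability major version is
    # below 8 (pre-Ampere). The bagua async test skips on the inverse condition.
    return torch.cuda.is_available() and torch.cuda.get_device_capability()[0] < 8


if __name__ == "__main__":
    # The shell script compares the printed string against "True".
    print(can_run_nvprof())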