Commit edb4451

Borda authored and alexierule committed

ci: resolve standalone testing (#20633)

* ci: resolve standalone testing
* faster
* merge
* printenv
* here
* list
* prune
* process
* printf
* stdout
* ./
* -e
* .coverage
* all
* rev
* notes
* notes
* notes

(cherry picked from commit e8d70bc)

1 parent 6cb697f, commit edb4451

File tree: 6 files changed, +79 −33 lines

.azure/gpu-benchmarks.yml (+1)

@@ -108,5 +108,6 @@ jobs:
     condition: and(succeeded(), eq(variables['PACKAGE_NAME'], 'fabric'))
     env:
       PL_RUN_CUDA_TESTS: "1"
+      PL_RUN_STANDALONE_TESTS: "1"
     displayName: "Testing: fabric standalone tasks"
     timeoutInMinutes: "10"

.azure/gpu-tests-fabric.yml (+1)

@@ -144,6 +144,7 @@ jobs:
     workingDirectory: tests/
     env:
       PL_STANDALONE_TESTS_SOURCE: $(COVERAGE_SOURCE)
+      PL_RUN_STANDALONE_TESTS: "1"
     displayName: "Testing: fabric standalone"
     timeoutInMinutes: "10"

.azure/gpu-tests-pytorch.yml (+1)

@@ -166,6 +166,7 @@ jobs:
     env:
       PL_USE_MOCKED_MNIST: "1"
       PL_STANDALONE_TESTS_SOURCE: $(COVERAGE_SOURCE)
+      PL_RUN_STANDALONE_TESTS: "1"
     displayName: "Testing: PyTorch standalone tests"
     timeoutInMinutes: "35"

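Note: the env additions above are needed because run_standalone_tests.sh (next file) no longer exports PL_RUN_STANDALONE_TESTS itself; each caller must now opt in explicitly. A minimal sketch of running the same suite locally (the directory argument is illustrative, not part of this commit):

    cd tests
    export PL_RUN_STANDALONE_TESTS=1   # gates tests marked as standalone-only
    bash ./run_standalone_tests.sh tests_fabric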
tests/run_standalone_tests.sh (+68 −33)

@@ -12,44 +12,61 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-set -e
-# THIS FILE ASSUMES IT IS RUN INSIDE THE tests/tests_<package> DIRECTORY
+
+# THIS FILE ASSUMES IT IS RUN INSIDE THE tests DIRECTORY.
 
 # Batch size for testing: Determines how many standalone test invocations run in parallel
-# It can be set through the env variable PL_STANDALONE_TESTS_BATCH_SIZE and defaults to 6 if not set
-test_batch_size="${PL_STANDALONE_TESTS_BATCH_SIZE:-3}"
-source="${PL_STANDALONE_TESTS_SOURCE:-"lightning"}"
-# this is the directory where the tests are located
+# It can be set through the env variable NUM_PARALLEL_TESTS and defaults to 5 if not set
+test_batch_size="${NUM_PARALLEL_TESTS:-5}"
+
+# Source directory for coverage runs can be set with CODECOV_SOURCE and defaults to lightning.
+codecov_source="${CODECOV_SOURCE:-"lightning"}"
+
+# The test directory is passed as the first argument to the script
 test_dir=$1  # parse the first argument
+
+# There is also a timeout for the tests.
+# It can be set through the env variable TEST_TIMEOUT and defaults to 1200 seconds if not set
+test_timeout="${TEST_TIMEOUT:-1200}"
+
+# Temporary file to store the collected tests
 COLLECTED_TESTS_FILE="collected_tests.txt"
 
 ls -lh .  # show the contents of the directory
 
-# this environment variable allows special tests to run
-export PL_RUN_STANDALONE_TESTS=1
-# python arguments
-defaults=" -m coverage run --source ${source} --append -m pytest --no-header -v -s --timeout 120 "
+# Python arguments for running the tests and coverage
+defaults=" -m coverage run --source ${codecov_source} --append -m pytest --no-header -v -s --color=yes --timeout=${test_timeout} --durations=0 "
 echo "Using defaults: ${defaults}"
 
-# get the list of parametrizations. we need to call them separately. the last two lines are removed.
+# Get the list of parametrizations. We need to call them separately; the last two lines are removed.
 # note: if there's a syntax error, this will fail with some garbled output
-python3 -um pytest $test_dir -q --collect-only --pythonwarnings ignore 2>&1 > $COLLECTED_TESTS_FILE
-# early terminate if collection failed (e.g. syntax error)
+python -um pytest ${test_dir} -q --collect-only --pythonwarnings ignore 2>&1 > $COLLECTED_TESTS_FILE
+# Early terminate if collection failed (e.g. syntax error)
 if [[ $? != 0 ]]; then
   cat $COLLECTED_TESTS_FILE
+  printf "ERROR: test collection failed!\n"
   exit 1
 fi
 
-# removes the last line of the file
-sed -i '$d' $COLLECTED_TESTS_FILE
+# Initialize empty array
+tests=()
 
-# Get test list and run each test individually
-tests=($(grep -oP '\S+::test_\S+' "$COLLECTED_TESTS_FILE"))
+# Read from file line by line
+while IFS= read -r line; do
+  # Only keep lines containing "test_"
+  if [[ $line == *"test_"* ]]; then
+    # Extract the part after test_dir/
+    pruned_line="${line#*${test_dir}/}"
+    tests+=("${test_dir}/$pruned_line")
+  fi
+done < $COLLECTED_TESTS_FILE
+
+# Count tests
 test_count=${#tests[@]}
-# present the collected tests
+
+# Display results
 printf "collected $test_count tests:\n-------------------\n"
-# replace space with new line
-echo "${tests[@]}" | tr ' ' '\n'
+printf "%s\n" "${tests[@]}"
 printf "\n===================\n"
 
 # if test count is one print warning

@@ -63,55 +80,73 @@ fi
 # clear all the collected reports
 rm -f parallel_test_output-*.txt  # in case it exists, remove it
 
-
-status=0  # reset the script status
+status=0  # aggregated script status
 report=""  # final report
 pids=()  # array of PIDs for running tests
 test_ids=()  # array of indexes of running tests
-printf "Running $test_count tests in batches of $test_batch_size\n"
+failed_tests=()  # array of failed tests
+printf "Running $test_count tests in batches of $test_batch_size:\n"
 for i in "${!tests[@]}"; do
-  # remove initial "tests/" from the test name
-  test=${tests[$i]/tests\//}
-  printf "Running test $((i+1))/$test_count: $test\n"
+  test=${tests[$i]}
+  printf "* Running test $((i+1))/$test_count: $test\n"
 
   # execute the test in the background
   # redirect to a log file that buffers test output. since the tests will run in the background,
   # we cannot let them output to std{out,err} because the outputs would be garbled together
-  python3 ${defaults} "$test" 2>&1 > "standalone_test_output-$i.txt" &
+  python ${defaults} "$test" &> "parallel_test_output-$i.txt" &
   test_ids+=($i)  # save the test's id in an array with running tests
   pids+=($!)  # save the PID in an array with running tests
 
   # if we reached the batch size, wait for all tests to finish
   if (( (($i + 1) % $test_batch_size == 0) || $i == $test_count-1 )); then
-    printf "Waiting for batch to finish: $(IFS=' '; echo "${pids[@]}")\n"
+    printf "-> Waiting for batch to finish: $(IFS=' '; echo "${pids[@]}")\n"
     # wait for running tests
     for j in "${!test_ids[@]}"; do
      i=${test_ids[$j]}  # restore the global test's id
      pid=${pids[$j]}  # restore the particular PID
      test=${tests[$i]}  # restore the test name
-      printf "Waiting for $tests >> standalone_test_output-$i.txt (PID: $pid)\n"
+      printf "? Waiting for $test >> parallel_test_output-$i.txt (PID: $pid)\n"
      wait -n $pid
      # get the exit status of the test
      test_status=$?
      # add row to the final report
      report+="Ran\t$test\t>> exit:$test_status\n"
      if [[ $test_status != 0 ]]; then
-        # show the output of the failed test
-        cat "standalone_test_output-$i.txt"
+        # add the test to the failed tests array
+        failed_tests+=($i)
        # Process exited with a non-zero exit status
        status=$test_status
      fi
    done
+    printf "Starting over with a new batch...\n"
    test_ids=()  # reset the test's id array
    pids=()  # reset the PID array
  fi
done
 
-# echo test report
+# print test report with exit code for each test
 printf '=%.s' {1..80}
 printf "\n$report"
 printf '=%.s' {1..80}
 printf '\n'
 
-# exit with the worst test result
+# print failed tests from dumped logs
+if [[ ${#failed_tests[@]} -gt 0 ]]; then
+  printf "Failed tests:\n"
+  for i in "${failed_tests[@]}"; do
+    printf '\n%.s' {1..5}
+    printf '=%.s' {1..80}
+    printf "\n${tests[$i]}\n"
+    printf '-%.s' {1..80}
+    printf "\n"
+    # show the output of the failed test
+    cat "parallel_test_output-$i.txt"
+    printf "\n"
+    printf '=%.s' {1..80}
+  done
+else
+  printf "All tests passed!\n"
+fi
+
+# exit with the worst test result
 exit $status

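For reference, a hedged usage sketch of the reworked script (the environment variables are the ones introduced in the diff above; the directory argument and values are illustrative):

    cd tests
    export PL_RUN_STANDALONE_TESTS=1   # must now be set by the caller, as the YAML jobs do
    NUM_PARALLEL_TESTS=4 TEST_TIMEOUT=600 CODECOV_SOURCE=lightning \
      bash ./run_standalone_tests.sh tests_pytorch

Each test runs under coverage in the background with its output buffered in parallel_test_output-<i>.txt; the logs of any failed tests are dumped after the summary report, and the script exits non-zero if any test failed.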
tests/tests_fabric/conftest.py (+4)

@@ -191,12 +191,16 @@ def caplog(caplog):
 
 @pytest.fixture(autouse=True)
 def leave_no_artifacts_behind():
+    """Checks that no artifacts are left behind after the test."""
     tests_root = Path(__file__).parent.parent
+    # Ignore the __pycache__ directories
     files_before = {p for p in tests_root.rglob("*") if "__pycache__" not in p.parts}
     yield
     files_after = {p for p in tests_root.rglob("*") if "__pycache__" not in p.parts}
     difference = files_after - files_before
     difference = {str(f.relative_to(tests_root)) for f in difference}
+    # ignore the .coverage files
+    difference = {f for f in difference if not f.endswith(".coverage")}
     assert not difference, f"Test left artifacts behind: {difference}"

tests/tests_pytorch/conftest.py (+4)

@@ -312,12 +312,16 @@ def single_process_pg():
 
 @pytest.fixture(autouse=True)
 def leave_no_artifacts_behind():
+    """Checks that no artifacts are left behind after the test."""
     tests_root = Path(__file__).parent.parent
+    # Ignore the __pycache__ directories
     files_before = {p for p in tests_root.rglob("*") if "__pycache__" not in p.parts}
     yield
     files_after = {p for p in tests_root.rglob("*") if "__pycache__" not in p.parts}
     difference = files_after - files_before
     difference = {str(f.relative_to(tests_root)) for f in difference}
+    # ignore the .coverage files
+    difference = {f for f in difference if not f.endswith(".coverage")}
     assert not difference, f"Test left artifacts behind: {difference}"

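A note on the new .coverage filter in both conftest files: the script's pytest defaults wrap every standalone test in coverage run --append, which drops a .coverage data file into the tests tree that this autouse fixture would otherwise report as a leftover artifact. A sketch of the effect (assuming default coverage settings):

    cd tests
    python -m coverage run --source lightning --append -m pytest tests_fabric -q
    ls .coverage   # data file left behind; now ignored by leave_no_artifacts_behind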