
bpo-44708: Only re-run test methods that match names of previously failing test methods #27287

Merged
11 commits merged on Jul 22, 2021
41 changes: 34 additions & 7 deletions Lib/test/libregrtest/cmdline.py
@@ -140,6 +140,39 @@
# default (see bpo-30822).
RESOURCE_NAMES = ALL_RESOURCES + ('extralargefile', 'tzdata')


class Namespace(argparse.Namespace):
def __init__(self, **kwargs) -> None:
self.testdir = None
self.verbose = 0
self.quiet = False
self.exclude = False
self.single = False
self.randomize = False
self.fromfile = None
self.findleaks = 1
self.fail_env_changed = False
self.use_resources = None
self.trace = False
self.coverdir = 'coverage'
self.runleaks = False
self.huntrleaks = False
self.verbose2 = False
self.verbose3 = False
self.print_slow = False
self.random_seed = None
self.use_mp = None
self.forever = False
self.header = False
self.failfast = False
self.match_tests = None
self.ignore_tests = None
self.pgo = False
self.pgo_extended = False

super().__init__(**kwargs)


class _ArgParser(argparse.ArgumentParser):

def error(self, message):
@@ -320,13 +353,7 @@ def resources_list(string):

def _parse_args(args, **kwargs):
# Defaults
ns = argparse.Namespace(testdir=None, verbose=0, quiet=False,
exclude=False, single=False, randomize=False, fromfile=None,
findleaks=1, use_resources=None, trace=False, coverdir='coverage',
runleaks=False, huntrleaks=False, verbose2=False, print_slow=False,
random_seed=None, use_mp=None, verbose3=False, forever=False,
header=False, failfast=False, match_tests=None, ignore_tests=None,
pgo=False)
ns = Namespace()
for k, v in kwargs.items():
if not hasattr(ns, k):
raise TypeError('%r is an invalid keyword argument '
76 changes: 45 additions & 31 deletions Lib/test/libregrtest/main.py
@@ -11,10 +11,10 @@
import unittest
from test.libregrtest.cmdline import _parse_args
from test.libregrtest.runtest import (
findtests, runtest, get_abs_module,
STDTESTS, NOTTESTS, PASSED, FAILED, ENV_CHANGED, SKIPPED, RESOURCE_DENIED,
INTERRUPTED, CHILD_ERROR, TEST_DID_NOT_RUN, TIMEOUT,
PROGRESS_MIN_TIME, format_test_result, is_failed)
findtests, runtest, get_abs_module, is_failed,
STDTESTS, NOTTESTS, PROGRESS_MIN_TIME,
Passed, Failed, EnvChanged, Skipped, ResourceDenied, Interrupted,
ChildError, DidNotRun)
from test.libregrtest.setup import setup_tests
from test.libregrtest.pgo import setup_pgo_tests
from test.libregrtest.utils import removepy, count, format_duration, printlist
@@ -99,34 +99,32 @@ def get_executed(self):
| set(self.run_no_tests))

def accumulate_result(self, result, rerun=False):
test_name = result.test_name
ok = result.result
test_name = result.name

if ok not in (CHILD_ERROR, INTERRUPTED) and not rerun:
self.test_times.append((result.test_time, test_name))
if not isinstance(result, (ChildError, Interrupted)) and not rerun:
self.test_times.append((result.duration_sec, test_name))

if ok == PASSED:
if isinstance(result, Passed):
self.good.append(test_name)
elif ok in (FAILED, CHILD_ERROR):
Contributor Author

The different order here is required by how inheritance works: checking isinstance(result, Failed) first would also match its subclasses, so the more specific cases have to be tested earlier.
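As a minimal sketch of that ordering constraint (ChildError subclassing Failed is stated later in this thread; the other class names are simplified stand-ins for the real hierarchy in runtest.py):

# Minimal sketch: why the most specific isinstance() checks must come first.
# Assumes ChildError is a subclass of Failed, as noted in this thread.
class TestResult: pass
class Passed(TestResult): pass
class Failed(TestResult): pass
class ChildError(Failed): pass

result = ChildError()

# Broad check first: the Failed branch swallows ChildError, and the
# ChildError branch becomes unreachable.
if isinstance(result, Failed):
    outcome = "failed"
elif isinstance(result, ChildError):
    outcome = "child error"   # never reached

# Specific check first: each result lands in the right branch.
if isinstance(result, ChildError):
    outcome = "child error"
elif isinstance(result, Failed):
    outcome = "failed"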

if not rerun:
self.bad.append(test_name)
elif ok == ENV_CHANGED:
self.environment_changed.append(test_name)
elif ok == SKIPPED:
self.skipped.append(test_name)
elif ok == RESOURCE_DENIED:
elif isinstance(result, ResourceDenied):
self.skipped.append(test_name)
self.resource_denieds.append(test_name)
elif ok == TEST_DID_NOT_RUN:
elif isinstance(result, Skipped):
self.skipped.append(test_name)
elif isinstance(result, EnvChanged):
self.environment_changed.append(test_name)
elif isinstance(result, Failed):
if not rerun:
self.bad.append(test_name)
self.rerun.append(result)
elif isinstance(result, DidNotRun):
self.run_no_tests.append(test_name)
elif ok == INTERRUPTED:
elif isinstance(result, Interrupted):
self.interrupted = True
elif ok == TIMEOUT:
self.bad.append(test_name)
else:
raise ValueError("invalid test result: %r" % ok)
raise ValueError("invalid test result: %r" % result)

if rerun and ok not in {FAILED, CHILD_ERROR, INTERRUPTED}:
if rerun and not isinstance(result, (Failed, Interrupted)):
Contributor Author

ChildError is a Failed subclass, so we don't have to enumerate it. Additionally, I feel this list was previously missing other failures such as uncaught exceptions, env changes, and refleaks; now it no longer does.

Member

For refleaks/envchanged I don't know if it makes sense to rerun, because if those fail there is no way the test suite is going to succeed after that, no? OTOH I don't see how it would hurt (let's just make sure that it works as we expect and doesn't interfere with the refleak machinery).

Contributor Author

This function is a little cryptic. The rerun boolean here means "we are already re-running", and this if statement decides whether the test that just re-ran should be removed from the "bad" list (which was populated on the first run). The bug previously was that a refleak or env change in the second run counted as an "ok" outcome, so the test was removed from "bad", leading to a green CI badge that was wrong.
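A rough sketch of that bookkeeping (simplified stand-in classes; the authoritative logic is the accumulate_result diff above, and it relies on env changes and refleaks being reported via Failed subclasses, as described in this thread):

# Simplified sketch of the first-run / re-run bookkeeping described above.
class TestResult:
    def __init__(self, name): self.name = name
class Passed(TestResult): pass
class Failed(TestResult): pass
class Interrupted(TestResult): pass

def accumulate(result, bad, rerun=False):
    # First run: remember every failure.
    if isinstance(result, Failed) and not rerun:
        bad.append(result.name)
    # Re-run: only a result that is neither a failure nor an interruption
    # clears the earlier failure, so an env change or refleak on the second
    # run no longer turns the run green by mistake.
    if rerun and not isinstance(result, (Failed, Interrupted)):
        bad.remove(result.name)

bad = []
accumulate(Failed("test_spam"), bad)              # first run fails
accumulate(Failed("test_spam"), bad, rerun=True)  # still failing: stays bad
assert bad == ["test_spam"]
accumulate(Passed("test_spam"), bad, rerun=True)  # clean pass: cleared
assert bad == []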

self.bad.remove(test_name)

xml_data = result.xml_data
@@ -314,15 +312,31 @@ def rerun_failed_tests(self):

self.log()
self.log("Re-running failed tests in verbose mode")
self.rerun = self.bad[:]
for test_name in self.rerun:
self.log(f"Re-running {test_name} in verbose mode")
rerun_list = self.rerun[:]
self.rerun = []
for result in rerun_list:
test_name = result.name
errors = result.errors or []
failures = result.failures or []
error_names = [test_full_name.split(" ")[0] for (test_full_name, *_) in errors]
failure_names = [test_full_name.split(" ")[0] for (test_full_name, *_) in failures]
self.ns.verbose = True
orig_match_tests = self.ns.match_tests
if errors or failures:
if self.ns.match_tests is None:
self.ns.match_tests = []
self.ns.match_tests.extend(error_names)
Member

Question: Do we want to rerun errors? In theory those are things like failures to set up the test and similar. Do you think that this is something we should run again?

Contributor Author

The only case where we gather errors is with TestFailedWithDetails, which means those are unittest failures and/or errors. We've been re-running the entire file in this scenario before; I think we still should, as some of those are exceptions raised due to race conditions. We might think of tightening this up in a future update.
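For context, a small sketch of how the failing method names are derived; the split(" ")[0] idiom is the one used in the diff above, and the test names here are made up:

# unittest identifies each failing test as "test_method (package.module.TestCase)",
# so taking the text before the first space yields just the method name,
# which the --match machinery can then use on the re-run.
errors = [("test_setup_crash (test.test_spam.SpamTests)", "Traceback ...")]
failures = [("test_wrong_answer (test.test_spam.SpamTests)", "Traceback ...")]

error_names = [full_name.split(" ")[0] for (full_name, *_) in errors]
failure_names = [full_name.split(" ")[0] for (full_name, *_) in failures]

assert error_names == ["test_setup_crash"]
assert failure_names == ["test_wrong_answer"]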

self.ns.match_tests.extend(failure_names)
Comment on lines +328 to +329

Contributor Author

This is the money shot.

matching = "matching: " + ", ".join(self.ns.match_tests)
self.log(f"Re-running {test_name} in verbose mode ({matching})")
else:
self.log(f"Re-running {test_name} in verbose mode")
result = runtest(self.ns, test_name)
self.ns.match_tests = orig_match_tests

self.accumulate_result(result, rerun=True)

if result.result == INTERRUPTED:
if isinstance(result, Interrupted):
break

if self.bad:
@@ -383,7 +397,7 @@ def display_result(self):
if self.rerun:
print()
print("%s:" % count(len(self.rerun), "re-run test"))
printlist(self.rerun)
printlist(r.name for r in self.rerun)

if self.run_no_tests:
print()
@@ -423,14 +437,14 @@ def run_tests_sequential(self):
result = runtest(self.ns, test_name)
self.accumulate_result(result)

if result.result == INTERRUPTED:
if isinstance(result, Interrupted):
break

previous_test = format_test_result(result)
previous_test = str(result)
test_time = time.monotonic() - start_time
if test_time >= PROGRESS_MIN_TIME:
previous_test = "%s in %s" % (previous_test, format_duration(test_time))
elif result.result == PASSED:
elif isinstance(result, Passed):
# be quiet: say nothing if the test passed shortly
previous_test = None
