Skip to content

errors: speedup for large error counts #12631

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 29, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions mypy/checkexpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
)
from typing_extensions import ClassVar, Final, overload, TypeAlias as _TypeAlias

from mypy.errors import report_internal_error
from mypy.errors import ErrorWatcher, report_internal_error
from mypy.typeanal import (
has_any_from_unimported_type, check_for_explicit_any, set_any_tvars, expand_type_alias,
make_optional_type,
Expand Down Expand Up @@ -2293,12 +2293,15 @@ def visit_comparison_expr(self, e: ComparisonExpr) -> Type:
self.msg.add_errors(local_errors)
elif operator in operators.op_methods:
method = self.get_operator_method(operator)
err_count = self.msg.errors.total_errors()
sub_result, method_type = self.check_op(method, left_type, right, e,
allow_reverse=True)

with ErrorWatcher(self.msg.errors) as w:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a similar mechanism being used in the infer_overload_return_type method in this file (grep for .clean_copy). Can we use that here too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm. It seems like infer_overload_return_type creates a new empty MessageBuilder / Errors and uses that to check if any new errors were triggered, which could be achieved by using the new ErrorWatcher. However, it also looks like any such errors would then be ignored (they would not actually make it into the original Errors), which the ErrorWatcher approach alone cannot do, and if we're creating a new empty Errors then there's not much benefit to the ErrorWatcher approach. Am I missing something?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we use the infer_overload_return_type approach here, so we don't have to introduce the new ErrorWatcher concept? Seems like we could do that here by getting a clean copy, then unconditionally doing self.msg.add_errors like on line 2293 in visit_comparison_expr.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, I think the current approach there needs to be rethought.

Consider on line 1757:

            self.msg = overload_messages
            self.chk.msg = overload_messages
            try:
                # Passing `overload_messages` as the `arg_messages` parameter doesn't
                # seem to reliably catch all possible errors.
                # TODO: Figure out why

which seems obviously pretty related to the way the MessageBuilder in TypeChecker is created:

        self.msg = MessageBuilder(errors, modules)
        self.plugin = plugin
        self.expr_checker = mypy.checkexpr.ExpressionChecker(self, self.msg, self.plugin)
        self.pattern_checker = PatternChecker(self, self.msg, self.plugin)

The explicit patching of the MessageBuilder instance is incomplete (expr_checker and pattern_checker are not patched), but more importantly it is very fragile (there might be other places that need patching, and the places that need patching are likely to change over time in ways that are hard to keep track of), and rather ugly to boot. I think instead of using this pattern in more places we should replace it with some variant of ErrorWatcher that can optionally intercept/discard errors in the original MessageBuilder

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good point. I suspect bug #12665, which we found yesterday, is related to the problem you identify.

sub_result, method_type = self.check_op(method, left_type, right, e,
allow_reverse=True)
has_new_errors = w.has_new_errors()

# Only show dangerous overlap if there are no other errors. See
# testCustomEqCheckStrictEquality for an example.
if self.msg.errors.total_errors() == err_count and operator in ('==', '!='):
if not has_new_errors and operator in ('==', '!='):
right_type = self.accept(right)
# We suppress the error if there is a custom __eq__() method on either
# side. User defined (or even standard library) classes can define this
Expand Down
58 changes: 45 additions & 13 deletions mypy/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
from mypy.backports import OrderedDict
from collections import defaultdict

from typing import Tuple, List, TypeVar, Set, Dict, Optional, TextIO, Callable
from typing_extensions import Final
from typing import Any, Tuple, List, TypeVar, Set, Dict, Optional, TextIO, Callable
from typing_extensions import Final, Literal

from mypy.scope import Scope
from mypy.options import Options
Expand Down Expand Up @@ -122,6 +122,32 @@ def __init__(self,
Optional[ErrorCode]]


class ErrorWatcher:
    """Context manager that observes an Errors instance and records whether
    any new errors were reported while the managed region was active.
    """

    def __init__(self, errors: 'Errors') -> None:
        self.errors = errors
        # Flipped to True by on_error(); queried via has_new_errors().
        self._has_new_errors = False

    def __enter__(self) -> 'ErrorWatcher':
        # Register with the Errors instance so that _add_error_info()
        # notifies us about every newly recorded error.
        self.errors._watchers.add(self)
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> Literal[False]:
        # Unregister; returning False lets any in-flight exception propagate.
        self.errors._watchers.remove(self)
        return False

    def on_error(self, file: str, info: 'ErrorInfo') -> None:
        """Handler called when a new error is recorded.

        The default implementation just sets the has_new_errors flag.
        """
        self._has_new_errors = True

    def has_new_errors(self) -> bool:
        """Return True if any error was recorded while this watcher was active."""
        return self._has_new_errors


class Errors:
"""Container for compile errors.

Expand All @@ -134,6 +160,9 @@ class Errors:
# files were processed.
error_info_map: Dict[str, List[ErrorInfo]]

# optimization for legacy codebases with many files with errors
has_blockers: Set[str]

# Files that we have reported the errors for
flushed_files: Set[str]

Expand Down Expand Up @@ -178,6 +207,8 @@ class Errors:
# in some cases to avoid reporting huge numbers of errors.
seen_import_error = False

_watchers: Set[ErrorWatcher] = set()

def __init__(self,
show_error_context: bool = False,
show_column_numbers: bool = False,
Expand Down Expand Up @@ -209,6 +240,7 @@ def initialize(self) -> None:
self.used_ignored_lines = defaultdict(lambda: defaultdict(list))
self.ignored_files = set()
self.only_once_messages = set()
self.has_blockers = set()
self.scope = None
self.target_module = None
self.seen_import_error = False
Expand All @@ -234,9 +266,6 @@ def copy(self) -> 'Errors':
new.seen_import_error = self.seen_import_error
return new

def total_errors(self) -> int:
return sum(len(errs) for errs in self.error_info_map.values())

def set_ignore_prefix(self, prefix: str) -> None:
"""Set path prefix that will be removed from all paths."""
prefix = os.path.normpath(prefix)
def _add_error_info(self, file: str, info: 'ErrorInfo') -> None:
    """Record one error for the given file and notify active watchers.

    Also maintains the per-file blocker bookkeeping and the
    seen_import_error flag.
    """
    self.error_info_map.setdefault(file, []).append(info)
    # Let every registered ErrorWatcher know about the new error.
    for watcher in self._watchers:
        watcher.on_error(file, info)
    if info.blocker:
        # Track which files contain blocking errors so callers can check
        # for blockers without scanning every recorded error.
        self.has_blockers.add(file)
    if info.code is IMPORT:
        self.seen_import_error = True

Expand Down Expand Up @@ -476,12 +509,16 @@ def clear_errors_in_targets(self, path: str, targets: Set[str]) -> None:
"""Remove errors in specific fine-grained targets within a file."""
if path in self.error_info_map:
new_errors = []
has_blocker = False
for info in self.error_info_map[path]:
if info.target not in targets:
new_errors.append(info)
has_blocker |= info.blocker
elif info.only_once:
self.only_once_messages.remove(info.message)
self.error_info_map[path] = new_errors
if not has_blocker and path in self.has_blockers:
self.has_blockers.remove(path)

def generate_unused_ignore_errors(self, file: str) -> None:
ignored_lines = self.ignored_lines[file]
Expand Down Expand Up @@ -551,19 +588,14 @@ def is_errors(self) -> bool:
"""Are there any generated messages?"""
return bool(self.error_info_map)

def is_real_errors(self) -> bool:
"""Are there any generated errors (not just notes, for example)?"""
return any(info.severity == 'error'
for infos in self.error_info_map.values() for info in infos)

def is_blockers(self) -> bool:
"""Are the any errors that are blockers?"""
return any(err for errs in self.error_info_map.values() for err in errs if err.blocker)
return bool(self.has_blockers)

def blocker_module(self) -> Optional[str]:
"""Return the module with a blocking error, or None if not possible."""
for errs in self.error_info_map.values():
for err in errs:
for path in self.has_blockers:
for err in self.error_info_map[path]:
if err.blocker:
return err.module
return None
Expand Down