Skip to content

Add caching to bottlenecks in the message store #5605

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions pylint/lint/pylinter.py
Original file line number Diff line number Diff line change
Expand Up @@ -1389,7 +1389,11 @@ def _get_message_state_scope(
return None

def _is_one_message_enabled(self, msgid: str, line: Optional[int]) -> bool:
"""Checks state of a single message"""
"""Checks state of a single message for the current file

This function can't be cached as it depends on self.file_state which can
change.
"""
if line is None:
return self._msgs_state.get(msgid, True)
try:
Expand Down Expand Up @@ -1426,10 +1430,15 @@ def is_message_enabled(
line: Optional[int] = None,
confidence: Optional[interfaces.Confidence] = None,
) -> bool:
"""return whether the message associated to the given message id is
enabled
"""Return whether this message is enabled for the current file, line and confidence level.

This function can't be cached right now as the line is the line of
the currently analysed file (self.file_state), if it changes, then the
result for the same msg_descr/line might need to change.

msgid may be either a numeric or symbolic message id.
:param msg_descr: Either the msgid or the symbol for a MessageDefinition
:param line: The line of the currently analysed file
:param confidence: The confidence of the message
"""
if self.config.confidence and confidence:
if confidence.name not in self.config.confidence:
Expand Down
9 changes: 8 additions & 1 deletion pylint/message/message_definition_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# For details: https://github.com/PyCQA/pylint/blob/main/LICENSE

import collections
import functools
from typing import TYPE_CHECKING, Dict, List, Tuple, ValuesView

from pylint.exceptions import UnknownMessageError
Expand Down Expand Up @@ -46,8 +47,14 @@ def register_message(self, message: MessageDefinition) -> None:
self._messages_definitions[message.msgid] = message
self._msgs_by_category[message.msgid[0]].append(message.msgid)

@functools.lru_cache()
def get_message_definitions(self, msgid_or_symbol: str) -> List[MessageDefinition]:
"""Returns the Message definition for either a numeric or symbolic id."""
"""Returns the Message definition for either a numeric or symbolic id.

The cache has no limit as its size will likely stay minimal. For each message we store
about 1000 characters, so even if we would have 1000 messages the cache would only
take up ~= 1 Mb.
"""
return [
self._messages_definitions[m]
for m in self.message_id_store.get_active_msgids(msgid_or_symbol)
Expand Down
18 changes: 12 additions & 6 deletions pylint/message/message_id_store.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
# For details: https://github.com/PyCQA/pylint/blob/main/LICENSE
import functools
from typing import Dict, List, NoReturn, Optional, Tuple

from pylint.exceptions import InvalidMessageError, UnknownMessageError
Expand Down Expand Up @@ -101,18 +102,23 @@ def _raise_duplicate_msgid(symbol: str, msgid: str, other_msgid: str) -> NoRetur
)
raise InvalidMessageError(error_message)

@functools.lru_cache()
def get_active_msgids(self, msgid_or_symbol: str) -> List[str]:
"""Return msgids but the input can be a symbol."""
# Only msgid can have a digit as second letter
is_msgid: bool = msgid_or_symbol[1:].isdigit()
msgid = None
if is_msgid:
"""Return msgids but the input can be a symbol.

The cache has no limit as its size will likely stay minimal. For each message we store
about 1000 characters, so even if we would have 1000 messages the cache would only
take up ~= 1 Mb.
"""
msgid: Optional[str]
if msgid_or_symbol[1:].isdigit():
# Only msgid can have a digit as second letter
msgid = msgid_or_symbol.upper()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
msgid = msgid_or_symbol.upper()
msgid: Optional[str] = msgid_or_symbol.upper()

Don't know why I didn't think of this sooner, but this should fix it (or this change but then on L118. We don't need an extra line, we just need to add typing to the call that casts either to str instead of Optional[str].

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well I did that at first but then self.__msgid_to_symbol.get(msgid) expects a string and not a null-able string.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did we remove the msgid = None line?

    def get_active_msgids(self, msgid_or_symbol: str) -> List[str]:
        """Return msgids but the input can be a symbol."""
        # Only msgid can have a digit as second letter
        msgid = None
        if msgid_or_symbol[1:].isdigit():
            msgid = msgid_or_symbol.upper()
            symbol = self.__msgid_to_symbol.get(msgid)
        else:
            msgid = self.__symbol_to_msgid.get(msgid_or_symbol)
            symbol = msgid_or_symbol
        if msgid is None or symbol is None or not msgid or not symbol:
            error_msg = f"No such message id or symbol '{msgid_or_symbol}'."
            raise UnknownMessageError(error_msg)
        return self.__old_names.get(msgid, [msgid])

Gives not errors for me with mypy --strict. mypy infers that msgid is Optional[str] I think.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose the type hint is only for type checker when the assignment is really executed at run time and impact performance.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's true!

symbol = self.__msgid_to_symbol.get(msgid)
else:
msgid = self.__symbol_to_msgid.get(msgid_or_symbol)
symbol = msgid_or_symbol
if msgid is None or symbol is None or not msgid or not symbol:
if not msgid or not symbol:
error_msg = f"No such message id or symbol '{msgid_or_symbol}'."
raise UnknownMessageError(error_msg)
return self.__old_names.get(msgid, [msgid])