From 5b0914e782c273cfe424c0228bb36d7535884f4d Mon Sep 17 00:00:00 2001 From: Pierre Sassoulas Date: Tue, 28 Dec 2021 15:14:42 +0100 Subject: [PATCH 1/5] Add caching to critical bottlenecks in the MessageStore Some functions can't be cached without impacting the corectness with the current design. --- pylint/lint/pylinter.py | 16 ++++++++++++---- pylint/message/message_definition_store.py | 2 ++ pylint/message/message_id_store.py | 2 ++ 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/pylint/lint/pylinter.py b/pylint/lint/pylinter.py index e46577615a..69707343d4 100644 --- a/pylint/lint/pylinter.py +++ b/pylint/lint/pylinter.py @@ -1389,7 +1389,10 @@ def _get_message_state_scope( return None def _is_one_message_enabled(self, msgid: str, line: Optional[int]) -> bool: - """Checks state of a single message""" + """Checks state of a single message for the current file + + This function can't be cached it depends on self.file_state that can + changes.""" if line is None: return self._msgs_state.get(msgid, True) try: @@ -1426,10 +1429,15 @@ def is_message_enabled( line: Optional[int] = None, confidence: Optional[interfaces.Confidence] = None, ) -> bool: - """return whether the message associated to the given message id is - enabled + """Is this message enabled for the current file / line / confidence ? + + This function can't be cached right now as the line is the line of + the current analysed file (self.file_state), if it changes, then the + result for the same msg_descr/line need to change. - msgid may be either a numeric or symbolic message id. + :param msg_descr: may be either the msgid or the symbol for a MessageDefinition. + :param line: the line is the line of the currently analysed file + :param confidence: The confidence of the message """ if self.config.confidence and confidence: if confidence.name not in self.config.confidence: diff --git a/pylint/message/message_definition_store.py b/pylint/message/message_definition_store.py index c160a85bab..b62dc93c7a 100644 --- a/pylint/message/message_definition_store.py +++ b/pylint/message/message_definition_store.py @@ -2,6 +2,7 @@ # For details: https://github.com/PyCQA/pylint/blob/main/LICENSE import collections +import functools from typing import TYPE_CHECKING, Dict, List, Tuple, ValuesView from pylint.exceptions import UnknownMessageError @@ -46,6 +47,7 @@ def register_message(self, message: MessageDefinition) -> None: self._messages_definitions[message.msgid] = message self._msgs_by_category[message.msgid[0]].append(message.msgid) + @functools.lru_cache() def get_message_definitions(self, msgid_or_symbol: str) -> List[MessageDefinition]: """Returns the Message definition for either a numeric or symbolic id.""" return [ diff --git a/pylint/message/message_id_store.py b/pylint/message/message_id_store.py index 1fbe684712..0b462d2115 100644 --- a/pylint/message/message_id_store.py +++ b/pylint/message/message_id_store.py @@ -1,5 +1,6 @@ # Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html # For details: https://github.com/PyCQA/pylint/blob/main/LICENSE +import functools from typing import Dict, List, NoReturn, Optional, Tuple from pylint.exceptions import InvalidMessageError, UnknownMessageError @@ -101,6 +102,7 @@ def _raise_duplicate_msgid(symbol: str, msgid: str, other_msgid: str) -> NoRetur ) raise InvalidMessageError(error_message) + @functools.lru_cache() def get_active_msgids(self, msgid_or_symbol: str) -> List[str]: """Return msgids but the input can be a symbol.""" # Only msgid can have a digit as second letter From b30653062ce73a6a5ad45f9af2d7daf608ee251d Mon Sep 17 00:00:00 2001 From: Pierre Sassoulas Date: Tue, 28 Dec 2021 19:00:48 +0100 Subject: [PATCH 2/5] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Daniël van Noord <13665637+DanielNoord@users.noreply.github.com> --- pylint/lint/pylinter.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pylint/lint/pylinter.py b/pylint/lint/pylinter.py index 69707343d4..ad110ce068 100644 --- a/pylint/lint/pylinter.py +++ b/pylint/lint/pylinter.py @@ -1391,8 +1391,9 @@ def _get_message_state_scope( def _is_one_message_enabled(self, msgid: str, line: Optional[int]) -> bool: """Checks state of a single message for the current file - This function can't be cached it depends on self.file_state that can - changes.""" + This function can't be cached as it depends on self.file_state which can + change. + """ if line is None: return self._msgs_state.get(msgid, True) try: @@ -1429,14 +1430,14 @@ def is_message_enabled( line: Optional[int] = None, confidence: Optional[interfaces.Confidence] = None, ) -> bool: - """Is this message enabled for the current file / line / confidence ? + """Return whether this message is enabled for the current file, line and confidence level. This function can't be cached right now as the line is the line of - the current analysed file (self.file_state), if it changes, then the - result for the same msg_descr/line need to change. + the currently analysed file (self.file_state), if it changes, then the + result for the same msg_descr/line might need to change. - :param msg_descr: may be either the msgid or the symbol for a MessageDefinition. - :param line: the line is the line of the currently analysed file + :param msg_descr: Either the msgid or the symbol for a MessageDefinition + :param line: The line of the currently analysed file :param confidence: The confidence of the message """ if self.config.confidence and confidence: From 1cf2cc81c1d52e43eb709042f834cbebb64bd144 Mon Sep 17 00:00:00 2001 From: Pierre Sassoulas Date: Wed, 29 Dec 2021 09:53:04 +0100 Subject: [PATCH 3/5] Add a docstring explaining why the cache is unbounded --- pylint/message/message_definition_store.py | 8 +++++++- pylint/message/message_id_store.py | 18 ++++++++++++------ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/pylint/message/message_definition_store.py b/pylint/message/message_definition_store.py index b62dc93c7a..33058c41d5 100644 --- a/pylint/message/message_definition_store.py +++ b/pylint/message/message_definition_store.py @@ -49,7 +49,13 @@ def register_message(self, message: MessageDefinition) -> None: @functools.lru_cache() def get_message_definitions(self, msgid_or_symbol: str) -> List[MessageDefinition]: - """Returns the Message definition for either a numeric or symbolic id.""" + """Returns the Message definition for either a numeric or symbolic id. + + The cache has no limit as it will never go very high. It's a set number of + message from our own code and from user's checkers. How many message can + the user generate ? I guess it will never be more than 1000, and if each + messages has around 1000 characters, it's still ~= 1 Mb to cache. + """ return [ self._messages_definitions[m] for m in self.message_id_store.get_active_msgids(msgid_or_symbol) diff --git a/pylint/message/message_id_store.py b/pylint/message/message_id_store.py index 0b462d2115..da4217be89 100644 --- a/pylint/message/message_id_store.py +++ b/pylint/message/message_id_store.py @@ -104,17 +104,23 @@ def _raise_duplicate_msgid(symbol: str, msgid: str, other_msgid: str) -> NoRetur @functools.lru_cache() def get_active_msgids(self, msgid_or_symbol: str) -> List[str]: - """Return msgids but the input can be a symbol.""" - # Only msgid can have a digit as second letter - is_msgid: bool = msgid_or_symbol[1:].isdigit() - msgid = None - if is_msgid: + """Return msgids but the input can be a symbol. + + The cache has no limit as it will never go very high. It's a set number of + msgid/symbol pair from our own code and from user's checkers. How many msgid can + the user generate ? I guess it will never be more than 1000, and if each + msgid/symbol pair has around 50 characters, it's still ~= 5 kb to cache. + """ + msgid: Optional[str] + symbol: Optional[str] + if msgid_or_symbol[1:].isdigit(): + # Only msgid can have a digit as second letter msgid = msgid_or_symbol.upper() symbol = self.__msgid_to_symbol.get(msgid) else: msgid = self.__symbol_to_msgid.get(msgid_or_symbol) symbol = msgid_or_symbol - if msgid is None or symbol is None or not msgid or not symbol: + if not msgid or not symbol: error_msg = f"No such message id or symbol '{msgid_or_symbol}'." raise UnknownMessageError(error_msg) return self.__old_names.get(msgid, [msgid]) From e42fad02bd79b0059bb7c425b80f7f864f82e876 Mon Sep 17 00:00:00 2001 From: Pierre Sassoulas Date: Wed, 29 Dec 2021 10:25:28 +0100 Subject: [PATCH 4/5] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Daniël van Noord <13665637+DanielNoord@users.noreply.github.com> --- pylint/message/message_definition_store.py | 7 +++---- pylint/message/message_id_store.py | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/pylint/message/message_definition_store.py b/pylint/message/message_definition_store.py index 33058c41d5..766cdd4465 100644 --- a/pylint/message/message_definition_store.py +++ b/pylint/message/message_definition_store.py @@ -51,10 +51,9 @@ def register_message(self, message: MessageDefinition) -> None: def get_message_definitions(self, msgid_or_symbol: str) -> List[MessageDefinition]: """Returns the Message definition for either a numeric or symbolic id. - The cache has no limit as it will never go very high. It's a set number of - message from our own code and from user's checkers. How many message can - the user generate ? I guess it will never be more than 1000, and if each - messages has around 1000 characters, it's still ~= 1 Mb to cache. + The cache has no limit as its size will likely stay minimal. For each message we store + about 1000 characters, so even if we would have 1000 messages the cache would only + take up ~= 1 Mb. """ return [ self._messages_definitions[m] diff --git a/pylint/message/message_id_store.py b/pylint/message/message_id_store.py index da4217be89..bde2722773 100644 --- a/pylint/message/message_id_store.py +++ b/pylint/message/message_id_store.py @@ -106,10 +106,9 @@ def _raise_duplicate_msgid(symbol: str, msgid: str, other_msgid: str) -> NoRetur def get_active_msgids(self, msgid_or_symbol: str) -> List[str]: """Return msgids but the input can be a symbol. - The cache has no limit as it will never go very high. It's a set number of - msgid/symbol pair from our own code and from user's checkers. How many msgid can - the user generate ? I guess it will never be more than 1000, and if each - msgid/symbol pair has around 50 characters, it's still ~= 5 kb to cache. + The cache has no limit as its size will likely stay minimal. For each message we store + about 1000 characters, so even if we would have 1000 messages the cache would only + take up ~= 1 Mb. """ msgid: Optional[str] symbol: Optional[str] From d442f4bdfd07f5cfbac05c93ba33cdac391d5792 Mon Sep 17 00:00:00 2001 From: Pierre Sassoulas Date: Wed, 29 Dec 2021 10:43:40 +0100 Subject: [PATCH 5/5] Update pylint/message/message_id_store.py --- pylint/message/message_id_store.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pylint/message/message_id_store.py b/pylint/message/message_id_store.py index bde2722773..a16d12bfed 100644 --- a/pylint/message/message_id_store.py +++ b/pylint/message/message_id_store.py @@ -111,7 +111,6 @@ def get_active_msgids(self, msgid_or_symbol: str) -> List[str]: take up ~= 1 Mb. """ msgid: Optional[str] - symbol: Optional[str] if msgid_or_symbol[1:].isdigit(): # Only msgid can have a digit as second letter msgid = msgid_or_symbol.upper()