Skip to content

Commit db8b3a4

Browse files
author
Sylvain Thénault
committed
closes #69993: Additional string format checks for logging module
1 parent 88c648a commit db8b3a4

File tree

5 files changed

+183
-108
lines changed

5 files changed

+183
-108
lines changed

ChangeLog

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@ ChangeLog for PyLint
22
====================
33

44
--
5+
6+
* #69993: Additional string format checks for logging module:
7+
check for missing arguments, too many arguments, or invalid string
8+
formats in the logging checker module. Contributed by Daniel Arena
9+
510
* #69220: add column offset to the reports. If you've a custom reporter,
611
this change may break it has now location gain a new item giving the
712
column offset.

checkers/logging.py

Lines changed: 95 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,32 @@
1717
from logilab import astng
1818
from pylint import checkers
1919
from pylint import interfaces
20+
from pylint.checkers import utils
21+
22+
23+
MSGS = {
24+
'W6501': ('Specify string format arguments as logging function parameters',
25+
'Used when a logging statement has a call form of '
26+
'"logging.<logging method>(format_string % (format_args...))". '
27+
'Such calls should leave string interpolation to the logging '
28+
'method itself and be written '
29+
'"logging.<logging method>(format_string, format_args...)" '
30+
'so that the program may avoid incurring the cost of the '
31+
'interpolation in those cases in which no message will be '
32+
'logged. For more, see '
33+
'http://www.python.org/dev/peps/pep-0282/.'),
34+
'E6500': ('Unsupported logging format character %r (%#02x) at index %d',
35+
'Used when an unsupported format character is used in a logging\
36+
statement format string.'),
37+
'E6501': ('Logging format string ends in middle of conversion specifier',
38+
'Used when a logging statement format string terminates before\
39+
the end of a conversion specifier.'),
40+
'E6505': ('Too many arguments for logging format string',
41+
'Used when a logging format string is given too few arguments.'),
42+
'E6506': ('Not enough arguments for logging format string',
43+
'Used when a logging format string is given too many arguments'),
44+
}
2045

21-
EAGER_STRING_INTERPOLATION = 'W6501'
2246

2347
CHECKED_CONVENIENCE_FUNCTIONS = set([
2448
'critical', 'debug', 'error', 'exception', 'fatal', 'info', 'warn',
@@ -29,21 +53,8 @@ class LoggingChecker(checkers.BaseChecker):
2953
"""Checks use of the logging module."""
3054

3155
__implements__ = interfaces.IASTNGChecker
32-
3356
name = 'logging'
34-
35-
msgs = {EAGER_STRING_INTERPOLATION:
36-
('Specify string format arguments as logging function parameters',
37-
'Used when a logging statement has a call form of '
38-
'"logging.<logging method>(format_string % (format_args...))". '
39-
'Such calls should leave string interpolation to the logging '
40-
'method itself and be written '
41-
'"logging.<logging method>(format_string, format_args...)" '
42-
'so that the program may avoid incurring the cost of the '
43-
'interpolation in those cases in which no message will be '
44-
'logged. For more, see '
45-
'http://www.python.org/dev/peps/pep-0282/.')
46-
}
57+
msgs = MSGS
4758

4859
def visit_module(self, unused_node):
4960
"""Clears any state left in this checker from last module checked."""
@@ -67,30 +78,87 @@ def visit_callfunc(self, node):
6778
or not isinstance(node.func.expr, astng.Name)
6879
or node.func.expr.name != self._logging_name):
6980
return
70-
self._CheckConvenienceMethods(node)
71-
self._CheckLogMethod(node)
81+
self._check_convenience_methods(node)
82+
self._check_log_methods(node)
7283

73-
def _CheckConvenienceMethods(self, node):
84+
def _check_convenience_methods(self, node):
7485
"""Checks calls to logging convenience methods (like logging.warn)."""
7586
if node.func.attrname not in CHECKED_CONVENIENCE_FUNCTIONS:
7687
return
77-
if not node.args:
78-
# Either no args, or star args, or double-star args. Beyond the
79-
# scope of this checker in any case.
88+
if node.starargs or node.kwargs or not node.args:
89+
# Either no args, star args, or double-star args. Beyond the
90+
# scope of this checker.
8091
return
8192
if isinstance(node.args[0], astng.BinOp) and node.args[0].op == '%':
82-
self.add_message(EAGER_STRING_INTERPOLATION, node=node)
93+
self.add_message('W6501', node=node)
94+
elif isinstance(node.args[0], astng.Const):
95+
self._check_format_string(node, 0)
8396

84-
def _CheckLogMethod(self, node):
97+
def _check_log_methods(self, node):
8598
"""Checks calls to logging.log(level, format, *format_args)."""
8699
if node.func.attrname != 'log':
87100
return
88-
if len(node.args) < 2:
89-
# Either a malformed call or something with crazy star args or
90-
# double-star args magic. Beyond the scope of this checker.
101+
if node.starargs or node.kwargs or len(node.args) < 2:
102+
# Either a malformed call, star args, or double-star args. Beyond
103+
# the scope of this checker.
91104
return
92105
if isinstance(node.args[1], astng.BinOp) and node.args[1].op == '%':
93-
self.add_message(EAGER_STRING_INTERPOLATION, node=node)
106+
self.add_message('W6501', node=node)
107+
elif isinstance(node.args[1], astng.Const):
108+
self._check_format_string(node, 1)
109+
110+
def _check_format_string(self, node, format_arg):
111+
"""Checks that format string tokens match the supplied arguments.
112+
113+
Args:
114+
node: AST node to be checked.
115+
format_arg: Index of the format string in the node arguments.
116+
"""
117+
num_args = self._count_supplied_tokens(node.args[format_arg + 1:])
118+
if not num_args:
119+
# If no args were supplied, then all format strings are valid -
120+
# don't check any further.
121+
return
122+
format_string = node.args[format_arg].value
123+
if not isinstance(format_string, basestring):
124+
# If the log format is constant non-string (e.g. logging.debug(5)),
125+
# ensure there are no arguments.
126+
required_num_args = 0
127+
else:
128+
try:
129+
keyword_args, required_num_args = \
130+
utils.parse_format_string(format_string)
131+
if keyword_args:
132+
# Keyword checking on logging strings is complicated by
133+
# special keywords - out of scope.
134+
return
135+
except utils.UnsupportedFormatCharacter, e:
136+
c = format_string[e.index]
137+
self.add_message('E6500', node=node, args=(c, ord(c), e.index))
138+
return
139+
except utils.IncompleteFormatString:
140+
self.add_message('E6501', node=node)
141+
return
142+
if num_args > required_num_args:
143+
self.add_message('E6505', node=node)
144+
elif num_args < required_num_args:
145+
self.add_message('E6506', node=node)
146+
147+
def _count_supplied_tokens(self, args):
148+
"""Counts the number of tokens in an args list.
149+
150+
The Python log functions allow for special keyword arguments: func,
151+
exc_info and extra. To handle these cases correctly, we only count
152+
arguments that aren't keywords.
153+
154+
Args:
155+
args: List of AST nodes that are arguments for a log format string.
156+
157+
Returns:
158+
Number of AST nodes that aren't keywords.
159+
"""
160+
return sum(1 for arg in args if not isinstance(arg, astng.Keyword))
161+
94162

95163
def register(linter):
96164
"""Required method to auto-register this checker."""

checkers/string_format.py

Lines changed: 5 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
from logilab import astng
2323
from pylint.interfaces import IASTNGChecker
2424
from pylint.checkers import BaseChecker
25+
from pylint.checkers import utils
26+
2527

2628
MSGS = {
2729
'E9900': ("Unsupported format character %r (%#02x) at index %d",
@@ -57,82 +59,6 @@
5759
specifiers is given too many arguments"),
5860
}
5961

60-
class IncompleteFormatString(Exception):
61-
"""A format string ended in the middle of a format specifier."""
62-
pass
63-
64-
class UnsupportedFormatCharacter(Exception):
65-
"""A format character in a format string is not one of the supported
66-
format characters."""
67-
def __init__(self, index):
68-
Exception.__init__(self, index)
69-
self.index = index
70-
71-
def parse_format_string(format_string):
72-
"""Parses a format string, returning a tuple of (keys, num_args), where keys
73-
is the set of mapping keys in the format string, and num_args is the number
74-
of arguments required by the format string. Raises
75-
IncompleteFormatString or UnsupportedFormatCharacter if a
76-
parse error occurs."""
77-
keys = set()
78-
num_args = 0
79-
def next_char(i):
80-
i += 1
81-
if i == len(format_string):
82-
raise IncompleteFormatString
83-
return (i, format_string[i])
84-
i = 0
85-
while i < len(format_string):
86-
c = format_string[i]
87-
if c == '%':
88-
i, c = next_char(i)
89-
# Parse the mapping key (optional).
90-
key = None
91-
if c == '(':
92-
depth = 1
93-
i, c = next_char(i)
94-
key_start = i
95-
while depth != 0:
96-
if c == '(':
97-
depth += 1
98-
elif c == ')':
99-
depth -= 1
100-
i, c = next_char(i)
101-
key_end = i - 1
102-
key = format_string[key_start:key_end]
103-
104-
# Parse the conversion flags (optional).
105-
while c in '#0- +':
106-
i, c = next_char(i)
107-
# Parse the minimum field width (optional).
108-
if c == '*':
109-
num_args += 1
110-
i, c = next_char(i)
111-
else:
112-
while c in string.digits:
113-
i, c = next_char(i)
114-
# Parse the precision (optional).
115-
if c == '.':
116-
i, c = next_char(i)
117-
if c == '*':
118-
num_args += 1
119-
i, c = next_char(i)
120-
else:
121-
while c in string.digits:
122-
i, c = next_char(i)
123-
# Parse the length modifier (optional).
124-
if c in 'hlL':
125-
i, c = next_char(i)
126-
# Parse the conversion type (mandatory).
127-
if c not in 'diouxXeEfFgGcrs%':
128-
raise UnsupportedFormatCharacter(i)
129-
if key:
130-
keys.add(key)
131-
elif c != '%':
132-
num_args += 1
133-
i += 1
134-
return keys, num_args
135-
13662
OTHER_NODES = (astng.Const, astng.List, astng.Backquote,
13763
astng.Lambda, astng.Function,
13864
astng.ListComp, astng.SetComp, astng.GenExpr)
@@ -158,12 +84,12 @@ def visit_binop(self, node):
15884
format_string = left.value
15985
try:
16086
required_keys, required_num_args = \
161-
parse_format_string(format_string)
162-
except UnsupportedFormatCharacter, e:
87+
utils.parse_format_string(format_string)
88+
except utils.UnsupportedFormatCharacter, e:
16389
c = format_string[e.index]
16490
self.add_message('E9900', node=node, args=(c, ord(c), e.index))
16591
return
166-
except IncompleteFormatString:
92+
except utils.IncompleteFormatString:
16793
self.add_message('E9901', node=node)
16894
return
16995
if required_keys and required_num_args:

checkers/utils.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
"""some functions that may be useful for various checkers
1919
"""
2020

21+
import string
2122
from logilab import astng
2223
from logilab.common.compat import builtins
2324
BUILTINS_NAME = builtins.__name__
@@ -211,3 +212,78 @@ def store_messages(func):
211212
return func
212213
return store_messages
213214

215+
class IncompleteFormatString(Exception):
216+
"""A format string ended in the middle of a format specifier."""
217+
pass
218+
219+
class UnsupportedFormatCharacter(Exception):
220+
"""A format character in a format string is not one of the supported
221+
format characters."""
222+
def __init__(self, index):
223+
Exception.__init__(self, index)
224+
self.index = index
225+
226+
def parse_format_string(format_string):
227+
"""Parses a format string, returning a tuple of (keys, num_args), where keys
228+
is the set of mapping keys in the format string, and num_args is the number
229+
of arguments required by the format string. Raises
230+
IncompleteFormatString or UnsupportedFormatCharacter if a
231+
parse error occurs."""
232+
keys = set()
233+
num_args = 0
234+
def next_char(i):
235+
i += 1
236+
if i == len(format_string):
237+
raise IncompleteFormatString
238+
return (i, format_string[i])
239+
i = 0
240+
while i < len(format_string):
241+
c = format_string[i]
242+
if c == '%':
243+
i, c = next_char(i)
244+
# Parse the mapping key (optional).
245+
key = None
246+
if c == '(':
247+
depth = 1
248+
i, c = next_char(i)
249+
key_start = i
250+
while depth != 0:
251+
if c == '(':
252+
depth += 1
253+
elif c == ')':
254+
depth -= 1
255+
i, c = next_char(i)
256+
key_end = i - 1
257+
key = format_string[key_start:key_end]
258+
259+
# Parse the conversion flags (optional).
260+
while c in '#0- +':
261+
i, c = next_char(i)
262+
# Parse the minimum field width (optional).
263+
if c == '*':
264+
num_args += 1
265+
i, c = next_char(i)
266+
else:
267+
while c in string.digits:
268+
i, c = next_char(i)
269+
# Parse the precision (optional).
270+
if c == '.':
271+
i, c = next_char(i)
272+
if c == '*':
273+
num_args += 1
274+
i, c = next_char(i)
275+
else:
276+
while c in string.digits:
277+
i, c = next_char(i)
278+
# Parse the length modifier (optional).
279+
if c in 'hlL':
280+
i, c = next_char(i)
281+
# Parse the conversion type (mandatory).
282+
if c not in 'diouxXeEfFgGcrs%':
283+
raise UnsupportedFormatCharacter(i)
284+
if key:
285+
keys.add(key)
286+
elif c != '%':
287+
num_args += 1
288+
i += 1
289+
return keys, num_args

test/unittest_lint.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ def test_errors_only(self):
233233
self.linter.error_mode()
234234
checkers = self.linter.prepare_checkers()
235235
checker_names = tuple(c.name for c in checkers)
236-
should_not = ('design', 'format', 'imports', 'logging', 'metrics',
236+
should_not = ('design', 'format', 'imports', 'metrics',
237237
'miscellaneous', 'similarities')
238238
self.failIf(any(name in checker_names for name in should_not))
239239

@@ -249,7 +249,7 @@ def test_disable_alot(self):
249249
# FIXME should it be necessary to explicitly desactivate failures ?
250250
self.linter.set_option('disable', 'R,C,W')
251251
checker_names = [c.name for c in self.linter.prepare_checkers()]
252-
should_not = ('design', 'logging', 'metrics', 'similarities')
252+
should_not = ('design', 'metrics', 'similarities')
253253
rest = [name for name in checker_names if name in should_not]
254254
self.assertListEqual(rest, [])
255255
self.linter.set_option('disable', 'R,C,W,F')

0 commit comments

Comments
 (0)