Skip to content

Commit d66efa8

Browse files
Andreas Freimuthbrycepg
Andreas Freimuth
authored and committed
Fix indentation handling with tabs (Issue: #1148)
A tab is not equal to 8 spaces, so simply counting each tab as 8 spaces is wrong. Use the actual string of tabs and/or spaces for indentation checks instead of an imaginary number of whitespace characters.
1 parent 01ff866 commit d66efa8

File tree

1 file changed

+77
-50
lines changed

1 file changed

+77
-50
lines changed

pylint/checkers/format.py

+77-50
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,17 @@ def _token_followed_by_eol(tokens, position):
168168
tokens.type(position+2) == tokenize.NL)
169169

170170

171+
def _get_indent_string(line):
172+
"""Return the indention string of the given line."""
173+
result = ''
174+
for char in line:
175+
if char == ' ' or char == '\t':
176+
result += char
177+
else:
178+
break
179+
return result
180+
181+
171182
def _get_indent_length(line):
172183
"""Return the length of the indentation on the given token's line."""
173184
result = 0
@@ -185,6 +196,9 @@ def _get_indent_hint_line(bar_positions, bad_position):
185196
"""Return a line with |s for each of the positions in the given lists."""
186197
if not bar_positions:
187198
return ('', '')
199+
# TODO tabs should not be replaced by some random (8) number of spaces
200+
bar_positions = [_get_indent_length(indent) for indent in bar_positions]
201+
bad_position = _get_indent_length(bad_position)
188202
delta_message = ''
189203
markers = [(pos, '|') for pos in bar_positions]
190204
if len(markers) == 1:
@@ -203,8 +217,8 @@ def _get_indent_hint_line(bar_positions, bad_position):
203217

204218

205219
class _ContinuedIndent(object):
206-
__slots__ = ('valid_outdent_offsets',
207-
'valid_continuation_offsets',
220+
__slots__ = ('valid_outdent_strings',
221+
'valid_continuation_strings',
208222
'context_type',
209223
'token',
210224
'position')
@@ -213,10 +227,10 @@ def __init__(self,
213227
context_type,
214228
token,
215229
position,
216-
valid_outdent_offsets,
217-
valid_continuation_offsets):
218-
self.valid_outdent_offsets = valid_outdent_offsets
219-
self.valid_continuation_offsets = valid_continuation_offsets
230+
valid_outdent_strings,
231+
valid_continuation_strings):
232+
self.valid_outdent_strings = valid_outdent_strings
233+
self.valid_continuation_strings = valid_continuation_strings
220234
self.context_type = context_type
221235
self.position = position
222236
self.token = token
@@ -247,16 +261,16 @@ def __init__(self,
247261

248262
_CONTINUATION_HINT_MESSAGE = ' (%s %d space%s)' # Ex: (remove 2 spaces)
249263

250-
def _Offsets(*args):
251-
"""Valid indentation offsets for a continued line."""
264+
def _Indentations(*args):
265+
"""Valid indentation strings for a continued line."""
252266
return dict((a, None) for a in args)
253267

254268

255-
def _BeforeBlockOffsets(single, with_body):
256-
"""Valid alternative indent offsets for continued lines before blocks.
269+
def _BeforeBlockIndentations(single, with_body):
270+
"""Valid alternative indentation strings for continued lines before blocks.
257271
258-
:param int single: Valid offset for statements on a single logical line.
259-
:param int with_body: Valid offset for statements on several lines.
272+
:param str single: Valid indentation string for statements on a single logical line.
273+
:param str with_body: Valid indentation string for statements on several lines.
260274
261275
:returns: A dictionary mapping indentation strings to a string representing
262276
whether the indent is for a line or block.
@@ -286,6 +300,13 @@ def start_col(self, idx):
286300
def line(self, idx):
287301
return self._tokens[idx][4]
288302

303+
def line_indent(self, idx):
304+
return _get_indent_string(self.line(idx))
305+
306+
def token_indent(self, idx):
307+
line_indent = self.line_indent(idx)
308+
return line_indent + ' ' * (self.start_col(idx) - len(line_indent))
309+
289310

290311
class ContinuedLineState(object):
291312
"""Tracker for continued indentation inside a logical line."""
@@ -303,8 +324,12 @@ def has_content(self):
303324
return bool(self._cont_stack)
304325

305326
@property
306-
def _block_indent_size(self):
307-
return len(self._config.indent_string.replace('\t', ' ' * _TAB_LENGTH))
327+
def _block_indent_string(self):
328+
return self._config.indent_string.replace('\\t', '\t')
329+
330+
@property
331+
def _continuation_string(self):
332+
return self._block_indent_string[0] * self._config.indent_after_paren
308333

309334
@property
310335
def _continuation_size(self):
@@ -337,10 +362,10 @@ def next_logical_line(self):
337362
self.retained_warnings = []
338363
self._cont_stack = []
339364

340-
def add_block_warning(self, token_position, state, valid_offsets):
341-
self.retained_warnings.append((token_position, state, valid_offsets))
365+
def add_block_warning(self, token_position, state, valid_indentations):
366+
self.retained_warnings.append((token_position, state, valid_indentations))
342367

343-
def get_valid_offsets(self, idx):
368+
def get_valid_indentations(self, idx):
344369
"""Returns the valid indentation strings for the token at the given position."""
345370
# The closing brace on a dict or the 'for' in a dict comprehension may
346371
# reset two indent levels because the dict value is ended implicitly
@@ -349,30 +374,30 @@ def get_valid_offsets(self, idx):
349374
stack_top = -2
350375
indent = self._cont_stack[stack_top]
351376
if self._tokens.token(idx) in _CLOSING_BRACKETS:
352-
valid_offsets = indent.valid_outdent_offsets
377+
valid_indentations = indent.valid_outdent_strings
353378
else:
354-
valid_offsets = indent.valid_continuation_offsets
355-
return indent, valid_offsets.copy()
379+
valid_indentations = indent.valid_continuation_strings
380+
return indent, valid_indentations.copy()
356381

357382
def _hanging_indent_after_bracket(self, bracket, position):
358383
"""Extracts indentation information for a hanging indent."""
359-
indentation = _get_indent_length(self._tokens.line(position))
360-
if self._is_block_opener and self._continuation_size == self._block_indent_size:
384+
indentation = self._tokens.line_indent(position)
385+
if self._is_block_opener and self._continuation_string == self._block_indent_string:
361386
return _ContinuedIndent(
362387
HANGING_BLOCK,
363388
bracket,
364389
position,
365-
_Offsets(indentation + self._continuation_size, indentation),
366-
_BeforeBlockOffsets(indentation + self._continuation_size,
367-
indentation + self._continuation_size * 2))
390+
_Indentations(indentation + self._continuation_string, indentation),
391+
_BeforeBlockIndentations(indentation + self._continuation_string,
392+
indentation + self._continuation_string * 2))
368393
if bracket == ':':
369394
# If the dict key was on the same line as the open brace, the new
370395
# correct indent should be relative to the key instead of the
371396
# current indent level
372-
paren_align = self._cont_stack[-1].valid_outdent_offsets
373-
next_align = self._cont_stack[-1].valid_continuation_offsets.copy()
397+
paren_align = self._cont_stack[-1].valid_outdent_strings
398+
next_align = self._cont_stack[-1].valid_continuation_strings.copy()
374399
next_align_keys = list(next_align.keys())
375-
next_align[next_align_keys[0] + self._continuation_size] = True
400+
next_align[next_align_keys[0] + self._continuation_string] = True
376401
# Note that the continuation of
377402
# d = {
378403
# 'a': 'b'
@@ -384,27 +409,29 @@ def _hanging_indent_after_bracket(self, bracket, position):
384409
HANGING,
385410
bracket,
386411
position,
387-
_Offsets(indentation, indentation + self._continuation_size),
388-
_Offsets(indentation + self._continuation_size))
412+
_Indentations(indentation, indentation + self._continuation_string),
413+
_Indentations(indentation + self._continuation_string))
389414

390415
def _continuation_inside_bracket(self, bracket, pos):
391416
"""Extracts indentation information for a continued indent."""
392-
indentation = _get_indent_length(self._tokens.line(pos))
393-
token_start = self._tokens.start_col(pos)
394-
next_token_start = self._tokens.start_col(pos + 1)
395-
if self._is_block_opener and next_token_start - indentation == self._block_indent_size:
417+
indentation = self._tokens.line_indent(pos)
418+
token_indent = self._tokens.token_indent(pos)
419+
next_token_indent = self._tokens.token_indent(pos + 1)
420+
if self._is_block_opener and next_token_indent == indentation + self._block_indent_string:
396421
return _ContinuedIndent(
397422
CONTINUED_BLOCK,
398423
bracket,
399424
pos,
400-
_Offsets(token_start),
401-
_BeforeBlockOffsets(next_token_start, next_token_start + self._continuation_size))
425+
_Indentations(token_indent),
426+
_BeforeBlockIndentations(
427+
next_token_indent,
428+
next_token_indent + self._continuation_string))
402429
return _ContinuedIndent(
403430
CONTINUED,
404431
bracket,
405432
pos,
406-
_Offsets(token_start),
407-
_Offsets(next_token_start))
433+
_Indentations(token_indent),
434+
_Indentations(next_token_indent))
408435

409436
def pop_token(self):
410437
self._cont_stack.pop()
@@ -889,9 +916,9 @@ def _check_line_ending(self, line_ending, line_num):
889916
def _process_retained_warnings(self, tokens, current_pos):
890917
single_line_block_stmt = not _last_token_on_line_is(tokens, current_pos, ':')
891918

892-
for indent_pos, state, offsets in self._current_line.retained_warnings:
893-
block_type = offsets[tokens.start_col(indent_pos)]
894-
hints = {k: v for k, v in offsets.items() if v != block_type}
919+
for indent_pos, state, indentations in self._current_line.retained_warnings:
920+
block_type = indentations[tokens.token_indent(indent_pos)]
921+
hints = {k: v for k, v in indentations.items() if v != block_type}
895922
if single_line_block_stmt and block_type == WITH_BODY:
896923
self._add_continuation_message(state, hints, tokens, indent_pos)
897924
elif not single_line_block_stmt and block_type == SINGLE_LINE:
@@ -906,27 +933,27 @@ def same_token_around_nl(token_type):
906933
if not self._current_line.has_content or tokens.type(next_idx) == tokenize.NL:
907934
return
908935

909-
state, valid_offsets = self._current_line.get_valid_offsets(next_idx)
936+
state, valid_indentations = self._current_line.get_valid_indentations(next_idx)
910937
# Special handling for hanging comments and strings. If the last line ended
911938
# with a comment (string) and the new line contains only a comment, the line
912939
# may also be indented to the start of the previous token.
913940
if same_token_around_nl(tokenize.COMMENT) or same_token_around_nl(tokenize.STRING):
914-
valid_offsets[tokens.start_col(next_idx-2)] = True
941+
valid_indentations[tokens.token_indent(next_idx-2)] = True
915942

916943
# We can only decide if the indentation of a continued line before opening
917944
# a new block is valid once we know of the body of the block is on the
918945
# same line as the block opener. Since the token processing is single-pass,
919946
# emitting those warnings is delayed until the block opener is processed.
920947
if (state.context_type in (HANGING_BLOCK, CONTINUED_BLOCK)
921-
and tokens.start_col(next_idx) in valid_offsets):
922-
self._current_line.add_block_warning(next_idx, state, valid_offsets)
923-
elif tokens.start_col(next_idx) not in valid_offsets:
924-
925-
self._add_continuation_message(state, valid_offsets, tokens, next_idx)
948+
and tokens.token_indent(next_idx) in valid_indentations):
949+
self._current_line.add_block_warning(next_idx, state, valid_indentations)
950+
elif tokens.token_indent(next_idx) not in valid_indentations:
951+
self._add_continuation_message(state, valid_indentations, tokens, next_idx)
926952

927-
def _add_continuation_message(self, state, offsets, tokens, position):
953+
def _add_continuation_message(self, state, indentations, tokens, position):
928954
readable_type, readable_position = _CONTINUATION_MSG_PARTS[state.context_type]
929-
hint_line, delta_message = _get_indent_hint_line(offsets, tokens.start_col(position))
955+
hint_line, delta_message = _get_indent_hint_line(indentations,
956+
tokens.token_indent(position))
930957
self.add_message(
931958
'bad-continuation',
932959
line=tokens.start_line(position),

0 commit comments

Comments
 (0)