Skip to content

gh-130167: Optimise textwrap.dedent() #131919

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 31, 2025
50 changes: 50 additions & 0 deletions Lib/test/test_textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -769,6 +769,56 @@ def assertUnchanged(self, text):
"""assert that dedent() has no effect on 'text'"""
self.assertEqual(text, dedent(text))

def test_dedent_whitespace(self):
    """Check dedent() on inputs made up mostly or entirely of whitespace."""
    # The empty string must come back untouched.
    self.assertUnchanged("")

    # (input, expected result) pairs, verified in the order listed.
    cases = (
        # Only spaces.
        (" ", ""),
        # Only tabs.
        ("\t\t\t\t", ""),
        # A mixture of spaces and tabs.
        (" \t \t\t \t ", ""),
        # ASCII whitespace.
        ("\f\n\r\t\v ", "\n"),
        # One newline.
        ("\n", "\n"),
        # Windows-style newlines.
        ("\r\n" * 5, "\n" * 5),
        # Whitespace mixture spread over several lines.
        (" \n\t\n \n\t\t\n\n\n ", "\n\n\n\n\n\n"),
        # Lines consisting only of whitespace are always normalised.
        ("a\n \n\t\n", "a\n\n\n"),
        # Whitespace characters on non-empty lines are retained.
        ("a\r\n\r\n\r\n", "a\r\n\n\n"),
    )
    for source, wanted in cases:
        self.assertEqual(wanted, dedent(source))

def test_dedent_nomargin(self):
# No lines indented.
text = "Hello there.\nHow are you?\nOh good, I'm glad."
Expand Down
50 changes: 13 additions & 37 deletions Lib/textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,9 +413,6 @@ def shorten(text, width, **kwargs):

# -- Loosely related functionality -------------------------------------

# Matches a whole line made up solely of one or more spaces and/or tabs
# (MULTILINE makes ^ and $ anchor at every line boundary).
_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
# Group 1 captures the (possibly empty) run of leading spaces/tabs on each
# line that goes on to contain a character other than space, tab or newline.
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)

def dedent(text):
    """Remove any common leading whitespace from every line in `text`.

    Only spaces and tabs are treated as removable margin, and they are
    compared character by character: a line indented with spaces and a
    line indented with a tab have no common leading whitespace.

    Entirely blank lines are normalized to a newline character.

    Raises TypeError if `text` is not a string.
    """
    try:
        lines = text.split('\n')
    except (AttributeError, TypeError):
        # Reject non-str input explicitly instead of silently returning
        # falsy objects (None, 0, b'') or leaking an unrelated error.
        msg = 'expected str object, not %r' % type(text).__qualname__
        raise TypeError(msg) from None

    # Get length of leading whitespace, inspired by ``os.path.commonprefix()``:
    # the prefix shared by the lexicographically smallest and largest
    # lines is shared by every line in between.
    non_blank_lines = [line for line in lines if line and not line.isspace()]
    smallest = min(non_blank_lines, default='')
    largest = max(non_blank_lines, default='')
    margin = 0
    for margin, char in enumerate(smallest):
        # Stop at the first mismatch or the first non-margin character.
        # Every non-blank line contains such a character, so the loop
        # always breaks before running off the end of either string.
        if char != largest[margin] or char not in ' \t':
            break

    # Whitespace-only lines collapse to '' so they become bare newlines.
    return '\n'.join([line[margin:] if not line.isspace() else ''
                      for line in lines])


def indent(text, prefix, predicate=None):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Improved performance of :func:`textwrap.dedent` by an average of ~2.4x
(with improvements of up to 4x for large inputs),
and fixed a bug where blank lines with whitespace characters other than space
or horizontal tab were not normalised to a newline character.
Patch by Adam Turner, Marius Juston, and Pieter Eendebak.
Loading