Skip to content

gh-130167: Optimise textwrap.dedent() #131919

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 31, 2025
40 changes: 40 additions & 0 deletions Lib/test/test_textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -769,6 +769,46 @@ def assertUnchanged(self, text):
"""assert that dedent() has no effect on 'text'"""
self.assertEqual(text, dedent(text))

def test_dedent_only_whitespace(self):
# The empty string.
text = ""
self.assertUnchanged(text)

# Only spaces.
text = " "
expect = ""
self.assertEqual(expect, dedent(text))

# Only tabs.
text = "\t\t\t\t"
expect = ""
self.assertEqual(expect, dedent(text))

# A mixture.
text = " \t \t\t \t "
expect = ""
self.assertEqual(expect, dedent(text))

# ASCII whitespace.
text = "\f\n\r\t\v "
expect = "\n"
self.assertEqual(expect, dedent(text))

# One newline.
text = "\n"
expect = "\n"
self.assertEqual(expect, dedent(text))

# Windows-style newlines.
text = "\r\n"
expect = "\n"
self.assertEqual(expect, dedent(text))

# Whitespace mixture.
text = " \n\t\n \n\t\t\n\n\n "
expect = "\n\n\n\n\n\n"
self.assertEqual(expect, dedent(text))

def test_dedent_nomargin(self):
# No lines indented.
text = "Hello there.\nHow are you?\nOh good, I'm glad."
Expand Down
56 changes: 19 additions & 37 deletions Lib/textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,9 +413,6 @@ def shorten(text, width, **kwargs):

# -- Loosely related functionality -------------------------------------

_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)

def dedent(text):
"""Remove any common leading whitespace from every line in `text`.

Expand All @@ -429,42 +426,27 @@ def dedent(text):

Entirely blank lines are normalized to a newline character.
"""
# Look for the longest leading string of spaces and tabs common to
# all lines.
margin = None
text = _whitespace_only_re.sub('', text)
indents = _leading_whitespace_re.findall(text)
for indent in indents:
if margin is None:
margin = indent

# Current line more deeply indented than previous winner:
# no change (previous winner is still on top).
elif indent.startswith(margin):
pass

# Current line consistent with and no deeper than previous winner:
# it's the new winner.
elif margin.startswith(indent):
margin = indent

# Find the largest common whitespace between current line and previous
# winner.
else:
for i, (x, y) in enumerate(zip(margin, indent)):
if x != y:
margin = margin[:i]
break
if not text:
return text

# If the input is entirely whitespace, return normalized lines
if text.isspace():
return '\n' * text.count('\n')

lines = text.split('\n')

# sanity check (testing/debugging only)
if 0 and margin:
for line in text.split("\n"):
assert not line or line.startswith(margin), \
"line = %r, margin = %r" % (line, margin)
# Get length of leading whitespace, inspired by ``os.path.commonprefix()``
non_blank_lines = [l for l in lines if l and not l.isspace()]
l1 = min(non_blank_lines)
l2 = max(non_blank_lines)
margin = 0
for i, c in enumerate(l1):
if c != l2[i] or c not in ' \t':
break
margin += 1

if margin:
text = re.sub(r'(?m)^' + margin, '', text)
return text
return '\n'.join([l[margin:] if l and not l.isspace() else ''
for l in lines])


def indent(text, prefix, predicate=None):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Improved performance of :func: `textwrap.dedent` by ~2.4x.
Patch by Adam Turner and Marius Juston.
Loading