gh-131791: Improve speed of `textwrap.dedent` by replacing `re` #131792

Marius-Juston · 2025-03-27T09:25:34Z

Current code:

_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)

def dedent(text):
    """Remove any common leading whitespace from every line in `text`.

    This can be used to make triple-quoted strings line up with the left
    edge of the display, while still presenting them in the source code
    in indented form.

    Note that tabs and spaces are both treated as whitespace, but they
    are not equal: the lines "  hello" and "\\thello" are
    considered to have no common leading whitespace.

    Entirely blank lines are normalized to a newline character.
    """
    # Look for the longest leading string of spaces and tabs common to
    # all lines.
    margin = None
    text = _whitespace_only_re.sub('', text)
    indents = _leading_whitespace_re.findall(text)
    for indent in indents:
        if margin is None:
            margin = indent

        # Current line more deeply indented than previous winner:
        # no change (previous winner is still on top).
        elif indent.startswith(margin):
            pass

        # Current line consistent with and no deeper than previous winner:
        # it's the new winner.
        elif margin.startswith(indent):
            margin = indent

        # Find the largest common whitespace between current line and previous
        # winner.
        else:
            for i, (x, y) in enumerate(zip(margin, indent)):
                if x != y:
                    margin = margin[:i]
                    break

    # sanity check (testing/debugging only)
    if 0 and margin:
        for line in text.split("\n"):
            assert not line or line.startswith(margin), \
                   "line = %r, margin = %r" % (line, margin)

    if margin:
        text = re.sub(r'(?m)^' + margin, '', text)
    return text

The new implementation can give upwards to 4x the speed performance on larger files.

if __name__ == '__main__':
    with open("../Objects/unicodeobject.c") as f:
        raw_text = f.read()


    tests = []
    test_names = []
    test_names = ['', "    ", "\t", "abc  \t", ' \t  abc', ' \t  abc  \t  ']

    index = 0

    temp = dedent(raw_text)

    for indent_v in test_names:
        text = indent(raw_text, indent_v)

        tests.append(text)


        output = dedent_faster(text, only_whitespace=False)

        print("Validating large text with not only whitespace indentation:", indent_v.encode("ascii"), output == temp)


    # Basic indented text with empty lines
    text = """
        def hello():
            print("Hello, world!")


        """

    tests.append(text)
    test_names.append("Basic indented text with empty lines")

    # Text with mixed indentation and blank lines
    text = """
        This is a test.

        Another line.

    """

    tests.append(text)
    test_names.append("Text with mixed indentation and blank lines")

    # No indentation (edge case)
    text = """No indents here.
    Just normal text.

    With empty lines."""

    tests.append(text)
    test_names.append("No indentation (edge case)")

    # Only blank lines (should preserve them)
    text = """


    """

    tests.append(text)
    test_names.append("Only blank lines")

    # Edge case: No common prefix to remove
    text = """hello
        world
    """

    tests.append(text)
    test_names.append("Edge case: No common prefix to remove")

    # Edge case: Single indented line
    text = """    Only one indented line"""

    tests.append(text)
    test_names.append("Edge case: Single indented line")

    # Edge case: Single indented line
    text = """    """

    tests.append(text)
    test_names.append("Edge case: Single indented line only")

    # Edge case: Single indented line
    text = """"""

    tests.append(text)
    test_names.append("Edge case: Empty text")

    for text, name in zip(tests, test_names):
        print(f"========= Case: {name.encode('ascii')} =========")

        a = dedent(text)

        for func in [dedent_faster, dedent]:
            single_test = func(text)

            print(func.__name__, a == single_test)

            it = timeit.Timer(lambda: func(text))
            result = it.repeat(number=10_000)
            result.sort()
            print(f"{func.__name__} Min: {result[0]:.4f}msec")

Returning the following:

Validating large text with not only whitespace indentation: b'' True
Validating large text with not only whitespace indentation: b'    ' True
Validating large text with not only whitespace indentation: b'\t' True
Validating large text with not only whitespace indentation: b'abc  \t' True
Validating large text with not only whitespace indentation: b' \t  abc' True
Validating large text with not only whitespace indentation: b' \t  abc  \t  ' True
========= Case: b'' =========
dedent_faster True
dedent_faster Min: 1.5848msec
dedent True
dedent Min: 6.6143msec
========= Case: b'    ' =========
dedent_faster True
dedent_faster Min: 2.5275msec
dedent True
dedent Min: 10.6884msec
========= Case: b'\t' =========
dedent_faster True
dedent_faster Min: 2.3215msec
dedent True
dedent Min: 9.9588msec
========= Case: b'abc  \t' =========
dedent_faster True
dedent_faster Min: 1.5026msec
dedent True
dedent Min: 6.7075msec
========= Case: b' \t  abc' =========
dedent_faster True
dedent_faster Min: 2.5997msec
dedent True
dedent Min: 10.6693msec
========= Case: b' \t  abc  \t  ' =========
dedent_faster True
dedent_faster Min: 2.6204msec
dedent True
dedent Min: 11.7285msec
========= Case: b'Basic indented text with empty lines' =========
dedent_faster True
dedent_faster Min: 0.0016msec
dedent True
dedent Min: 0.0022msec
========= Case: b'Text with mixed indentation and blank lines' =========
dedent_faster True
dedent_faster Min: 0.0016msec
dedent True
dedent Min: 0.0020msec
========= Case: b'No indentation (edge case)' =========
dedent_faster True
dedent_faster Min: 0.0008msec
dedent True
dedent Min: 0.0013msec
========= Case: b'Only blank lines' =========
dedent_faster True
dedent_faster Min: 0.0006msec
dedent True
dedent Min: 0.0005msec
========= Case: b'Edge case: No common prefix to remove' =========
dedent_faster True
dedent_faster Min: 0.0007msec
dedent True
dedent Min: 0.0008msec
========= Case: b'Edge case: Single indented line' =========
dedent_faster True
dedent_faster Min: 0.0004msec
dedent True
dedent Min: 0.0013msec
========= Case: b'Edge case: Single indented line only' =========
dedent_faster True
dedent_faster Min: 0.0002msec
dedent True
dedent Min: 0.0003msec
========= Case: b'Edge case: Empty text' =========
dedent_faster True
dedent_faster Min: 0.0001msec
dedent True
dedent Min: 0.0002msec

Which thus returns a 4x performance boost for large files. Another advantage is this this method allows for people to just remove all common prefixes to the file as an option instead of just whitespaces. Another advantage is that it does not use regex expressions.

( This was optimized iteratively using Claude + ChatGPT models )

Issue: Improve speed of textwrap.dedent #131791

… if desired

ghost · 2025-03-27T09:25:37Z

All commit authors signed the Contributor License Agreement.

bedevere-app · 2025-03-27T09:25:38Z

Most changes to Python require a NEWS entry. Add one using the blurb_it web app or the blurb command-line tool.

If this change has little impact on Python users, wait for a maintainer to apply the skip news label instead.

StanFromIreland

This PR is a mess of optimization and new features. This PR also does not pass existing tests.

ZeroIntensity

( This was optimized iteratively using Claude + ChatGPT models )

Yes, we noticed 😉. This isn't an acceptable use of AI for us.

If we actually are getting a 4x speedup (probably due to the lack of regex) and can pass all tests, then this does seem like something we could maybe investigate adding. But, please remove the added only_whitespace parameter.

Misc/NEWS.d/next/Library/2025-03-27-04-35-17.gh-issue-131792.UtGg3O.rst

Lib/textwrap.py

eendebakpt · 2025-03-27T20:48:04Z

@Marius-Juston I agree with the other reviewers that there are too many changes in this PR. But it is interesting that an AI can generate good ideas (and I appreciate you mentioning this in the corresponding issue). Based on the ideas I tried some variations and I do think it is worthwhile to pursue this.

Sometimes it is better to split a PR with many changes (like this one) into smaller steps (not sure we can do that here though). For example with only commit 2a93206 we can eliminate the _whitespace_only_re (reduces the import time) and improve performance:

textwrap.dedent(single_line): Mean +- std dev: [main] 1.73 us +- 0.14 us -> [pr_v3] 1.48 us +- 0.10 us: 1.17x faster
textwrap.dedent(no_indent): Mean +- std dev: [main] 6.86 us +- 0.42 us -> [pr_v3] 5.68 us +- 0.36 us: 1.21x faster
textwrap.dedent(spaces): Mean +- std dev: [main] 12.0 us +- 0.7 us -> [pr_v3] 9.10 us +- 0.85 us: 1.31x faster
textwrap.dedent(mixed): Mean +- std dev: [main] 19.6 us +- 1.7 us -> [pr_v3] 18.5 us +- 1.1 us: 1.06x faster
textwrap.dedent(large_text): Mean +- std dev: [main] 250 us +- 15 us -> [pr_v3] 204 us +- 18 us: 1.22x faster

Geometric mean: 1.19x faster

(benchmark script: https://github.com/eendebakpt/python_testing/blob/fac5d2457fe894e939d76d1cd5a8991a6b5b4624/benchmarks/bm_textwrap.py)

Marius-Juston · 2025-03-27T20:58:04Z

Yeah, most of this is still AI-generated (except for the final lines, where list comprehension was used instead of for loops).

After reviewing all the comments, I was clearly overeager with the performance increase that I had initially noticed and did not perform the necessary due diligence that I really should have done in the first place.

I was really using AI to see if it would be able to find interesting or new ways to optimize the function as a test ( the use of the os.path.commonprefix which is C optimized was such an interesting example ).

It might be better for me to just close this PR, do some more edits offline, verify all the tests, etc.. and then perform the PR?

eendebakpt · 2025-03-27T21:01:39Z

Lib/textwrap.py

+    # If all lines are blank, normalize them
+    if not non_blank_whites:
+        # Preallocate result list
+        return "".join(["\n" if line.endswith("\n") else "" for line in lines])


How often does this occur? You could leave it out as the os.path.commonprefix([]) is '', which gives margin_len=0 so the case is correctly handled. It makes the code smaller.

… checking of common prefixes for each line

# Conflicts: # Lib/textwrap.py

Marius-Juston · 2025-03-28T03:24:00Z

With the updated code we get approximately 3x speedup for large files:

========= Case: b"' ' * 1000" =========
dedent Min: 165.5519msec
dedent_faster Min: 13.3500msec
========= Case: b'' =========
dedent Min: 6.5183msec
dedent_faster Min: 2.9604msec
========= Case: b'    ' =========
dedent Min: 10.5002msec
dedent_faster Min: 3.2978msec
========= Case: b'\t' =========
dedent Min: 9.8412msec
dedent_faster Min: 3.2125msec
========= Case: b'abc  \t' =========
dedent Min: 6.6309msec
dedent_faster Min: 2.4415msec
========= Case: b' \t  abc' =========
dedent Min: 10.5123msec
dedent_faster Min: 3.4371msec
========= Case: b' \t  abc  \t  ' =========
dedent Min: 11.4295msec
dedent_faster Min: 3.4418msec

picnixz

Which thus returns a 4x performance boost for large files

For benchmarks, please use pyperf instead of showing the minimum. The minimum doesn't help here as we're interested in average time complexity, not best time complexity. What's interesting when doing benchmarks is also the geometric mean and comparisons done on non-DEBUG builds (possibly with LTO+PGO enabled) [I don't know which kind of builds the tests were done on].

I would like to add test cases with much longer test strings than those that are given and also cases that are multi-lines and with multiple blocks of indented texts. I also want to know when a file is considered to be "large". Also, what happens when nothing should be dedented? are we loosing something when we do dedent(dedent(text))?

I haven't seen the benchmarks for really big files. So I suggest generating some fake test instead wich lots of blocks that are indented and that need to be dedented, or when the common margin is far from there. Let's say we're twice faster but are we talking in terms of seconds or microseconds? for very large files, if the reading + loading into memory takes is already the main bottleneck, then optimizing dedent() wouldn't be noticed.

On the "how" this was solved, I really think it's not a good idea to use os.path.commonpath as it's purpose is entirely different and we shouldn't really on this kind of magic. In addition ther docs say:

Raise ValueError if paths contain both absolute and relative pathnames, if paths are on different drives, or if paths is empty. Unlike commonprefix(), this returns a valid path.

What if there is a text that contains absolute and relative-looking path names? this would fail. I think we need to change the approach.

bedevere-app · 2025-03-28T17:57:24Z

A Python core developer has requested some changes be made to your pull request before we can consider merging it. If you could please address their requests along with any other requests in other reviews from core developers that would be appreciated.

Once you have made the requested changes, please leave a comment on this pull request containing the phrase I have made the requested changes; please review again. I will then notify any core developers who have left a review that you're ready for them to take another look at this pull request.

Marius-Juston · 2025-03-28T18:19:33Z

I would like to add test cases with much longer test strings than those that are given and also cases that are multi-lines and with multiple blocks of indented texts. I also want to know when a file is considered to be "large".

Based on what a previous PR did in issue #107369 I followed the same method and validated the "large" file with Objects/unicodeobject.c (this has 16k lines) that I indent at different levels, the timings given are based just on the computation of calling the dedent function as such:

tests = []
test_names = []
test_names = ["abc  \t", '', "    ", "\t",  ' \t  abc', ' \t  abc  \t  ', "' ' * 1000"]
test_indents = ["abc  \t", '', "    ", "\t",  ' \t  abc', ' \t  abc  \t  ', ' ' * 1000]

for indent_v in test_indents:
    text = indent(raw_text, indent_v)

    tests.append(text)

# Basic indented text with empty lines
text = """
    def hello():
        print("Hello, world!")


    """

tests.append(text)
test_names.append("Basic indented text with empty lines")

# Text with mixed indentation and blank lines
text = """
    This is a test.

    Another line.

"""

tests.append(text)
test_names.append("Text with mixed indentation and blank lines")

# No indentation (edge case)
text = """No indents here.
Just normal text.

With empty lines."""

tests.append(text)
test_names.append("No indentation (edge case)")

# Only blank lines (should preserve them)
text = """


"""

tests.append(text)
test_names.append("Only blank lines")

# Edge case: No common prefix to remove
text = """hello
    world
"""

tests.append(text)
test_names.append("Edge case: No common prefix to remove")

# Edge case: Single indented line
text = """    Only one indented line"""

tests.append(text)
test_names.append("Edge case: Single indented line")

# Edge case: Single indented line
text = """    """

tests.append(text)
test_names.append("Edge case: Single indented line only")

# Edge case: Single indented line
text = """"""

tests.append(text)
test_names.append("Edge case: Empty text")

for text, name in zip(tests, test_names):
    print(f"========= Case: {name.encode('ascii')} =========")

    a = dedent(text)

    for func in [dedent_faster_v5, dedent_faster]:
        single_test = func(text)

        print(func.__name__, a == single_test)

        it = timeit.Timer(lambda: func(text))
        result = it.repeat(number=1_000)
        result.sort()
        print(f"{func.__name__} Min: {result[0]:.4f}msec")

Also, what happens when nothing should be dedented? are we loosing something when we do dedent(dedent(text))?

When nothing should be dedented the only the empty lines are normalized as per the documentation of dedent.

On the "how" this was solved, I really think it's not a good idea to use os.path.commonpath as it's purpose is entirely different and we shouldn't really on this kind of magic. In addition ther docs say:

Raise ValueError if paths contain both absolute and relative pathnames, if paths are on different drives, or if paths is empty. Unlike commonprefix(), this returns a valid path.

I am not using os.path.commonpath but rather os.path.commonprefix, which does not raise any exceptions and follows the following documentation:

Return the longest path prefix (taken character-by-character) that is a prefix of all paths in list. If list is empty, return the empty string ('').

So I believe that using this function is okay

For benchmarks, please use pyperf instead of showing the minimum. The minimum doesn't help here as we're interested in average time complexity, not best time complexity. What's interesting when doing benchmarks is also the geometric mean and comparisons done on non-DEBUG builds (possibly with LTO+PGO enabled) [I don't know which kind of builds the tests were done on].

I was using as reference the Issue #107369 so I was performing something similar to them. As for the build for the speed comparison my python environment is an Anaconda 3.13.2; however, I will make sure to setup the comparisons in a more proper environment for validation. Is there a quick link for how to set that up or how would you like it to be setup? I assume I should just be doing

./configure --with-pydebug && make -j
./python speedtest.py

picnixz · 2025-03-28T18:29:29Z

I am not using os.path.commonpath but rather os.path.commonprefix, which does not raise any exceptions and follows the following documentation:

Oh right, my bad. I incorrectly read the function being used. Well, I still think it's kind of weird to use that function, but if it works well for any strings, we can keep it maybe?

that I indent at different levels, the timings given are based just on the computation of calling the dedent function as such:

The method used in the linked PR is fine but I don't see those numbers for this specific file (maybe I missed it but I can't find it in your list). That's why I suggest using pyperf and create a clean benchmark script so that we have a better idea of the performance, not just the minimum.

. Is there a quick link for how to set that up or how would you like it to be setup? I assume I should just be doing

You should do:

./configure -q --enable-optimizations && make -s -j
./python -m pyperf bench.py

where bench.py is a benchmark script (see https://github.com/psf/pyperf). An example of benchmarking:

import os
import random
import uuid

import pyperf

if __name__ == '__main__':
    runner = pyperf.Runner()
    runner.bench_func('uuid1()', uuid.uuid1)
    node = random.getrandbits(48)
    runner.bench_func('uuid1(node, None)', uuid.uuid1, node)
    clock_seq = random.getrandbits(14)
    runner.bench_func('uuid1(None, clock_seq)', uuid.uuid1, None, clock_seq)

    ns = uuid.NAMESPACE_DNS
    runner.bench_func('uuid3(NAMESPACE_DNS, os.urandom(16))',
                      uuid.uuid3, ns, os.urandom(16))
    runner.bench_func('uuid3(NAMESPACE_DNS, os.urandom(1024))',
                      uuid.uuid3, ns, os.urandom(1024))

    runner.bench_func('uuid4()', uuid.uuid4)

    ns = uuid.NAMESPACE_DNS
    runner.bench_func('uuid5(NAMESPACE_DNS, os.urandom(16))',
                      uuid.uuid5, ns, os.urandom(16))
    runner.bench_func('uuid5(NAMESPACE_DNS, os.urandom(1024))',
                      uuid.uuid5, ns, os.urandom(1024))

    runner.bench_func('uuid8()', uuid.uuid8)

In your case, you should use different inputs for dedent. In one run, you would use textwrap.dedent and in another run you would use textwrap.dedent2 (your new function). Then you can compare the two benchmarks using pyperf compare.

AA-Turner · 2025-03-28T19:40:34Z

@Marius-Juston are you confident that this PR now represents entirely your own work, with no contribution from a large language model? For copyright and licensing reasons, we require you to sign the Contributor Licence Agreement, whereas potentially copyrightable code generated by a LLM puts us in difficult territory.

Please see also our AI policy.

A

Marius-Juston · 2025-03-28T20:11:23Z

Yes this now entirely my own code

eendebakpt · 2025-03-28T20:19:51Z

@Marius-Juston Some more high level feedback on your PR.

Performance Based on results so far and my own tests, I suspect performance will be good. But please benchmark as suggested by @picnixz using pyperf (here is possible starting point: bm_textwrap). Also for smaller strings there is value in improving performance. I recall dedent showing up in profiling results for the import time one of larger scientific packages (numpy? scipy? I forgot) because of docstrings being dedented at import.

Readability The implementation in this PR is really different from the orignal one, but I think it is quite readable. The final expression is a bit hard to read, maybe that can be split up? And I would omit the fast path for empty text (who dedents an empty string?)

Memory requirements We will always require memory for the input text, and the output text. In addition for this PR we have lines (list of length the number of input lines), a temporary list of the same size (argument of commonprefix), and a temporary list in the final expression (maybe that can be converted to a generator?). So maximum memory size is roughly 5 times the input size. In current main we also have the input text and the output text, the whitespace only text (same variable text, but the caller probably holds a reference to the original text), the indents (could be much smaller than text, but worse case just as large). Not sure memory will be a bottleneck for real world applications, but the memory usage in this PR seems about as good (but certainly not much worse) as in main.

Marius-Juston · 2025-03-29T05:49:43Z

@picnixz

Am I being stupid or doing something wrong with this benchmark?

https://gist.github.com/Marius-Juston/6947a794937a6e46c0164dbb9058c8af

I am getting returned:

WARNING: the benchmark result may be unstable
* the standard deviation (1.39 ms) is 16% of the mean (8.60 ms)
* the minimum (2.98 ms) is 65% smaller than the mean (8.60 ms)

Try to rerun the benchmark with more runs, values and/or loops.
Run 'python -m pyperf system tune' command to reduce the system jitter.
Use pyperf stats, pyperf dump and pyperf hist to analyze results.
Use --quiet option to hide these warnings.

but most importantly:

./python -m pyperf compare_to dedent.json dedent2.json
b'': Mean +- std dev: [dedent] 8.46 ms +- 0.40 ms -> [dedent2] 8.34 ms +- 0.38 ms: 1.01x faster
b'    ': Mean +- std dev: [dedent] 13.8 ms +- 0.6 ms -> [dedent2] 13.6 ms +- 0.6 ms: 1.02x faster
b'\t': Mean +- std dev: [dedent] 13.0 ms +- 0.6 ms -> [dedent2] 13.0 ms +- 0.6 ms: 1.00x slower
b' \t  abc': Mean +- std dev: [dedent] 14.7 ms +- 0.6 ms -> [dedent2] 15.0 ms +- 0.8 ms: 1.02x slower
b' \t  abc  \t  ': Mean +- std dev: [dedent] 16.1 ms +- 0.7 ms -> [dedent2] 16.2 ms +- 0.6 ms: 1.00x slower

Benchmark hidden because not significant (1): b'abc  \t'

Geometric mean: 1.00x slower

Not sure if I am doing something stupid, I change between the runs from textwrap import dedent as dedent to from textwrap import dedent2 as dedent but it shows absolutely no different.

./configure -q --enable-optimizations && make -s -j
./python -m ensurepip --upgrade # Install pip since it does not exist
./python -m pip install pyperf # Install pyperf

./python tests/bm_dedent.py -o dedent.json
./python tests/bm_dedent.py -o dedent.json # Run in aftewards with the file changed for dedent2
./python -m pyperf compare_to dedent.json dedent2.json

is the commands I did

python -m pyperf system tune does not seem to function for me as well where I am getting:

ERROR: At least one operation failed with permission error, retry as root ;however, when I try sudo python3 -m pyperf system tune I get No module named pyperf

Marius-Juston · 2025-03-30T01:36:29Z

This is the resulting output:

b'whitespace_only'
==================

Mean +- std dev: [dedent] 837 ns +- 87 ns -> [dedent2] 688 ns +- 53 ns: 1.22x faster
ERROR when testing if values are significant

b'large_text'
=============

Mean +- std dev: [dedent] 263 us +- 20 us -> [dedent2] 78.2 us +- 1.5 us: 3.36x faster
ERROR when testing if values are significant

b'mixed'
========

Mean +- std dev: [dedent] 17.1 us +- 1.3 us -> [dedent2] 12.9 us +- 1.0 us: 1.33x faster
ERROR when testing if values are significant

b'spaces'
=========

Mean +- std dev: [dedent] 12.9 us +- 1.2 us -> [dedent2] 3.42 us +- 0.08 us: 3.77x faster
ERROR when testing if values are significant

b'no_indent'
============

Mean +- std dev: [dedent] 7.13 us +- 0.76 us -> [dedent2] 2.86 us +- 0.08 us: 2.49x faster
ERROR when testing if values are significant

b'Edge case: Empty text'
========================

Mean +- std dev: [dedent] 291 ns +- 32 ns -> [dedent2] 65.9 ns +- 3.5 ns: 4.42x faster
ERROR when testing if values are significant

b'Edge case: Single indented line only'
=======================================

Mean +- std dev: [dedent] 441 ns +- 35 ns -> [dedent2] 456 ns +- 16 ns: 1.03x slower
ERROR when testing if values are significant

b'Edge case: Single indented line'
==================================

Mean +- std dev: [dedent] 1.90 us +- 0.17 us -> [dedent2] 1.89 us +- 0.05 us: 1.00x faster
ERROR when testing if values are significant

b'Edge case: No common prefix to remove'
========================================

Mean +- std dev: [dedent] 1.16 us +- 0.08 us -> [dedent2] 1.50 us +- 0.04 us: 1.29x slower
ERROR when testing if values are significant

b'Only blank lines'
===================

Mean +- std dev: [dedent] 721 ns +- 50 ns -> [dedent2] 724 ns +- 26 ns: 1.00x slower
ERROR when testing if values are significant

b'No indentation (edge case)'
=============================

Mean +- std dev: [dedent] 1.71 us +- 0.09 us -> [dedent2] 1.64 us +- 0.05 us: 1.04x faster
ERROR when testing if values are significant

b'Text with mixed indentation and blank lines'
==============================================

Mean +- std dev: [dedent] 2.73 us +- 0.13 us -> [dedent2] 2.08 us +- 0.07 us: 1.31x faster
ERROR when testing if values are significant

b'Basic indented text with empty lines'
=======================================

Mean +- std dev: [dedent] 2.91 us +- 0.14 us -> [dedent2] 2.15 us +- 0.14 us: 1.36x faster
ERROR when testing if values are significant

Geometric mean: 1.56x faster

picnixz · 2025-03-30T11:01:04Z

This is the resulting output:

Can you give me the full benchmark script you used please? Also there's something happening as it says "ERROR when testing if values are significant".

Now, I've run some benchmarks on my side and they are indeed promising (after in-lining os.path.commonprefix; note that benchmarks are a bit unstable but that's just because I'm using --fast). Benchmarks were performed on PGO main:

+------------------------+---------+-----------------------+
| Benchmark              | ref     | new                   |
+========================+=========+=======================+
| empty                  | 101 ns  | 28.2 ns: 3.58x faster |
+------------------------+---------+-----------------------+
| whitespaces            | 272 ns  | 578 ns: 2.13x slower  |
+------------------------+---------+-----------------------+
| 1000 empty lines       | 2.63 us | 4.62 us: 1.76x slower |
+------------------------+---------+-----------------------+
| no indented paragraph  | 3.73 us | 1.43 us: 2.60x faster |
+------------------------+---------+-----------------------+
| indented paragraph     | 3.70 us | 1.50 us: 2.47x faster |
+------------------------+---------+-----------------------+
| no indented paragraphs | 11.5 us | 3.15 us: 3.64x faster |
+------------------------+---------+-----------------------+
| indented paragraphs    | 24.5 us | 6.97 us: 3.52x faster |
+------------------------+---------+-----------------------+
| large objects          | 49.7 us | 23.3 us: 2.14x faster |
+------------------------+---------+-----------------------+
| Geometric mean         | (ref)   | 1.90x faster          |
+------------------------+---------+-----------------------+

What is however a bit unfortunate is that we are relying on a function whose name doesn't help here, and I hope that os.path.commonprefix will not change in the future. Now its implementation is as follows:

# Return the longest prefix of all list elements.
def commonprefix(m):
    "Given a list of pathnames, returns the longest common leading component"
    if not m: return ''
    # Some people pass in a list of pathname parts to operate in an OS-agnostic
    # fashion; don't try to translate in that case as that's an abuse of the
    # API and they are already doing what they need to be OS-agnostic and so
    # they most likely won't be using an os.PathLike object in the sublists.
    if not isinstance(m[0], (list, tuple)):
        m = tuple(map(os.fspath, m))
    s1 = min(m)
    s2 = max(m)
    for i, c in enumerate(s1):
        if c != s2[i]:
            return s1[:i]
    return s1

We can definitely skip the isinstance check as we know that we're passing strings and not something else.

Lib/textwrap.py

AA-Turner · 2025-03-30T17:22:10Z

I hope that os.path.commonprefix will not change in the future

It's specified as behaving as it does, and if the behaviour does change, we should notice this with the textwrap.dedent tests. I agree we could also just copy the code as a private function in textwrap.

picnixz · 2025-03-30T17:25:58Z

I think it's better to inline the code or create a small helper function. This reduces the needs for an import and for dead code (the os.fspath call).

Marius-Juston · 2025-03-30T19:21:27Z

Can you give me the full benchmark script you used please? Also there's something happening as it says "ERROR when testing if values are significant".

Here is the code https://gist.github.com/Marius-Juston/6947a794937a6e46c0164dbb9058c8af yeah I know that the "Error is for when the values are significant" however the parameters I set did not seem to give much of an impact on fixing that ( the inner loops parameters just made the tests run for 0us even though I had set it to 10_000_000 so I am thinking something wrong is on my side )

eendebakpt · 2025-03-30T19:56:13Z

@Marius-Juston Your script seems to work on my system (and I get a speedup for the PR). The warning WARNING: the benchmark result may be unstable might be ok (I get those often on my system), but it could be related to the line runner = pyperf.Runner(values=100, loops=40) in your script where the specify to use 40 loops. The tests here are quite fast, so it might be better to specify more loops (if you set loops to 0 it will auto select. if you run the script in verbose mode -v you will see much more loops are selected).

Marius-Juston · 2025-03-30T19:56:26Z

What is however a bit unfortunate is that we are relying on a function whose name doesn't help here, and I hope that os.path.commonprefix will not change in the future. Now its implementation is as follows:

# Return the longest prefix of all list elements.
def commonprefix(m):
    "Given a list of pathnames, returns the longest common leading component"
    if not m: return ''
    # Some people pass in a list of pathname parts to operate in an OS-agnostic
    # fashion; don't try to translate in that case as that's an abuse of the
    # API and they are already doing what they need to be OS-agnostic and so
    # they most likely won't be using an os.PathLike object in the sublists.
    if not isinstance(m[0], (list, tuple)):
        m = tuple(map(os.fspath, m))
    s1 = min(m)
    s2 = max(m)
    for i, c in enumerate(s1):
        if c != s2[i]:
            return s1[:i]
    return s1

We can definitely skip the isinstance check as we know that we're passing strings and not something else.

You are right, I was under the impression that this was a C optimized function; however, since it is not you are right and we can just further optimize this by removing the if not m: return '' and the instance check

AA-Turner · 2025-03-30T20:26:12Z

I've opened #131919 as a sketch of an alternative, which avoids some allocations by using str.isspace instead of str.strip and inlines the various functions. I observe a 2.37x performance improvement on a non-idle Windows computer, using pyperf's --fast mode.

A

AA-Turner · 2025-03-31T00:17:15Z

Closing in favour of #131919.

Marius-Juston added 2 commits March 27, 2025 04:19

optimized dedent by 4x for larger files, added only_whitespace option…

b722c23

… if desired

removed necessary regex expression

540b462

bedevere-app bot added the awaiting review label Mar 27, 2025

bedevere-app bot mentioned this pull request Mar 27, 2025

Python3.13 logging level not working #131789

Closed

added news entry

2159118

Marius-Juston mentioned this pull request Mar 27, 2025

Improve speed of textwrap.dedent #131791

Closed

updated news inline literals backticks

8cf2153

Marius-Juston changed the title ~~gh-131789: textwrap.dedent by 4x for larger files, added only_whitespace option for non whitespace dedent~~ gh-131792: textwrap.dedent by 4x for larger files, added only_whitespace option for non whitespace dedent Mar 27, 2025

Marius-Juston changed the title ~~gh-131792: textwrap.dedent by 4x for larger files, added only_whitespace option for non whitespace dedent~~ gh-131791: textwrap.dedent by 4x for larger files, added only_whitespace option for non whitespace dedent Mar 27, 2025

StanFromIreland reviewed Mar 27, 2025

View reviewed changes

ZeroIntensity requested changes Mar 27, 2025

View reviewed changes

bedevere-app bot added awaiting core review and removed awaiting review labels Mar 27, 2025

Marius-Juston added 3 commits March 27, 2025 13:53

corrected dedent to pass test cases

7754af3

added missing os dependencie

76f231a

reset what the current doc was

8dce5ab

StanFromIreland reviewed Mar 27, 2025

View reviewed changes

Misc/NEWS.d/next/Library/2025-03-27-04-35-17.gh-issue-131792.UtGg3O.rst Outdated Show resolved Hide resolved

StanFromIreland reviewed Mar 27, 2025

View reviewed changes

Lib/textwrap.py Outdated Show resolved Hide resolved

eendebakpt reviewed Mar 27, 2025

View reviewed changes

Lib/textwrap.py Show resolved Hide resolved

eendebakpt reviewed Mar 27, 2025

View reviewed changes

Lib/textwrap.py Outdated Show resolved Hide resolved

eendebakpt reviewed Mar 27, 2025

View reviewed changes

Marius-Juston added 5 commits March 27, 2025 16:45

much simpler implementation, using fileter and common prefix for fast…

520f25f

… checking of common prefixes for each line

small mistake for single line

e703e0d

forgot to invert the compairson

159e363

minor optimization, using lstrip and more concise margin_len computation

1c2678e

Merge remote-tracking branch 'origin/main'

da9e65c

# Conflicts: # Lib/textwrap.py

Marius-Juston requested a review from eendebakpt March 28, 2025 03:28

Marius-Juston added 3 commits March 28, 2025 11:58

minor faster filtering using list comprehension

199d237

small speedup by removing ends with

27fcf53

strip seems to give an extremely minor performance boost over lstrip

1379566

Marius-Juston changed the title ~~gh-131791: textwrap.dedent by 4x for larger files, added only_whitespace option for non whitespace dedent~~ gh-131791: Improve speed of textwrap.dedent by replacing re Mar 28, 2025

picnixz requested changes Mar 28, 2025

View reviewed changes

bedevere-app bot added awaiting changes and removed awaiting core review labels Mar 28, 2025

picnixz reviewed Mar 30, 2025

View reviewed changes

Lib/textwrap.py Outdated Show resolved Hide resolved

fixed unecessary end line operator for empty

b71240d

AA-Turner mentioned this pull request Mar 30, 2025

gh-130167: Optimise textwrap.dedent() #131919

Merged

AA-Turner closed this Mar 31, 2025

Marius-Juston mentioned this pull request Mar 31, 2025

gh-130167: Minor textwrap.dedent() optimization #131925

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

gh-131791: Improve speed of `textwrap.dedent` by replacing `re` #131792

gh-131791: Improve speed of `textwrap.dedent` by replacing `re` #131792

Marius-Juston commented Mar 27, 2025 •

edited

Loading

ghost commented Mar 27, 2025 •

edited by ghost

Loading

bedevere-app bot commented Mar 27, 2025

StanFromIreland left a comment

ZeroIntensity left a comment

eendebakpt commented Mar 27, 2025

Marius-Juston commented Mar 27, 2025

eendebakpt Mar 27, 2025

Marius-Juston commented Mar 28, 2025

picnixz left a comment •

edited

Loading

bedevere-app bot commented Mar 28, 2025

Marius-Juston commented Mar 28, 2025 •

edited

Loading

picnixz commented Mar 28, 2025 •

edited

Loading

AA-Turner commented Mar 28, 2025

Marius-Juston commented Mar 28, 2025

eendebakpt commented Mar 28, 2025

Marius-Juston commented Mar 29, 2025 •

edited

Loading

Marius-Juston commented Mar 30, 2025

picnixz commented Mar 30, 2025 •

edited

Loading

AA-Turner commented Mar 30, 2025

picnixz commented Mar 30, 2025

Marius-Juston commented Mar 30, 2025

eendebakpt commented Mar 30, 2025

Marius-Juston commented Mar 30, 2025

AA-Turner commented Mar 30, 2025

AA-Turner commented Mar 31, 2025

gh-131791: Improve speed of textwrap.dedent by replacing re #131792

gh-131791: Improve speed of textwrap.dedent by replacing re #131792

Conversation

Marius-Juston commented Mar 27, 2025 • edited Loading

ghost commented Mar 27, 2025 • edited by ghost Loading

bedevere-app bot commented Mar 27, 2025

StanFromIreland left a comment

Choose a reason for hiding this comment

ZeroIntensity left a comment

Choose a reason for hiding this comment

eendebakpt commented Mar 27, 2025

Marius-Juston commented Mar 27, 2025

eendebakpt Mar 27, 2025

Choose a reason for hiding this comment

Marius-Juston commented Mar 28, 2025

picnixz left a comment • edited Loading

Choose a reason for hiding this comment

bedevere-app bot commented Mar 28, 2025

Marius-Juston commented Mar 28, 2025 • edited Loading

picnixz commented Mar 28, 2025 • edited Loading

AA-Turner commented Mar 28, 2025

Marius-Juston commented Mar 28, 2025

eendebakpt commented Mar 28, 2025

Marius-Juston commented Mar 29, 2025 • edited Loading

Marius-Juston commented Mar 30, 2025

picnixz commented Mar 30, 2025 • edited Loading

AA-Turner commented Mar 30, 2025

picnixz commented Mar 30, 2025

Marius-Juston commented Mar 30, 2025

eendebakpt commented Mar 30, 2025

Marius-Juston commented Mar 30, 2025

AA-Turner commented Mar 30, 2025

AA-Turner commented Mar 31, 2025

gh-131791: Improve speed of `textwrap.dedent` by replacing `re` #131792

gh-131791: Improve speed of `textwrap.dedent` by replacing `re` #131792

Marius-Juston commented Mar 27, 2025 •

edited

Loading

ghost commented Mar 27, 2025 •

edited by ghost

Loading

picnixz left a comment •

edited

Loading

Marius-Juston commented Mar 28, 2025 •

edited

Loading

picnixz commented Mar 28, 2025 •

edited

Loading

Marius-Juston commented Mar 29, 2025 •

edited

Loading

picnixz commented Mar 30, 2025 •

edited

Loading