Skip to content

gh-131507: Clean up tests and type checking for _pyrepl #131509

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Mar 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 52 additions & 9 deletions Lib/_colorize.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,64 @@
from __future__ import annotations
import io
import os
import sys

COLORIZE = True

# types
if False:
from typing import IO


class ANSIColors:
    """Named ANSI escape strings for styling terminal text.

    Every attribute is a plain ``str`` holding one ``ESC [ ... m`` sequence.
    Each name is defined exactly once; the groups below are foreground,
    bold foreground, intense foreground, background and intense background.
    """

    BLACK = "\x1b[30m"
    BLUE = "\x1b[34m"
    CYAN = "\x1b[36m"
    GREEN = "\x1b[32m"
    GREY = "\x1b[90m"
    MAGENTA = "\x1b[35m"
    RED = "\x1b[31m"
    RESET = "\x1b[0m"
    WHITE = "\x1b[37m"  # more like LIGHT GRAY
    YELLOW = "\x1b[33m"

    BOLD_BLACK = "\x1b[1;30m"  # DARK GRAY
    BOLD_BLUE = "\x1b[1;34m"
    BOLD_CYAN = "\x1b[1;36m"
    BOLD_GREEN = "\x1b[1;32m"
    BOLD_MAGENTA = "\x1b[1;35m"
    BOLD_RED = "\x1b[1;31m"
    BOLD_WHITE = "\x1b[1;37m"  # actual WHITE
    BOLD_YELLOW = "\x1b[1;33m"

    # intense = like bold but without being bold
    INTENSE_BLACK = "\x1b[90m"
    INTENSE_BLUE = "\x1b[94m"
    INTENSE_CYAN = "\x1b[96m"
    INTENSE_GREEN = "\x1b[92m"
    INTENSE_MAGENTA = "\x1b[95m"
    INTENSE_RED = "\x1b[91m"
    INTENSE_WHITE = "\x1b[97m"
    INTENSE_YELLOW = "\x1b[93m"

    BACKGROUND_BLACK = "\x1b[40m"
    BACKGROUND_BLUE = "\x1b[44m"
    BACKGROUND_CYAN = "\x1b[46m"
    BACKGROUND_GREEN = "\x1b[42m"
    BACKGROUND_MAGENTA = "\x1b[45m"
    BACKGROUND_RED = "\x1b[41m"
    BACKGROUND_WHITE = "\x1b[47m"
    BACKGROUND_YELLOW = "\x1b[43m"

    INTENSE_BACKGROUND_BLACK = "\x1b[100m"
    INTENSE_BACKGROUND_BLUE = "\x1b[104m"
    INTENSE_BACKGROUND_CYAN = "\x1b[106m"
    INTENSE_BACKGROUND_GREEN = "\x1b[102m"
    INTENSE_BACKGROUND_MAGENTA = "\x1b[105m"
    INTENSE_BACKGROUND_RED = "\x1b[101m"
    INTENSE_BACKGROUND_WHITE = "\x1b[107m"
    INTENSE_BACKGROUND_YELLOW = "\x1b[103m"


NoColors = ANSIColors()

Expand All @@ -26,14 +67,16 @@ class ANSIColors:
setattr(NoColors, attr, "")


def get_colors(
    colorize: bool = False, *, file: IO[str] | IO[bytes] | None = None
) -> ANSIColors:
    """Return the color table to use for output.

    A fresh ``ANSIColors`` instance is returned when *colorize* is true or
    when ``can_colorize(file=file)`` reports true. Otherwise the module-level
    ``NoColors`` object is returned.

    *file* is keyword-only and is passed through to ``can_colorize``
    unchanged; it is not consulted when *colorize* is already true.
    """
    if colorize or can_colorize(file=file):
        return ANSIColors()
    else:
        return NoColors


def can_colorize(*, file=None) -> bool:
def can_colorize(*, file: IO[str] | IO[bytes] | None = None) -> bool:
if file is None:
file = sys.stdout

Expand Down Expand Up @@ -66,4 +109,4 @@ def can_colorize(*, file=None) -> bool:
try:
return os.isatty(file.fileno())
except io.UnsupportedOperation:
return file.isatty()
return hasattr(file, "isatty") and file.isatty()
2 changes: 1 addition & 1 deletion Lib/_pyrepl/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ def do(self) -> None:
class show_history(Command):
def do(self) -> None:
from .pager import get_pager
from site import gethistoryfile # type: ignore[attr-defined]
from site import gethistoryfile

history = os.linesep.join(self.reader.history[:])
self.reader.console.restore()
Expand Down
4 changes: 2 additions & 2 deletions Lib/_pyrepl/console.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

from __future__ import annotations

import _colorize # type: ignore[import-not-found]
import _colorize

from abc import ABC, abstractmethod
import ast
Expand Down Expand Up @@ -162,7 +162,7 @@ def __init__(
*,
local_exit: bool = False,
) -> None:
super().__init__(locals=locals, filename=filename, local_exit=local_exit) # type: ignore[call-arg]
super().__init__(locals=locals, filename=filename, local_exit=local_exit)
self.can_colorize = _colorize.can_colorize()

def showsyntaxerror(self, filename=None, **kwargs):
Expand Down
7 changes: 6 additions & 1 deletion Lib/_pyrepl/mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

[mypy]
files = Lib/_pyrepl
mypy_path = $MYPY_CONFIG_FILE_DIR/../../Misc/mypy
explicit_package_bases = True
python_version = 3.12
python_version = 3.13
platform = linux
pretty = True

Expand All @@ -22,3 +23,7 @@ check_untyped_defs = False
# Various internal modules that typeshed deliberately doesn't have stubs for:
[mypy-_abc.*,_opcode.*,_overlapped.*,_testcapi.*,_testinternalcapi.*,test.*]
ignore_missing_imports = True

# Other untyped parts of the stdlib
[mypy-idlelib.*]
ignore_missing_imports = True
47 changes: 10 additions & 37 deletions Lib/_pyrepl/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@
from contextlib import contextmanager
from dataclasses import dataclass, field, fields
import unicodedata
from _colorize import can_colorize, ANSIColors # type: ignore[import-not-found]
from _colorize import can_colorize, ANSIColors


from . import commands, console, input
from .utils import ANSI_ESCAPE_SEQUENCE, wlen, str_width
from .utils import wlen, unbracket, str_width
from .trace import trace


Expand Down Expand Up @@ -421,42 +421,15 @@ def calc_screen(self) -> list[str]:

@staticmethod
def process_prompt(prompt: str) -> tuple[str, int]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I may be missing something, but it looks like the old function was tracking nested ANSI escape sequences outside the \x01 and \x02 brackets, and this one drops that, no? What is the rationale?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are three things the old function was trying to do (badly):

  • ignore anything between \001 and \002 for length calculation; and
  • remove \001 and \002 characters from the emitted prompt; and
  • also ignore any ANSI escape sequences from length calculation.

Since wlen() is already doing 3., a contributor here needed to recreate this function to not do that, because we were doing that later anyway. But it's simpler to just use the main wlen() with a string stripped of \001 .. \002 bracketed content.

I find the new form much easier on the eyes.

"""Process the prompt.
r"""Return a tuple with the prompt string and its visible length.

This means calculate the length of the prompt. The character \x01
and \x02 are used to bracket ANSI control sequences and need to be
excluded from the length calculation. So also a copy of the prompt
is returned with these control characters removed."""

# The logic below also ignores the length of common escape
# sequences if they were not explicitly within \x01...\x02.
# They are CSI (or ANSI) sequences ( ESC [ ... LETTER )

# wlen from utils already excludes ANSI_ESCAPE_SEQUENCE chars,
# which breaks the logic below so we redefine it here.
def wlen(s: str) -> int:
return sum(str_width(i) for i in s)

out_prompt = ""
l = wlen(prompt)
pos = 0
while True:
s = prompt.find("\x01", pos)
if s == -1:
break
e = prompt.find("\x02", s)
if e == -1:
break
# Found start and end brackets, subtract from string length
l = l - (e - s + 1)
keep = prompt[pos:s]
l -= sum(map(wlen, ANSI_ESCAPE_SEQUENCE.findall(keep)))
out_prompt += keep + prompt[s + 1 : e]
pos = e + 1
keep = prompt[pos:]
l -= sum(map(wlen, ANSI_ESCAPE_SEQUENCE.findall(keep)))
out_prompt += keep
return out_prompt, l
The prompt string has the zero-width brackets recognized by shells
(\x01 and \x02) removed. The length ignores anything between those
brackets as well as any ANSI escape sequences.
Comment on lines +426 to +428
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
The prompt string has the zero-width brackets recognized by shells
(\x01 and \x02) removed. The length ignores anything between those
brackets as well as any ANSI escape sequences.
The prompt string has the zero-width brackets (\x01 and \x02)
recognized by shells removed. The length ignores anything between
those brackets as well as any ANSI escape sequences.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this an improvement? There are many kinds of zero-width brackets, the ones I am interested in are those recognized by shells (\x01 and \x02). The original docstring uses that ordering then.

"""
out_prompt = unbracket(prompt, including_content=False)
visible_prompt = unbracket(prompt, including_content=True)
return out_prompt, wlen(visible_prompt)

def bow(self, p: int | None = None) -> int:
"""Return the 0-based index of the word break preceding p most
Expand Down
2 changes: 1 addition & 1 deletion Lib/_pyrepl/readline.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from dataclasses import dataclass, field

import os
from site import gethistoryfile # type: ignore[attr-defined]
from site import gethistoryfile
import sys
from rlcompleter import Completer as RLCompleter

Expand Down
19 changes: 16 additions & 3 deletions Lib/_pyrepl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,36 @@
import functools

ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})


@functools.cache
def str_width(c: str) -> int:
    """Return the display width (1 or 2) of the single character *c*.

    ASCII characters (code point below 128) are always width 1. Other
    characters are width 1 when their East Asian Width class is neutral,
    narrow, halfwidth or ambiguous ("N", "Na", "H", "A"), and width 2
    otherwise.

    *c* must be exactly one character; ``ord`` raises ``TypeError`` for any
    other length. Results are memoized by ``functools.cache``.
    """
    if ord(c) < 128:
        return 1
    w = unicodedata.east_asian_width(c)
    if w in ("N", "Na", "H", "A"):
        return 1
    return 2


def wlen(s: str) -> int:
    """Return the display width of the string *s*.

    The width is the sum of ``str_width`` over every character, minus the
    length of every match of ``ANSI_ESCAPE_SEQUENCE`` in *s*, plus one extra
    column for each Ctrl-Z character (0x1A).

    A single character other than Ctrl-Z is answered directly from
    ``str_width`` without scanning for escape sequences.
    """
    if len(s) == 1 and s != "\x1a":
        return str_width(s)
    length = sum(str_width(i) for i in s)
    # remove lengths of any escape sequences
    sequence = ANSI_ESCAPE_SEQUENCE.findall(s)
    # NOTE(review): each Ctrl-Z is counted one column wider than str_width
    # reports, presumably because it is rendered as two characters - confirm.
    ctrl_z_cnt = s.count("\x1a")
    return length - sum(len(i) for i in sequence) + ctrl_z_cnt


def unbracket(s: str, including_content: bool = False) -> str:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 I like this approach. I was concerned about the performance of the regular expressions, but couldn't find anything too bad when testing.

r"""Return `s` with \001 and \002 characters removed.

If `including_content` is True, content between \001 and \002 is also
stripped.
"""
if including_content:
return ZERO_WIDTH_BRACKET.sub("", s)
return s.translate(ZERO_WIDTH_TRANS)
31 changes: 24 additions & 7 deletions Lib/test/test_pyrepl/support.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,24 @@
from _pyrepl.console import Console, Event
from _pyrepl.readline import ReadlineAlikeReader, ReadlineConfig
from _pyrepl.simple_interact import _strip_final_indent
from _pyrepl.utils import unbracket, ANSI_ESCAPE_SEQUENCE


class ScreenEqualMixin:
    """Mixin that adds a screen-comparison assertion to a test case.

    The host class must provide ``assertListEqual`` (for example by also
    inheriting from ``unittest.TestCase``).
    """

    def assert_screen_equal(
        self, reader: ReadlineAlikeReader, expected: str, clean: bool = False
    ) -> None:
        """Assert that the reader's screen equals *expected*.

        *expected* is a single string that is split on newlines into the
        list of expected screen lines. When *clean* is true the screen is
        first passed through ``clean_screen``; otherwise ``reader.screen``
        is compared as-is.
        """
        actual = clean_screen(reader) if clean else reader.screen
        # Keep the str parameter intact and compare against a separate list.
        expected_lines = expected.split("\n")
        self.assertListEqual(actual, expected_lines)


def multiline_input(reader: ReadlineAlikeReader, namespace: dict | None = None):
saved = reader.more_lines
try:
reader.more_lines = partial(more_lines, namespace=namespace)
reader.ps1 = reader.ps2 = ">>>"
reader.ps3 = reader.ps4 = "..."
reader.ps1 = reader.ps2 = ">>> "
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the extra space?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because that's how the real prompts are in production. I didn't want the difference in tests since it was confusing for syntax highlighting later.

reader.ps3 = reader.ps4 = "... "
return reader.readline()
finally:
reader.more_lines = saved
Expand All @@ -38,18 +48,22 @@ def code_to_events(code: str):
yield Event(evt="key", data=c, raw=bytearray(c.encode("utf-8")))


def clean_screen(reader: ReadlineAlikeReader) -> list[str]:
    """Return the reader's screen lines with color and prompt text removed.

    This is useful for screen testing: it makes expected values readable by
    stripping out everything that is not the text the user typed.

    For each line of ``reader.screen``:

    * zero-width bracketed content is removed via
      ``unbracket(..., including_content=True)``;
    * matches of ``ANSI_ESCAPE_SEQUENCE`` are removed;
    * the first of ``reader.ps1`` .. ``reader.ps4`` that the line starts
      with is cut off the front (at most one prompt per line).

    Returns one cleaned string per screen line, in order.
    """
    output = []
    for line in reader.screen:
        line = unbracket(line, including_content=True)
        line = ANSI_ESCAPE_SEQUENCE.sub("", line)
        for prefix in (reader.ps1, reader.ps2, reader.ps3, reader.ps4):
            if line.startswith(prefix):
                line = line[len(prefix):]
                # Only strip a single leading prompt.
                break
        output.append(line)
    return output


def prepare_reader(console: Console, **kwargs):
Expand Down Expand Up @@ -99,6 +113,9 @@ def handle_all_events(
prepare_console=partial(prepare_console, width=10),
)

reader_no_colors = partial(prepare_reader, can_colorize=False)
reader_force_colors = partial(prepare_reader, can_colorize=True)


class FakeConsole(Console):
def __init__(self, events, encoding="utf-8") -> None:
Expand Down
Loading
Loading