-
-
Notifications
You must be signed in to change notification settings - Fork 31.7k
gh-131507: Clean up tests and type checking for _pyrepl
#131509
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b83d282
a96eea4
5682219
231855b
3c25c95
99469f3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -26,11 +26,11 @@ | |||||||||||||
from contextlib import contextmanager | ||||||||||||||
from dataclasses import dataclass, field, fields | ||||||||||||||
import unicodedata | ||||||||||||||
from _colorize import can_colorize, ANSIColors # type: ignore[import-not-found] | ||||||||||||||
from _colorize import can_colorize, ANSIColors | ||||||||||||||
|
||||||||||||||
|
||||||||||||||
from . import commands, console, input | ||||||||||||||
from .utils import ANSI_ESCAPE_SEQUENCE, wlen, str_width | ||||||||||||||
from .utils import wlen, unbracket, str_width | ||||||||||||||
from .trace import trace | ||||||||||||||
|
||||||||||||||
|
||||||||||||||
|
@@ -421,42 +421,15 @@ def calc_screen(self) -> list[str]: | |||||||||||||
|
||||||||||||||
@staticmethod | ||||||||||||||
def process_prompt(prompt: str) -> tuple[str, int]: | ||||||||||||||
"""Process the prompt. | ||||||||||||||
r"""Return a tuple with the prompt string and its visible length. | ||||||||||||||
|
||||||||||||||
This means calculate the length of the prompt. The character \x01 | ||||||||||||||
and \x02 are used to bracket ANSI control sequences and need to be | ||||||||||||||
excluded from the length calculation. So also a copy of the prompt | ||||||||||||||
is returned with these control characters removed.""" | ||||||||||||||
|
||||||||||||||
# The logic below also ignores the length of common escape | ||||||||||||||
# sequences if they were not explicitly within \x01...\x02. | ||||||||||||||
# They are CSI (or ANSI) sequences ( ESC [ ... LETTER ) | ||||||||||||||
|
||||||||||||||
# wlen from utils already excludes ANSI_ESCAPE_SEQUENCE chars, | ||||||||||||||
# which breaks the logic below so we redefine it here. | ||||||||||||||
def wlen(s: str) -> int: | ||||||||||||||
return sum(str_width(i) for i in s) | ||||||||||||||
|
||||||||||||||
out_prompt = "" | ||||||||||||||
l = wlen(prompt) | ||||||||||||||
pos = 0 | ||||||||||||||
while True: | ||||||||||||||
s = prompt.find("\x01", pos) | ||||||||||||||
if s == -1: | ||||||||||||||
break | ||||||||||||||
e = prompt.find("\x02", s) | ||||||||||||||
if e == -1: | ||||||||||||||
break | ||||||||||||||
# Found start and end brackets, subtract from string length | ||||||||||||||
l = l - (e - s + 1) | ||||||||||||||
keep = prompt[pos:s] | ||||||||||||||
l -= sum(map(wlen, ANSI_ESCAPE_SEQUENCE.findall(keep))) | ||||||||||||||
out_prompt += keep + prompt[s + 1 : e] | ||||||||||||||
pos = e + 1 | ||||||||||||||
keep = prompt[pos:] | ||||||||||||||
l -= sum(map(wlen, ANSI_ESCAPE_SEQUENCE.findall(keep))) | ||||||||||||||
out_prompt += keep | ||||||||||||||
return out_prompt, l | ||||||||||||||
The prompt string has the zero-width brackets recognized by shells | ||||||||||||||
(\x01 and \x02) removed. The length ignores anything between those | ||||||||||||||
brackets as well as any ANSI escape sequences. | ||||||||||||||
Comment on lines
+426
to
+428
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this an improvement? There are many kinds of zero-width brackets; the ones I am interested in are those recognized by shells (\x01 and \x02). The original docstring uses that ordering then. |
||||||||||||||
""" | ||||||||||||||
out_prompt = unbracket(prompt, including_content=False) | ||||||||||||||
visible_prompt = unbracket(prompt, including_content=True) | ||||||||||||||
return out_prompt, wlen(visible_prompt) | ||||||||||||||
|
||||||||||||||
def bow(self, p: int | None = None) -> int: | ||||||||||||||
"""Return the 0-based index of the word break preceding p most | ||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,23 +3,36 @@ | |
import functools | ||
|
||
ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]") | ||
ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02") | ||
ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""}) | ||
|
||
|
||
@functools.cache | ||
def str_width(c: str) -> int: | ||
if ord(c) < 128: | ||
return 1 | ||
w = unicodedata.east_asian_width(c) | ||
if w in ('N', 'Na', 'H', 'A'): | ||
if w in ("N", "Na", "H", "A"): | ||
return 1 | ||
return 2 | ||
|
||
|
||
def wlen(s: str) -> int: | ||
if len(s) == 1 and s != '\x1a': | ||
if len(s) == 1 and s != "\x1a": | ||
return str_width(s) | ||
length = sum(str_width(i) for i in s) | ||
# remove lengths of any escape sequences | ||
sequence = ANSI_ESCAPE_SEQUENCE.findall(s) | ||
ctrl_z_cnt = s.count('\x1a') | ||
ctrl_z_cnt = s.count("\x1a") | ||
return length - sum(len(i) for i in sequence) + ctrl_z_cnt | ||
|
||
|
||
def unbracket(s: str, including_content: bool = False) -> str: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 👍 I like this approach. I was concerned about the performance of the regular expressions but couldn't find anything too bad when testing. |
||
r"""Return `s` with \001 and \002 characters removed. | ||
|
||
If `including_content` is True, content between \001 and \002 is also | ||
stripped. | ||
""" | ||
if including_content: | ||
return ZERO_WIDTH_BRACKET.sub("", s) | ||
return s.translate(ZERO_WIDTH_TRANS) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,14 +6,24 @@ | |
from _pyrepl.console import Console, Event | ||
from _pyrepl.readline import ReadlineAlikeReader, ReadlineConfig | ||
from _pyrepl.simple_interact import _strip_final_indent | ||
from _pyrepl.utils import unbracket, ANSI_ESCAPE_SEQUENCE | ||
|
||
|
||
class ScreenEqualMixin: | ||
def assert_screen_equal( | ||
self, reader: ReadlineAlikeReader, expected: str, clean: bool = False | ||
): | ||
actual = clean_screen(reader) if clean else reader.screen | ||
expected = expected.split("\n") | ||
self.assertListEqual(actual, expected) | ||
|
||
|
||
def multiline_input(reader: ReadlineAlikeReader, namespace: dict | None = None): | ||
saved = reader.more_lines | ||
try: | ||
reader.more_lines = partial(more_lines, namespace=namespace) | ||
reader.ps1 = reader.ps2 = ">>>" | ||
reader.ps3 = reader.ps4 = "..." | ||
reader.ps1 = reader.ps2 = ">>> " | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why the extra space? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Because that's how the real prompts are in production. I didn't want the difference in tests since it was confusing for syntax highlighting later. |
||
reader.ps3 = reader.ps4 = "... " | ||
return reader.readline() | ||
finally: | ||
reader.more_lines = saved | ||
|
@@ -38,18 +48,22 @@ def code_to_events(code: str): | |
yield Event(evt="key", data=c, raw=bytearray(c.encode("utf-8"))) | ||
|
||
|
||
def clean_screen(screen: Iterable[str]): | ||
def clean_screen(reader: ReadlineAlikeReader) -> list[str]: | ||
"""Cleans color and console characters out of a screen output. | ||
|
||
This is useful for screen testing, it increases the test readability since | ||
it strips out all the unreadable side of the screen. | ||
""" | ||
output = [] | ||
for line in screen: | ||
if line.startswith(">>>") or line.startswith("..."): | ||
line = line[3:] | ||
for line in reader.screen: | ||
line = unbracket(line, including_content=True) | ||
line = ANSI_ESCAPE_SEQUENCE.sub("", line) | ||
for prefix in (reader.ps1, reader.ps2, reader.ps3, reader.ps4): | ||
if line.startswith(prefix): | ||
line = line[len(prefix):] | ||
break | ||
output.append(line) | ||
return "\n".join(output).strip() | ||
return output | ||
|
||
|
||
def prepare_reader(console: Console, **kwargs): | ||
|
@@ -99,6 +113,9 @@ def handle_all_events( | |
prepare_console=partial(prepare_console, width=10), | ||
) | ||
|
||
reader_no_colors = partial(prepare_reader, can_colorize=False) | ||
reader_force_colors = partial(prepare_reader, can_colorize=True) | ||
|
||
|
||
class FakeConsole(Console): | ||
def __init__(self, events, encoding="utf-8") -> None: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I may be missing something, but it looks like the old function was tracking nested ANSI escape sequences outside \x01 and \x02 brackets, but this one drops this, no? What is the rationale?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are three things the old function was trying to do (badly):
Since wlen() is already doing 3., a contributor here needed to recreate this function to not do that, because we were doing that later anyway. But it's simpler to just use the main wlen() with a string stripped of \001 .. \002 bracketed content.
I find the new form much easier on the eyes.