Skip to content

Commit 59b1aca

Browse files
Fix double-width characters disappearing when wrapping (#3180)
* Update docstring for `Text.wrap`'s width parameter to indicate that it's referring to the number of *single-width* characters. Also a small addition to the gitignore file. * Working on double width wrapping fixes * Chop cells to fit to width * Fix folding when there's already text on line * Update wrapping logic to fix issues with CJK characters disappearing when the "fold" location sat *within* a double-width character. Ensure we retain browser logic of: if there is no space on the current line, move to a new line, and if there's not enough space on the entire new line, fold the text over multiple lines at appropriate locations. * Remove old TODO comments * Add regression test note * Rename function to avoid breaking change * Update CHANGELOG * Remove old comment that is no longer relevant * Cover off some wrapping edge cases * Adding docstrings to tests explaining their purpose * Renaming a local, function scope function alias * Update rich/_wrap.py Co-authored-by: Rodrigo Girão Serrão <[email protected]> * PR feedback * Testing wrapping with trailing and leading whitespace * Improve docstring wording --------- Co-authored-by: Rodrigo Girão Serrão <[email protected]>
1 parent b32e42b commit 59b1aca

File tree

6 files changed

+177
-38
lines changed

6 files changed

+177
-38
lines changed

Diff for: .gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -117,3 +117,5 @@ venv.bak/
117117
# airspeed velocity
118118
benchmarks/env/
119119
benchmarks/html/
120+
121+
sandbox/

Diff for: CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1313

1414
### Fixed
1515

16+
- Some text goes missing during wrapping when it contains double width characters https://github.com/Textualize/rich/issues/3176
1617
- Ensure font is correctly inherited in exported HTML https://github.com/Textualize/rich/issues/3104
1718
- Fixed typing for `FloatPrompt`.
1819

Diff for: rich/_wrap.py

+55-18
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,19 @@
1+
from __future__ import annotations
2+
13
import re
2-
from typing import Iterable, List, Tuple
4+
from typing import Iterable
35

46
from ._loop import loop_last
57
from .cells import cell_len, chop_cells
68

79
re_word = re.compile(r"\s*\S+\s*")
810

911

10-
def words(text: str) -> Iterable[Tuple[int, int, str]]:
12+
def words(text: str) -> Iterable[tuple[int, int, str]]:
13+
"""Yields each word from the text as a tuple
14+
containing (start_index, end_index, word). A "word" in this context may
15+
include the actual word and any whitespace to the right.
16+
"""
1117
position = 0
1218
word_match = re_word.match(text, position)
1319
while word_match is not None:
@@ -17,40 +23,71 @@ def words(text: str) -> Iterable[Tuple[int, int, str]]:
1723
word_match = re_word.match(text, end)
1824

1925

20-
def divide_line(text: str, width: int, fold: bool = True) -> List[int]:
21-
divides: List[int] = []
22-
append = divides.append
23-
line_position = 0
26+
def divide_line(text: str, width: int, fold: bool = True) -> list[int]:
27+
"""Given a string of text, and a width (measured in cells), return a list
28+
of cell offsets which the string should be split at in order for it to fit
29+
within the given width.
30+
31+
Args:
32+
text: The text to examine.
33+
width: The available cell width.
34+
fold: If True, words longer than `width` will be folded onto a new line.
35+
36+
Returns:
37+
A list of indices to break the line at.
38+
"""
39+
break_positions: list[int] = [] # offsets to insert the breaks at
40+
append = break_positions.append
41+
cell_offset = 0
2442
_cell_len = cell_len
43+
2544
for start, _end, word in words(text):
2645
word_length = _cell_len(word.rstrip())
27-
if line_position + word_length > width:
46+
remaining_space = width - cell_offset
47+
word_fits_remaining_space = remaining_space >= word_length
48+
49+
if word_fits_remaining_space:
50+
# Simplest case - the word fits within the remaining width for this line.
51+
cell_offset += _cell_len(word)
52+
else:
53+
# Not enough space remaining for this word on the current line.
2854
if word_length > width:
55+
# The word doesn't fit on any line, so we can't simply
56+
# place it on the next line...
2957
if fold:
30-
chopped_words = chop_cells(word, max_size=width, position=0)
31-
for last, line in loop_last(chopped_words):
58+
# Fold the word across multiple lines.
59+
folded_word = chop_cells(word, width=width)
60+
for last, line in loop_last(folded_word):
3261
if start:
3362
append(start)
34-
3563
if last:
36-
line_position = _cell_len(line)
64+
cell_offset = _cell_len(line)
3765
else:
3866
start += len(line)
3967
else:
68+
# Folding isn't allowed, so crop the word.
4069
if start:
4170
append(start)
42-
line_position = _cell_len(word)
43-
elif line_position and start:
71+
cell_offset = _cell_len(word)
72+
elif cell_offset and start:
73+
# The word doesn't fit within the remaining space on the current
74+
# line, but it *can* fit on to the next (empty) line.
4475
append(start)
45-
line_position = _cell_len(word)
46-
else:
47-
line_position += _cell_len(word)
48-
return divides
76+
cell_offset = _cell_len(word)
77+
78+
return break_positions
4979

5080

5181
if __name__ == "__main__": # pragma: no cover
5282
from .console import Console
5383

5484
console = Console(width=10)
5585
console.print("12345 abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPQRSTUVWXYZ 12345")
56-
print(chop_cells("abcdefghijklmnopqrstuvwxyz", 10, position=2))
86+
print(chop_cells("abcdefghijklmnopqrstuvwxyz", 10))
87+
88+
console = Console(width=20)
89+
console.rule()
90+
console.print("TextualはPythonの高速アプリケーション開発フレームワークです")
91+
92+
console.rule()
93+
console.print("アプリケーションは1670万色を使用でき")

Diff for: rich/cells.py

+34-20
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
from __future__ import annotations
2+
13
import re
24
from functools import lru_cache
3-
from typing import Callable, List
5+
from typing import Callable
46

57
from ._cell_widths import CELL_WIDTHS
68

@@ -119,27 +121,39 @@ def set_cell_size(text: str, total: int) -> str:
119121
start = pos
120122

121123

122-
# TODO: This is inefficient
123-
# TODO: This might not work with CWJ type characters
124-
def chop_cells(text: str, max_size: int, position: int = 0) -> List[str]:
125-
"""Break text in to equal (cell) length strings, returning the characters in reverse
126-
order"""
124+
def chop_cells(
125+
text: str,
126+
width: int,
127+
) -> list[str]:
128+
"""Split text into lines such that each line fits within the available (cell) width.
129+
130+
Args:
131+
text: The text to fold such that it fits in the given width.
132+
width: The width available (number of cells).
133+
134+
Returns:
135+
A list of strings such that each string in the list has cell width
136+
less than or equal to the available width.
137+
"""
127138
_get_character_cell_size = get_character_cell_size
128-
characters = [
129-
(character, _get_character_cell_size(character)) for character in text
130-
]
131-
total_size = position
132-
lines: List[List[str]] = [[]]
133-
append = lines[-1].append
134-
135-
for character, size in reversed(characters):
136-
if total_size + size > max_size:
137-
lines.append([character])
138-
append = lines[-1].append
139-
total_size = size
139+
lines: list[list[str]] = [[]]
140+
141+
append_new_line = lines.append
142+
append_to_last_line = lines[-1].append
143+
144+
total_width = 0
145+
146+
for character in text:
147+
cell_width = _get_character_cell_size(character)
148+
char_doesnt_fit = total_width + cell_width > width
149+
150+
if char_doesnt_fit:
151+
append_new_line([character])
152+
append_to_last_line = lines[-1].append
153+
total_width = cell_width
140154
else:
141-
total_size += size
142-
append(character)
155+
append_to_last_line(character)
156+
total_width += cell_width
143157

144158
return ["".join(line) for line in lines]
145159

Diff for: tests/test_cells.py

+19
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from rich import cells
2+
from rich.cells import chop_cells
23

34

45
def test_cell_len_long_string():
@@ -40,3 +41,21 @@ def test_set_cell_size_infinite():
4041
)
4142
== size
4243
)
44+
45+
46+
def test_chop_cells():
47+
"""Simple example of splitting cells into lines of width 3."""
48+
text = "abcdefghijk"
49+
assert chop_cells(text, 3) == ["abc", "def", "ghi", "jk"]
50+
51+
52+
def test_chop_cells_double_width_boundary():
53+
"""The available width lies within a double-width character."""
54+
text = "ありがとう"
55+
assert chop_cells(text, 3) == ["あ", "り", "が", "と", "う"]
56+
57+
58+
def test_chop_cells_mixed_width():
59+
"""Mixed single and double-width characters."""
60+
text = "あ1り234が5と6う78"
61+
assert chop_cells(text, 3) == ["あ1", "り2", "34", "が5", "と6", "う7", "8"]

Diff for: tests/test_text.py

+66
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,20 @@ def test_wrap_cjk_width_mid_character():
449449
]
450450

451451

452+
def test_wrap_cjk_mixed():
453+
"""Regression test covering https://github.com/Textualize/rich/issues/3176 and
454+
https://github.com/Textualize/textual/issues/3567 - double width characters could
455+
result in text going missing when wrapping."""
456+
text = Text("123ありがとうございました")
457+
console = Console(width=20) # let's ensure the width passed to wrap() wins.
458+
459+
wrapped_lines = text.wrap(console, width=8)
460+
with console.capture() as capture:
461+
console.print(wrapped_lines)
462+
463+
assert capture.get() == "123あり\nがとうご\nざいまし\nた\n"
464+
465+
452466
def test_wrap_long():
453467
text = Text("abracadabra", justify="left")
454468
lines = text.wrap(Console(), 4)
@@ -497,6 +511,47 @@ def test_wrap_long_words_2():
497511
]
498512

499513

514+
def test_wrap_long_words_followed_by_other_words():
515+
"""After folding a word across multiple lines, we should continue from
516+
the next word immediately after the folded word (don't take a newline
517+
following completion of the folded word)."""
518+
text = Text("123 12345678 123 123")
519+
lines = text.wrap(Console(), 6)
520+
assert lines._lines == [
521+
Text("123 "),
522+
Text("123456"),
523+
Text("78 123"),
524+
Text("123"),
525+
]
526+
527+
528+
def test_wrap_long_word_preceeded_by_word_of_full_line_length():
529+
"""The width of the first word is the same as the available width.
530+
Ensures that folding works correctly when there's no space available
531+
on the current line."""
532+
text = Text("123456 12345678 123 123")
533+
lines = text.wrap(Console(), 6)
534+
assert lines._lines == [
535+
Text("123456"),
536+
Text("123456"),
537+
Text("78 123"),
538+
Text("123"),
539+
]
540+
541+
542+
def test_wrap_multiple_consecutive_spaces():
543+
"""Adding multiple consecutive spaces at the end of a line does not impact
544+
the location at which a break will be added during the process of wrapping."""
545+
text = Text("123456 12345678 123 123")
546+
lines = text.wrap(Console(), 6)
547+
assert lines._lines == [
548+
Text("123456"),
549+
Text("123456"),
550+
Text("78 123"),
551+
Text("123"),
552+
]
553+
554+
500555
def test_wrap_long_words_justify_left():
501556
text = Text("X 123456789", justify="left")
502557
lines = text.wrap(Console(), 4)
@@ -508,6 +563,17 @@ def test_wrap_long_words_justify_left():
508563
assert lines[3] == Text("9 ")
509564

510565

566+
def test_wrap_leading_and_trailing_whitespace():
567+
text = Text(" 123 456 789 ")
568+
lines = text.wrap(Console(), 4)
569+
assert lines._lines == [
570+
Text(" 1"),
571+
Text("23 "),
572+
Text("456 "),
573+
Text("789 "),
574+
]
575+
576+
511577
def test_no_wrap_no_crop():
512578
text = Text("Hello World!" * 3)
513579

0 commit comments

Comments
 (0)