Skip to content

feat!: Add use_wcwidth for Asian character support #63

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Dec 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,15 +165,18 @@ All parameters are optional.

| Option | Type | Default | Description |
| :-----------------: | :-------------------: | :-------------------: | :-------------------------------------------------------------------------------: |
| `header` | `List[Any]` | `None` | First table row separated by header row separator. Values should support `str()`. |
| `body` | `List[List[Any]]` | `None` | List of rows for the main section of the table. Values should support `str()`. |
| `footer` | `List[Any]` | `None` | Last table row separated by header row separator. Values should support `str()`. |
| `header` | `List[Any]` | `None` | First table row separated by header row separator. Values should support `str()` |
| `body` | `List[List[Any]]` | `None` | List of rows for the main section of the table. Values should support `str()` |
| `footer` | `List[Any]` | `None` | Last table row separated by header row separator. Values should support `str()` |
| `column_widths` | `List[Optional[int]]` | `None` (automatic) | List of column widths in characters for each column |
| `alignments` | `List[Alignment]` | `None` (all centered) | Column alignments<br/>(ex. `[Alignment.LEFT, Alignment.CENTER, Alignment.RIGHT]`) |
| `style` | `TableStyle` | `double_thin_compact` | Table style to use for the table\* |
| `first_col_heading` | `bool` | `False` | Whether to add a heading column separator after the first column |
| `last_col_heading` | `bool` | `False` | Whether to add a heading column separator before the last column |
| `cell_padding` | `int` | `1` | The minimum number of spaces to add between the cell content and the cell border. |
| `cell_padding` | `int` | `1` | The minimum number of spaces to add between the cell content and the cell border |
| `use_wcwidth` | `bool` | `True` | Whether to use [wcwidth][wcwidth] instead of `len()` to calculate cell width |

[wcwidth]: https://pypi.org/project/wcwidth/

\*See a list of all preset styles [here](https://table2ascii.readthedocs.io/en/latest/styles.html).

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,5 +77,6 @@ namespace_packages = true
[[tool.mypy.overrides]]
module = [
"setuptools.*",
"wcwidth"
]
ignore_missing_imports = true
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
typing-extensions>=3.7.4; python_version<'3.8'
typing-extensions>=3.7.4; python_version<'3.8'
wcwidth<1
8 changes: 7 additions & 1 deletion table2ascii/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,17 @@

@dataclass
class Options:
    """Class for storing options that the user sets

    .. versionchanged:: 1.0.0

        Added ``use_wcwidth`` option
    """

    # whether to add a heading column separator after the first column
    first_col_heading: bool
    # whether to add a heading column separator before the last column
    last_col_heading: bool
    # width in characters for each column; a None entry (or None for the
    # whole list) means the width is determined automatically
    column_widths: list[int | None] | None
    # alignment for each column
    alignments: list[Alignment] | None
    # minimum number of spaces between the cell content and the cell border
    cell_padding: int
    # table style used to draw the table
    style: TableStyle
    # whether cell widths are measured with wcwidth instead of len()
    use_wcwidth: bool
43 changes: 37 additions & 6 deletions table2ascii/table_to_ascii.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from math import ceil, floor

from wcwidth import wcswidth

from .alignment import Alignment
from .annotations import SupportsStr
from .exceptions import (
Expand Down Expand Up @@ -44,6 +46,7 @@ def __init__(
self.__first_col_heading = options.first_col_heading
self.__last_col_heading = options.last_col_heading
self.__cell_padding = options.cell_padding
self.__use_wcwidth = options.use_wcwidth

# calculate number of columns
self.__columns = self.__count_columns()
Expand Down Expand Up @@ -93,7 +96,7 @@ def __auto_column_widths(self) -> list[int]:
def widest_line(value: SupportsStr) -> int:
"""Returns the width of the longest line in a multi-line string"""
text = str(value)
return max(len(line) for line in text.splitlines()) if len(text) else 0
return max(self.__str_width(line) for line in text.splitlines()) if len(text) else 0

column_widths = []
# get the width necessary for each column
Expand Down Expand Up @@ -145,17 +148,18 @@ def __pad(self, cell_value: SupportsStr, width: int, alignment: Alignment) -> st
text = str(cell_value)
padding = " " * self.__cell_padding
padded_text = f"{padding}{text}{padding}"
text_width = self.__str_width(padded_text)
if alignment == Alignment.LEFT:
# pad with spaces on the end
return padded_text + (" " * (width - len(padded_text)))
return padded_text + (" " * (width - text_width))
if alignment == Alignment.CENTER:
# pad with spaces, half on each side
before = " " * floor((width - len(padded_text)) / 2)
after = " " * ceil((width - len(padded_text)) / 2)
before = " " * floor((width - text_width) / 2)
after = " " * ceil((width - text_width) / 2)
return before + padded_text + after
if alignment == Alignment.RIGHT:
# pad with spaces at the beginning
return (" " * (width - len(padded_text))) + padded_text
return (" " * (width - text_width)) + padded_text
raise InvalidAlignmentError(alignment)

def __row_to_ascii(
Expand Down Expand Up @@ -344,6 +348,23 @@ def __body_to_ascii(self, body: list[list[SupportsStr]]) -> str:
for row in body
)

def __str_width(self, text: str) -> int:
    """
    Measures how many columns a string occupies when rendered in a monospace font.

    For most text this equals the number of characters, but double-width
    characters (East Asian Wide and East Asian Fullwidth) count as two columns
    and zero-width characters (combining characters, zero-width space, etc.)
    count as none.

    Args:
        text: The text to measure

    Returns:
        The display width of the text, or ``len(text)`` when ``use_wcwidth``
        is disabled or ``wcswidth`` reports a negative width
    """
    if self.__use_wcwidth:
        measured = wcswidth(text)
        # a negative result means wcswidth could not measure the text
        if measured >= 0:
            return measured
    # wcwidth disabled or measurement failed: count characters instead
    return len(text)

def to_ascii(self) -> str:
"""Generates a formatted ASCII table

Expand Down Expand Up @@ -380,9 +401,13 @@ def table2ascii(
alignments: list[Alignment] | None = None,
cell_padding: int = 1,
style: TableStyle = PresetStyle.double_thin_compact,
use_wcwidth: bool = True,
) -> str:
"""Convert a 2D Python table to ASCII text

.. versionchanged:: 1.0.0
Added the ``use_wcwidth`` parameter defaulting to :py:obj:`True`.

Args:
header: List of column values in the table's header row. All values should be :class:`str`
or support :class:`str` conversion. If not specified, the table will not have a header row.
Expand All @@ -396,7 +421,7 @@ def table2ascii(
Defaults to :py:obj:`False`.
column_widths: List of widths in characters for each column. Any value of :py:obj:`None`
indicates that the column width should be determined automatically. If :py:obj:`None`
is passed instead of a :py:obj:`~typing.List`, all columns will be automatically sized.
is passed instead of a :class:`list`, all columns will be automatically sized.
Defaults to :py:obj:`None`.
alignments: List of alignments for each column
(ex. ``[Alignment.LEFT, Alignment.CENTER, Alignment.RIGHT]``). If not specified or set to
Expand All @@ -406,6 +431,11 @@ def table2ascii(
Defaults to ``1``.
style: Table style to use for styling (preset styles can be imported).
Defaults to :ref:`PresetStyle.double_thin_compact <PresetStyle.double_thin_compact>`.
use_wcwidth: Whether to use :func:`wcwidth.wcswidth` to determine the width of each cell instead of
:func:`len`. The :func:`~wcwidth.wcswidth` function takes into account double-width characters
(East Asian Wide and East Asian Fullwidth) and zero-width characters (combining characters,
zero-width space, etc.), whereas :func:`len` determines the width solely based on the number of
characters in the string. Defaults to :py:obj:`True`.

Returns:
The generated ASCII table
Expand All @@ -421,5 +451,6 @@ def table2ascii(
alignments=alignments,
cell_padding=cell_padding,
style=style,
use_wcwidth=use_wcwidth,
),
).to_ascii()
49 changes: 49 additions & 0 deletions tests/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,3 +247,52 @@ def test_multiline_cells():
"╚═══════════════════════════════════════════╝"
)
assert text == expected


def test_east_asian_wide_characters_and_zero_width_wcwidth():
    """Cells are measured with wcwidth.wcswidth(), by default and when requested explicitly."""
    # default behaviour: use_wcwidth is not passed
    text = t2a(
        header=["#\u200b", "🦁", "🦡", "🦅", "🐍"],
        body=[["💻", "✅", "✅", "❌", "❌"]],
        footer=["🥞", "日", "月", "火", "水"],
        first_col_heading=True,
    )
    # same table with the option spelled out
    text2 = t2a(
        header=["#\u200b", "🦁", "🦡", "🦅", "🐍"],
        body=[["💻", "✅", "✅", "❌", "❌"]],
        footer=["🥞", "日", "月", "火", "水"],
        first_col_heading=True,
        use_wcwidth=True,
    )
    # Every emoji / CJK glyph is two columns wide, so each column is 4 wide
    # (glyph + one space of padding on each side) and the four body columns
    # plus three single-space separators span 19 columns.
    # "#" + zero-width space measures only 1 column, so the centered header
    # cell receives one extra trailing space to fill its 4-column cell.
    expected = (
        "╔════╦═══════════════════╗\n"
        "║ #\u200b  ║ 🦁   🦡   🦅   🐍 ║\n"
        "╟────╫───────────────────╢\n"
        "║ 💻 ║ ✅   ✅   ❌   ❌ ║\n"
        "╟────╫───────────────────╢\n"
        "║ 🥞 ║ 日   月   火   水 ║\n"
        "╚════╩═══════════════════╝"
    )
    assert text == expected
    assert text2 == expected


def test_east_asian_wide_characters_and_zero_width_no_wcwidth():
    """Cells are measured with len() when use_wcwidth is disabled."""
    text = t2a(
        header=["#\u200b", "🦁", "🦡", "🦅", "🐍"],
        body=[["💻", "✅", "✅", "❌", "❌"]],
        footer=["🥞", "日", "月", "火", "水"],
        first_col_heading=True,
        use_wcwidth=False,
    )
    # With len(), every emoji / CJK glyph counts as one character, so each
    # body column is 3 wide and the four columns plus three single-space
    # separators span 15 characters.
    # "#" + zero-width space counts as 2 characters and sets the first column
    # to 4 wide; the one-character emoji below it are centered with the odd
    # leftover space placed after the glyph.
    expected = (
        "╔════╦═══════════════╗\n"
        "║ #\u200b ║ 🦁   🦡   🦅   🐍 ║\n"
        "╟────╫───────────────╢\n"
        "║ 💻  ║ ✅   ✅   ❌   ❌ ║\n"
        "╟────╫───────────────╢\n"
        "║ 🥞  ║ 日   月   火   水 ║\n"
        "╚════╩═══════════════╝"
    )
    assert text == expected