Skip to content

Commit 50cebec

Browse files
committed
Improve whitespace encoding in converter module
1 parent ee5bf6d commit 50cebec

File tree

2 files changed

+42
-5
lines changed

2 files changed

+42
-5
lines changed

coloredlogs/converter.py

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,14 +81,15 @@ def capture(command, encoding='UTF-8'):
8181
return u'\n'.join(clean_terminal_output(output))
8282

8383

84-
def convert(text, code=True):
84+
def convert(text, code=True, tabsize=4):
8585
"""
8686
Convert text with ANSI escape sequences to HTML.
8787
8888
:param text: The text with ANSI escape sequences (a string).
8989
:param code: Whether to wrap the returned HTML fragment in a
9090
``<code>...</code>`` element (a boolean, defaults
9191
to :data:`True`).
92+
:param tabsize: The tab size used to expand tabs into spaces (an integer, refer to :func:`str.expandtabs()` for details).
9293
:returns: The text converted to HTML (a string).
9394
"""
9495
output = []
@@ -116,31 +117,67 @@ def convert(text, code=True):
116117
token = ''
117118
else:
118119
token = html_encode(token)
119-
token = encode_whitespace(token)
120120
output.append(token)
121121
html = ''.join(output)
122+
html = encode_whitespace(html, tabsize)
122123
if code:
123124
html = '<code>%s</code>' % html
124125
return html
125126

126127

127-
def encode_whitespace(text):
128+
def encode_whitespace(text, tabsize=4):
128129
"""
129130
Encode whitespace so that web browsers properly render it.
130131
131132
:param text: The plain text (a string).
133+
:param tabsize: The tab size used to expand tabs into spaces (an integer, refer to :func:`str.expandtabs()` for details).
132134
:returns: The text converted to HTML (a string).
133135
134136
The purpose of this function is to encode whitespace in such a way that web
135137
browsers render the same whitespace regardless of whether 'preformatted'
136138
styling is used (by wrapping the text in a ``<pre>...</pre>`` element).
139+
140+
.. note:: While the string manipulation performed by this function is
141+
specifically intended not to corrupt the HTML generated by
142+
:func:`convert()`, it definitely does have the potential to
143+
corrupt HTML from other sources. You have been warned :-).
137144
"""
145+
# Convert Windows line endings (CR+LF) to UNIX line endings (LF).
138146
text = text.replace('\r\n', '\n')
147+
# Convert UNIX line endings (LF) to HTML line endings (<br>).
139148
text = text.replace('\n', '<br>\n')
140-
text = text.replace(' ', '&nbsp;')
149+
# Convert tabs to spaces.
150+
text = text.expandtabs(tabsize)
151+
# Convert leading spaces (that is to say spaces at the start of the string
152+
# and/or directly after a line ending) into non-breaking spaces, otherwise
153+
# HTML rendering engines will simply ignore these spaces.
154+
text = re.sub('^ +', encode_whitespace_cb, text, 0, re.MULTILINE)
155+
# Convert runs of multiple spaces into non-breaking spaces to prevent HTML
156+
# rendering engines from visually collapsing runs of spaces into a single
157+
# space. We specifically don't replace single spaces for several reasons:
158+
# 1. We'd break the HTML emitted by convert() by replacing spaces
159+
# inside HTML elements (for example the spaces that separate
160+
# element names from attribute names).
161+
# 2. If every single space is replaced by a non-breaking space,
162+
# web browsers perform awkwardly unintuitive word wrapping.
163+
# 3. The HTML output would be bloated for no good reason.
164+
text = re.sub(' {2,}', encode_whitespace_cb, text)
141165
return text
142166

143167

168+
def encode_whitespace_cb(match):
169+
"""
170+
Replace every space in the matched run with a non-breaking space.
171+
172+
:param match: A regular expression match object.
173+
:returns: The replacement string.
174+
175+
This function is used by :func:`encode_whitespace()` as a callback for
176+
replacement using a regular expression pattern.
177+
"""
178+
return '&nbsp;' * len(match.group(0))
179+
180+
144181
def html_encode(text):
145182
"""
146183
Encode characters with a special meaning as HTML.

coloredlogs/tests.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,7 @@ def test_html_conversion(self):
343343
assert ansi_encoded_text == 'I like \x1b[1;34mbirds\x1b[0m - www.eelstheband.com'
344344
html_encoded_text = convert(ansi_encoded_text)
345345
assert html_encoded_text == (
346-
'<code>I&nbsp;like&nbsp;<span style="font-weight:bold;color:blue">birds</span>&nbsp;-&nbsp;'
346+
'<code>I like <span style="font-weight:bold;color:blue">birds</span> - '
347347
'<a href="http://www.eelstheband.com" style="color:inherit">www.eelstheband.com</a></code>'
348348
)
349349

0 commit comments

Comments
 (0)