@@ -81,14 +81,15 @@ def capture(command, encoding='UTF-8'):
81
81
return u'\n ' .join (clean_terminal_output (output ))
82
82
83
83
84
- def convert (text , code = True ):
84
+ def convert (text , code = True , tabsize = 4 ):
85
85
"""
86
86
Convert text with ANSI escape sequences to HTML.
87
87
88
88
:param text: The text with ANSI escape sequences (a string).
89
89
:param code: Whether to wrap the returned HTML fragment in a
90
90
``<code>...</code>`` element (a boolean, defaults
91
91
to :data:`True`).
92
+ :param tabsize: Refer to :func:`str.expandtabs()` for details.
92
93
:returns: The text converted to HTML (a string).
93
94
"""
94
95
output = []
@@ -116,31 +117,67 @@ def convert(text, code=True):
116
117
token = ''
117
118
else :
118
119
token = html_encode (token )
119
- token = encode_whitespace (token )
120
120
output .append (token )
121
121
html = '' .join (output )
122
+ html = encode_whitespace (html , tabsize )
122
123
if code :
123
124
html = '<code>%s</code>' % html
124
125
return html
125
126
126
127
127
def encode_whitespace(text, tabsize=4):
    """
    Encode whitespace so that web browsers properly render it.

    :param text: The plain text (a string).
    :param tabsize: Refer to :func:`str.expandtabs()` for details.
    :returns: The text converted to HTML (a string).

    The purpose of this function is to encode whitespace in such a way that web
    browsers render the same whitespace regardless of whether 'preformatted'
    styling is used (by wrapping the text in a ``<pre>...</pre>`` element).

    The following transformations are applied, in this order:

    1. CR+LF line endings are normalized to LF.
    2. Every LF is prefixed with ``<br>`` (the LF itself is preserved).
    3. Tabs are expanded to spaces using `tabsize`.
    4. Runs of spaces at the start of a line are replaced by
       :func:`encode_whitespace_cb()`.
    5. Runs of two or more spaces anywhere are replaced by
       :func:`encode_whitespace_cb()`.

    .. note:: While the string manipulation performed by this function is
              specifically intended not to corrupt the HTML generated by
              :func:`convert()` it definitely does have the potential to
              corrupt HTML from other sources. You have been warned :-).
    """
    # Convert Windows line endings (CR+LF) to UNIX line endings (LF).
    text = text.replace('\r\n', '\n')
    # Convert UNIX line endings (LF) to HTML line endings (<br>).
    text = text.replace('\n', '<br>\n')
    # Convert tabs to spaces.
    text = text.expandtabs(tabsize)
    # Convert leading spaces (that is to say spaces at the start of the string
    # and/or directly after a line ending) into non-breaking spaces, otherwise
    # HTML rendering engines will simply ignore these spaces. The flags are
    # passed by keyword because passing `count' and `flags' positionally to
    # re.sub() is deprecated in recent Python versions.
    text = re.sub(r'^ +', encode_whitespace_cb, text, flags=re.MULTILINE)
    # Convert runs of multiple spaces into non-breaking spaces to avoid HTML
    # rendering engines from visually collapsing runs of spaces into a single
    # space. We specifically don't replace single spaces for several reasons:
    #  1. We'd break the HTML emitted by convert() by replacing spaces
    #     inside HTML elements (for example the spaces that separate
    #     element names from attribute names).
    #  2. If every single space is replaced by a non-breaking space,
    #     web browsers perform awkwardly unintuitive word wrapping.
    #  3. The HTML output would be bloated for no good reason.
    text = re.sub(r' {2,}', encode_whitespace_cb, text)
    return text
142
166
143
167
168
def encode_whitespace_cb(match):
    """
    Replace a matched run of spaces with non-breaking spaces.

    :param match: A regular expression match object whose complete match
                  (group 0) is the run of spaces to replace.
    :returns: The replacement string: one ``&nbsp;`` entity for every
              character in the matched text.

    This function is used by :func:`encode_whitespace()` as a callback for
    replacement using a regular expression pattern.
    """
    # The entity is spelled out explicitly (instead of embedding a literal
    # U+00A0 character) so the replacement stays visible in the source code
    # and survives editors and tools that normalize whitespace.
    return '&nbsp;' * len(match.group(0))
179
+
180
+
144
181
def html_encode (text ):
145
182
"""
146
183
Encode characters with a special meaning as HTML.
0 commit comments