Skip to content

Commit 8a2b049

Browse files
authored
Merge pull request #2785 from zerline/HTMLSanitizer
HTML sanitizer for descriptions.
2 parents 2de28de + df33205 commit 8a2b049

16 files changed

+849
-19
lines changed

ipywidgets/widgets/widget_bool.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,13 @@ class ToggleButton(_Bool):
5353
value : {True,False}
5454
value of the toggle button: True-pressed, False-unpressed
5555
description : str
56-
description displayed next to the button
56+
description displayed on the button
5757
icon: str
5858
font-awesome icon name
59+
style: instance of DescriptionStyle
60+
styling customizations
61+
button_style: enum
62+
button predefined styling
5963
"""
6064
_view_name = Unicode('ToggleButtonView').tag(sync=True)
6165
_model_name = Unicode('ToggleButtonModel').tag(sync=True)

ipywidgets/widgets/widget_button.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ class Button(DOMWidget, CoreWidget):
3535
Parameters
3636
----------
3737
description: str
38-
description displayed next to the button
38+
description displayed on the button
3939
icon: str
4040
font-awesome icon names, without the 'fa-' prefix
4141
disabled: bool

ipywidgets/widgets/widget_description.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
"""Contains the DescriptionWidget class"""
55

6-
from traitlets import Unicode
6+
from traitlets import Bool, Unicode
77
from .widget import Widget, widget_serialization, register
88
from .trait_types import InstanceDict
99
from .widget_style import Style
@@ -21,6 +21,7 @@ class DescriptionWidget(DOMWidget, CoreWidget):
2121
"""Widget that has a description label to the side."""
2222
_model_name = Unicode('DescriptionModel').tag(sync=True)
2323
description = Unicode('', help="Description of the control.").tag(sync=True)
24+
description_allow_html = Bool(False, help="Accept HTML in the description.").tag(sync=True)
2425
style = InstanceDict(DescriptionStyle, help="Styling customizations").tag(sync=True, **widget_serialization)
2526

2627
def _repr_keys(self):

packages/base-manager/package.json

+3-1
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,16 @@
3535
"@jupyter-widgets/base": "^5.0.0-alpha.3",
3636
"@jupyterlab/services": "^6.0.0",
3737
"@lumino/coreutils": "^1.4.2",
38-
"base64-js": "^1.2.1"
38+
"base64-js": "^1.2.1",
39+
"sanitize-html": "^1.20"
3940
},
4041
"devDependencies": {
4142
"@types/base64-js": "^1.2.5",
4243
"@types/chai": "^4.1.7",
4344
"@types/chai-as-promised": "^7.1.0",
4445
"@types/expect.js": "^0.3.29",
4546
"@types/mocha": "^8.2.2",
47+
"@types/sanitize-html": "^1.20",
4648
"@types/sinon": "^10.0.2",
4749
"@types/sinon-chai": "^3.2.2",
4850
"chai": "^4.0.0",

packages/base-manager/src/latex.ts

+193
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
/*-----------------------------------------------------------------------------
2+
| Copyright (c) Jupyter Development Team.
3+
| Distributed under the terms of the Modified BSD License.
4+
|----------------------------------------------------------------------------*/
5+
// Some magic for deferring mathematical expressions to MathJax
6+
// by hiding them from the Markdown parser.
7+
// Some of the code here is adapted with permission from Davide Cervone
8+
// under the terms of the Apache2 license governing the MathJax project.
9+
// Other minor modifications are also due to StackExchange and are used with
10+
// permission.
11+
12+
const inline = '$'; // the inline math delimiter
13+
14+
// MATHSPLIT contains the pattern for math delimiters and special symbols
15+
// needed for searching for math in the text input.
16+
const MATHSPLIT =
17+
/(\$\$?|\\(?:begin|end)\{[a-z]*\*?\}|\\[{}$]|[{}]|(?:\n\s*)+|@@\d+@@|\\\\(?:\(|\)|\[|\]))/i;
18+
19+
/**
20+
* Break up the text into its component parts and search
21+
* through them for math delimiters, braces, linebreaks, etc.
22+
* Math delimiters must match and braces must balance.
23+
* Don't allow math to pass through a double linebreak
24+
* (which will be a paragraph).
25+
*/
26+
export function removeMath(text: string): { text: string; math: string[] } {
27+
const math: string[] = []; // stores math strings for later
28+
// Index of the block holding the opening delimiter of the math currently
// being scanned; null while outside math.
let start: number | null = null;
29+
// Closing delimiter that terminates the math currently being scanned.
let end: string | null = null;
30+
// Index of the most recent closing delimiter that was seen while braces
// were still unbalanced; used as a fallback end position.
let last: number | null = null;
31+
// Number of currently unmatched `{` inside the math being scanned.
let braces = 0;
32+
// Undoes the temporary `~T` / `~D` escaping applied below (identity when
// the text contains no backticks).
let deTilde: (text: string) => string;
33+
34+
// Except for extreme edge cases, this should catch precisely those pieces of the markdown
35+
// source that will later be turned into code spans. While MathJax will not TeXify code spans,
36+
// we still have to consider them at this point; the following issue has happened several times:
37+
//
38+
// `$foo` and `$bar` are variables. --> <code>$foo ` and `$bar</code> are variables.
39+
const hasCodeSpans = /`/.test(text);
40+
if (hasCodeSpans) {
41+
// Escape every `~` as `~T`, then hide `$` inside backtick spans as `~D`
// so that they are not picked up as math delimiters.
text = text
42+
.replace(/~/g, '~T')
43+
.replace(/(^|[^\\])(`+)([^\n]*?[^`\n])\2(?!`)/gm, (wholematch) =>
44+
wholematch.replace(/\$/g, '~D')
45+
);
46+
deTilde = (text: string) => {
47+
return text.replace(/~([TD])/g, (wholematch, character) =>
48+
character === 'T' ? '~' : inline
49+
);
50+
};
51+
} else {
52+
deTilde = (text: string) => {
53+
return text;
54+
};
55+
}
56+
57+
// Normalize line endings to `\n` and split on MATHSPLIT. Because MATHSPLIT
// has a single capturing group, the matched delimiters are kept in the
// result at the odd indices, with the text between them at the even ones.
let blocks = text.replace(/\r\n?/g, '\n').split(MATHSPLIT);
58+
59+
// Visit only the delimiter blocks (odd indices).
for (let i = 1, m = blocks.length; i < m; i += 2) {
60+
const block = blocks[i];
61+
if (block.charAt(0) === '@') {
62+
//
63+
// Things that look like our math markers will get
64+
// stored and then retrieved along with the math.
65+
//
66+
blocks[i] = '@@' + math.length + '@@';
67+
math.push(block);
68+
} else if (start !== null) {
69+
//
70+
// If we are in math, look for the end delimiter,
71+
// but don't go past double line breaks, and
72+
// balance braces within the math.
73+
//
74+
if (block === end) {
75+
if (braces) {
76+
// Braces are still open: remember this position as a candidate end.
last = i;
77+
} else {
78+
blocks = processMath(start, i, deTilde, math, blocks);
79+
start = null;
80+
end = null;
81+
last = null;
82+
}
83+
} else if (block.match(/\n.*\n/)) {
84+
// A delimiter block containing two newlines: stop scanning this math.
// If a candidate end was recorded, rewind to it and store the math up
// to that point; otherwise the opening delimiter is simply abandoned.
if (last !== null) {
85+
i = last;
86+
blocks = processMath(start, i, deTilde, math, blocks);
87+
}
88+
start = null;
89+
end = null;
90+
last = null;
91+
braces = 0;
92+
} else if (block === '{') {
93+
braces++;
94+
} else if (block === '}' && braces) {
95+
braces--;
96+
}
97+
} else {
98+
//
99+
// Look for math start delimiters and when
100+
// found, set up the end delimiter.
101+
//
102+
if (block === inline || block === '$$') {
103+
start = i;
104+
end = block;
105+
braces = 0;
106+
} else if (block === '\\\\(' || block === '\\\\[') {
107+
start = i;
108+
end = block.slice(-1) === '(' ? '\\\\)' : '\\\\]';
109+
braces = 0;
110+
} else if (block.substr(1, 5) === 'begin') {
111+
start = i;
112+
// The matching end is `\end` followed by the same `{name}` part.
end = '\\end' + block.substr(6);
113+
braces = 0;
114+
}
115+
}
116+
}
117+
// The input ended while still inside math: if a candidate end delimiter
// was recorded, store the math up to that position.
if (start !== null && last !== null) {
118+
blocks = processMath(start, last, deTilde, math, blocks);
119+
start = null;
120+
end = null;
121+
last = null;
122+
}
123+
return { text: deTilde(blocks.join('')), math };
124+
}
125+
126+
/**
127+
* Put back the math strings that were saved,
128+
* and clear the math array (no need to keep it around).
129+
*/
130+
export function replaceMath(text: string, math: string[]): string {
131+
/**
132+
* Replace a math placeholder with its corresponding group.
133+
* The math delimiters "\\(", "\\[", "\\)" and "\\]" are replaced
134+
* removing one backslash in order to be interpreted correctly by MathJax.
135+
*/
136+
const process = (match: string, n: number): string => {
137+
let group = math[n];
138+
if (
139+
group.substr(0, 3) === '\\\\(' &&
140+
group.substr(group.length - 3) === '\\\\)'
141+
) {
142+
group = '\\(' + group.substring(3, group.length - 3) + '\\)';
143+
} else if (
144+
group.substr(0, 3) === '\\\\[' &&
145+
group.substr(group.length - 3) === '\\\\]'
146+
) {
147+
group = '\\[' + group.substring(3, group.length - 3) + '\\]';
148+
}
149+
return group;
150+
};
151+
// Replace all the math group placeholders in the text
152+
// with the saved strings.
153+
return text.replace(/@@(\d+)@@/g, process);
154+
}
155+
156+
/**
 * Collapse the blocks that make up one math expression into a placeholder.
 *
 * Blocks `i` through `j` (inclusive) are joined into one string in which
 * `&`, `<` and `>` are replaced by named entities. Block `i` is overwritten
 * with an `@@n@@` marker, where `n` is the position the math will occupy in
 * `math`; blocks `i + 1` through `j` are set to the empty string; and the
 * joined string, after being passed through `preProcess`, is appended to
 * `math`.
 *
 * @param i - Index of the first block of the expression.
 * @param j - Index of the last block of the expression.
 * @param preProcess - Applied to the joined, escaped math before it is stored.
 * @param math - Storage array; the processed math string is pushed onto it.
 * @param blocks - Block array; modified in place.
 * @returns The same `blocks` array that was passed in.
 */
function processMath(
  i: number,
  j: number,
  preProcess: (input: string) => string,
  math: string[],
  blocks: string[]
): string[] {
  let block = blocks
    .slice(i, j + 1)
    .join('')
    .replace(/&/g, '&amp;') // use HTML entity for &
    .replace(/</g, '&lt;') // use HTML entity for <
    .replace(/>/g, '&gt;'); // use HTML entity for >

  // Use a `typeof` guard: referencing an undeclared `navigator` global
  // directly throws a ReferenceError outside of a browser.
  if (
    typeof navigator !== 'undefined' &&
    navigator.appName === 'Microsoft Internet Explorer'
  ) {
    // For IE, put <br/> at the end of `%` comment lines.
    block = block.replace(/(%[^\n]*)\n/g, '$1<br/>\n');
  }

  // Blank every block after the first one of the expression.
  for (let k = j; k > i; k--) {
    blocks[k] = '';
  }
  // Replace the first block with a unique tag to find later; the tag number
  // is the index the math is about to receive in `math`.
  blocks[i] = '@@' + math.length + '@@';

  if (preProcess) {
    block = preProcess(block);
  }
  math.push(block);
  return blocks;
}

packages/base-manager/src/manager-base.ts

+39
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,41 @@ import {
2626
} from '@jupyter-widgets/base';
2727

2828
import { base64ToBuffer, bufferToBase64, hexToBuffer } from './utils';
29+
import { removeMath, replaceMath } from './latex';
30+
import sanitize from 'sanitize-html';
2931

3032
const PROTOCOL_MAJOR_VERSION = PROTOCOL_VERSION.split('.', 1)[0];
3133

34+
/**
 * Sanitize an HTML-formatted description for inline display.
 *
 * The string is passed to `sanitize` with a whitelist of inline tags and,
 * per tag, a whitelist of attributes.
 *
 * @param s - HTML source to sanitize.
 * @returns The value returned by `sanitize` for that source.
 */
function default_inline_sanitize(s: string): string {
  return sanitize(s, {
    // Tags that may appear in the output.
    allowedTags: [
      'a',
      'abbr',
      'b',
      'code',
      'em',
      'i',
      'img',
      'li',
      'ol',
      'span',
      'strong',
      'ul',
    ],
    // Attributes permitted per tag; the '*' entry applies to every tag.
    allowedAttributes: {
      '*': ['aria-*', 'style', 'title'],
      a: ['href'],
      img: ['src'],
      // NOTE(review): 'style' is not listed in allowedTags above, so this
      // entry looks unused - confirm before relying on it.
      style: ['media', 'type'],
    },
  });
}
63+
3264
export interface IState extends PartialJSONObject {
3365
buffers?: IBase64Buffers[];
3466
model_name: string;
@@ -467,6 +499,13 @@ export abstract class ManagerBase implements IWidgetManager {
467499
return Promise.resolve(url);
468500
}
469501

502+
/**
 * Sanitize a description string for inline HTML output.
 *
 * Math is first taken out of the source with `removeMath`, the remaining
 * text is run through `default_inline_sanitize`, and the saved math is then
 * put back with `replaceMath`.
 *
 * @param source - Description text, possibly containing HTML and math.
 * @returns The sanitized text with the math restored.
 */
inline_sanitize(source: string): string {
  const { text, math } = removeMath(source);
  // Sanitize tags for inline output.
  return replaceMath(default_inline_sanitize(text), math);
}
508+
470509
/**
471510
* The comm target name to register
472511
*/

packages/base/src/manager.ts

+2
Original file line numberDiff line numberDiff line change
@@ -190,4 +190,6 @@ export interface IWidgetManager {
190190
* The default implementation just returns the original url.
191191
*/
192192
resolveUrl(url: string): Promise<string>;
193+
194+
inline_sanitize(s: string): string;
193195
}

packages/base/test/src/dummy-manager.ts

+4
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,10 @@ export class DummyManager implements widgets.IWidgetManager {
316316
return Promise.resolve(url);
317317
}
318318

319+
/**
 * Stub implementation for the dummy manager: returns the input string
 * unchanged, without performing any sanitization.
 */
inline_sanitize(s: string): string {
320+
return s;
321+
}
322+
319323
/**
320324
* Dictionary of model ids and model instance promises
321325
*/

packages/controls/src/widget_bool.ts

+6-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,12 @@ export class CheckboxView extends DescriptionView {
7979
return;
8080
}
8181
const description = this.model.get('description');
82-
this.descriptionSpan.innerHTML = description;
82+
if (this.model.get('description_allow_html')) {
83+
this.descriptionSpan.innerHTML =
84+
this.model.widget_manager.inline_sanitize(description);
85+
} else {
86+
this.descriptionSpan.textContent = description;
87+
}
8388
this.typeset(this.descriptionSpan);
8489
this.descriptionSpan.title = description;
8590
this.checkbox.title = description;

packages/controls/src/widget_description.ts

+12-1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ export class DescriptionModel extends DOMWidgetModel {
4141
_view_module_version: JUPYTER_CONTROLS_VERSION,
4242
_model_module_version: JUPYTER_CONTROLS_VERSION,
4343
description: '',
44+
description_allow_html: false,
4445
};
4546
}
4647
}
@@ -53,6 +54,11 @@ export class DescriptionView extends DOMWidgetView {
5354
this.label.style.display = 'none';
5455

5556
this.listenTo(this.model, 'change:description', this.updateDescription);
57+
this.listenTo(
58+
this.model,
59+
'change:description_allow_html',
60+
this.updateDescription
61+
);
5662
this.listenTo(this.model, 'change:tabbable', this.updateTabindex);
5763
this.updateDescription();
5864
this.updateTabindex();
@@ -68,7 +74,12 @@ export class DescriptionView extends DOMWidgetView {
6874
if (description.length === 0) {
6975
this.label.style.display = 'none';
7076
} else {
71-
this.label.innerHTML = description;
77+
if (this.model.get('description_allow_html')) {
78+
this.label.innerHTML =
79+
this.model.widget_manager.inline_sanitize(description);
80+
} else {
81+
this.label.textContent = description;
82+
}
7283
this.typeset(this.label);
7384
this.label.style.display = '';
7485
}

packages/html-manager/package.json

+1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
"devDependencies": {
5353
"@types/mocha": "^8.2.2",
5454
"@types/node": "^15.12.2",
55+
"@types/sanitize-html": "^1.20",
5556
"chai": "^4.0.0",
5657
"css-loader": "^5.2.6",
5758
"file-loader": "^6.2.0",

0 commit comments

Comments
 (0)