Skip to content

Commit b206470

Browse files
martinRenou SylvainCorlay
authored and committed
GHSL-2021-1017, GHSL-2021-1020, GHSL-2021-1021
1 parent a03cbb8 commit b206470

File tree

7 files changed

+155
-12
lines changed

7 files changed

+155
-12
lines changed

nbconvert/exporters/html.py

+10
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import base64
77
import json
8+
from lxml.html.clean import clean_html
89
import mimetypes
910
import os
1011
from pathlib import Path
@@ -149,6 +150,14 @@ def _template_name_default(self):
149150
help="Template specific theme(e.g. the name of a JupyterLab CSS theme distributed as prebuilt extension for the lab template)",
150151
).tag(config=True)
151152

153+
sanitize_html = Bool(
154+
False,
155+
help=(
156+
"Whether the HTML in Markdown cells and cell outputs should be sanitized. "
157+
"This should be set to True by nbviewer or similar tools."
158+
),
159+
).tag(config=True)
160+
152161
embed_images = Bool(
153162
False, help="Whether or not to embed images as base64 in markdown cells."
154163
).tag(config=True)
@@ -287,4 +296,5 @@ def resources_include_url(name):
287296
resources["jupyter_widgets_base_url"] = self.jupyter_widgets_base_url
288297
resources["widget_renderer_url"] = self.widget_renderer_url
289298
resources["html_manager_semver_range"] = self.html_manager_semver_range
299+
resources["should_sanitize_html"] = self.sanitize_html
290300
return resources

nbconvert/exporters/templateexporter.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,9 @@
7070
"get_metadata": filters.get_metadata,
7171
"convert_pandoc": filters.convert_pandoc,
7272
"json_dumps": json.dumps,
73-
# browsers will parse </script>, closing a script tag early
74-
# Since JSON allows escaping forward slash, this will still be parsed by JSON
75-
"escape_html_script": lambda x: x.replace("</script>", "<\\/script>"),
73+
# For escaping HTML special characters so that markup is rendered as plain text
7674
"escape_html": html.escape,
75+
# For sanitizing HTML for any XSS
7776
"clean_html": clean_html,
7877
"strip_trailing_newline": filters.strip_trailing_newline,
7978
"text_base64": filters.text_base64,

nbconvert/exporters/tests/files/notebook_inject.ipynb

+63
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,69 @@
191191
}
192192
],
193193
"source": [""]
194+
},
195+
{
196+
"cell_type": "code",
197+
"execution_count": 5,
198+
"id": "2616e107",
199+
"metadata": {},
200+
"outputs": [
201+
{
202+
"data": {
203+
"text/html": [
204+
"<script>alert('text/html output')</script>"
205+
]
206+
},
207+
"execution_count": 5,
208+
"metadata": {},
209+
"output_type": "execute_result"
210+
}
211+
],
212+
"source": [
213+
"import os; os.system('touch /tmp/pwned')"
214+
]
215+
},
216+
{
217+
"cell_type": "code",
218+
"execution_count": 5,
219+
"id": "3616e107",
220+
"metadata": {},
221+
"outputs": [
222+
{
223+
"data": {
224+
"text/markdown": [
225+
"<script>alert('text/markdown output')</script>"
226+
]
227+
},
228+
"execution_count": 5,
229+
"metadata": {},
230+
"output_type": "execute_result"
231+
}
232+
],
233+
"source": [
234+
"import os; os.system('touch /tmp/pwned')"
235+
]
236+
},
237+
{
238+
"cell_type": "code",
239+
"execution_count": 5,
240+
"id": "4616e107",
241+
"metadata": {},
242+
"outputs": [
243+
{
244+
"data": {
245+
"application/javascript": [
246+
"alert('application/javascript output')"
247+
]
248+
},
249+
"execution_count": 5,
250+
"metadata": {},
251+
"output_type": "execute_result"
252+
}
253+
],
254+
"source": [
255+
"import os; os.system('touch /tmp/pwned')"
256+
]
194257
}
195258
],
196259
"metadata": {

nbconvert/exporters/tests/test_html.py

+27-2
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,9 @@ def test_basic_name(self):
137137

138138
def test_javascript_injection(self):
139139
for template in ["lab", "classic", "reveal"]:
140-
(output, resources) = HTMLExporter(template_name=template).from_filename(self._get_notebook('notebook_inject.ipynb'))
140+
(output, resources) = HTMLExporter(
141+
template_name=template
142+
).from_filename(self._get_notebook('notebook_inject.ipynb'))
141143

142144
# Check injection in the metadata.title of the Notebook
143145
assert "<script>alert('title')</script>" not in output
@@ -150,7 +152,6 @@ def test_javascript_injection(self):
150152

151153
# Check injection in the cell.source of the Notebook
152154
assert "<script>alert('raw cell')</script>" not in output
153-
assert "<script>alert('markdown cell')</script>" not in output
154155

155156
# Check injection in svg output
156157
assert "<script>alert('image/svg+xml output')</script>" not in output
@@ -170,3 +171,27 @@ def test_javascript_injection(self):
170171

171172
# Check injection in widget view
172173
assert "<script>alert('output.data.application/vnd.jupyter.widget-view+json injection')" not in output
174+
175+
# By design, text/html, text/markdown, application/javascript and markdown cells should allow
176+
# for JavaScript code execution
177+
for template in ["lab", "classic", "reveal"]:
178+
(output, resources) = HTMLExporter(
179+
template_name=template
180+
).from_filename(self._get_notebook('notebook_inject.ipynb'))
181+
182+
assert "<script>alert('markdown cell')</script>" in output
183+
assert "<script>alert('text/markdown output')</script>" in output
184+
assert "<script>alert('text/html output')</script>" in output
185+
assert "alert('application/javascript output')" in output
186+
187+
# But sanitization can be opted into with sanitize_html=True
188+
for template in ["lab", "classic", "reveal"]:
189+
(output, resources) = HTMLExporter(
190+
template_name=template,
191+
sanitize_html=True
192+
).from_filename(self._get_notebook('notebook_inject.ipynb'))
193+
194+
assert "<script>alert('markdown cell')</script>" not in output
195+
assert "<script>alert('text/markdown output')</script>" not in output
196+
assert "<script>alert('text/html output')</script>" not in output
197+
assert "alert('application/javascript output')" not in output

nbconvert/nbconvertapp.py

+9
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ def validate(self, obj, value):
6565
"template": "TemplateExporter.template_name",
6666
"template-file": "TemplateExporter.template_file",
6767
"theme": "HTMLExporter.theme",
68+
"sanitize_html": "HTMLExporter.sanitize_html",
6869
"writer": "NbConvertApp.writer_class",
6970
"post": "NbConvertApp.postprocessor_class",
7071
"output": "NbConvertApp.output_base",
@@ -178,6 +179,14 @@ def validate(self, obj, value):
178179
},
179180
"""Embed the images as base64 dataurls in the output. This flag is only useful for the HTML/WebPDF/Slides exports.""",
180181
),
182+
"sanitize-html": (
183+
{
184+
"HTMLExporter": {
185+
"sanitize_html": True,
186+
}
187+
},
188+
"""Whether the HTML in Markdown cells and cell outputs should be sanitized.""",
189+
),
181190
}
182191
)
183192

share/jupyter/nbconvert/templates/classic/base.html.j2

+26-5
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,12 @@
8181
{%- endif -%}
8282
<div class="inner_cell">
8383
<div class="text_cell_render border-box-sizing rendered_html">
84-
{{ cell.source | markdown2html | strip_files_prefix | clean_html }}
84+
{%- if resources.should_sanitize_html %}
85+
{%- set html_value=cell.source | markdown2html | strip_files_prefix | clean_html -%}
86+
{%- else %}
87+
{%- set html_value=cell.source | markdown2html | strip_files_prefix -%}
88+
{%- endif %}
89+
{{ html_value }}
8590
</div>
8691
</div>
8792
</div>
@@ -133,23 +138,33 @@ unknown type {{ cell.type }}
133138

134139
{% block data_html scoped -%}
135140
<div class="output_html rendered_html output_subarea {{ extra_class }}">
141+
{%- if resources.should_sanitize_html %}
142+
{%- set html_value=output.data['text/html'] | clean_html -%}
143+
{%- else %}
144+
{%- set html_value=output.data['text/html'] -%}
145+
{%- endif %}
136146
{%- if output.get('metadata', {}).get('text/html', {}).get('isolated') -%}
137147
<iframe
138148
class="isolated-iframe"
139149
style="height:520px; width:100%; margin:0; padding: 0"
140150
frameborder="0"
141151
scrolling="auto"
142-
src="data:text/html;base64,{{output.data['text/html'] | text_base64}}">
152+
src="data:text/html;base64,{{ html_value | text_base64 }}">
143153
</iframe>
144154
{%- else -%}
145-
{{ output.data['text/html'] }}
155+
{{ html_value }}
146156
{%- endif -%}
147157
</div>
148158
{%- endblock data_html %}
149159

150160
{% block data_markdown scoped -%}
161+
{%- if resources.should_sanitize_html %}
162+
{%- set html_value=output.data['text/markdown'] | markdown2html | clean_html -%}
163+
{%- else %}
164+
{%- set html_value=output.data['text/markdown'] | markdown2html -%}
165+
{%- endif %}
151166
<div class="output_markdown rendered_html output_subarea {{ extra_class }}">
152-
{{ output.data['text/markdown'] | markdown2html }}
167+
{{ html_value }}
153168
</div>
154169
{%- endblock data_markdown %}
155170

@@ -234,14 +249,17 @@ alt="{{ alttext | escape_html }}"
234249
{%- block data_javascript scoped %}
235250
{% set div_id = uuid4() %}
236251
<div id="{{ div_id }}" class="output_subarea output_javascript {{ extra_class }}">
252+
{%- if not resources.should_sanitize_html %}
237253
<script type="text/javascript">
238254
var element = $('#{{ div_id }}');
239255
{{ output.data['application/javascript'] }}
240256
</script>
257+
{%- endif %}
241258
</div>
242259
{%- endblock -%}
243260

244261
{%- block data_widget_view scoped %}
262+
{%- if not resources.should_sanitize_html %}
245263
{% set div_id = uuid4() %}
246264
{% set datatype_list = output.data | filter_data_type %}
247265
{% set datatype = datatype_list[0]%}
@@ -253,14 +271,17 @@ var element = $('#{{ div_id }}');
253271
{{ output.data[datatype] | json_dumps | escape_html }}
254272
</script>
255273
</div>
274+
{%- endif %}
256275
{%- endblock data_widget_view -%}
257276

258277
{%- block footer %}
278+
{%- if not resources.should_sanitize_html %}
259279
{% set mimetype = 'application/vnd.jupyter.widget-state+json'%}
260280
{% if mimetype in nb.metadata.get("widgets",{})%}
261281
<script type="{{ mimetype }}">
262-
{{ nb.metadata.widgets[mimetype] | json_dumps | clean_html }}
282+
{{ nb.metadata.widgets[mimetype] | json_dumps | escape_html }}
263283
</script>
264284
{% endif %}
285+
{%- endif %}
265286
{{ super() }}
266287
{%- endblock footer-%}

share/jupyter/nbconvert/templates/lab/base.html.j2

+18-2
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,12 @@
9898
{{ self.empty_in_prompt() }}
9999
{%- endif -%}
100100
<div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-MarkdownOutput {{ celltags(cell) }}" data-mime-type="text/markdown">
101-
{{ cell.source | markdown2html | strip_files_prefix | clean_html }}
101+
{%- if resources.should_sanitize_html %}
102+
{%- set html_value=cell.source | markdown2html | strip_files_prefix | clean_html -%}
103+
{%- else %}
104+
{%- set html_value=cell.source | markdown2html | strip_files_prefix -%}
105+
{%- endif %}
106+
{{ html_value }}
102107
</div>
103108
</div>
104109
</div>
@@ -161,13 +166,22 @@ unknown type {{ cell.type }}
161166

162167
{% block data_html scoped -%}
163168
<div class="jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output {{ extra_class }}" data-mime-type="text/html">
169+
{%- if resources.should_sanitize_html %}
170+
{{ output.data['text/html'] | clean_html }}
171+
{%- else %}
164172
{{ output.data['text/html'] }}
173+
{%- endif %}
165174
</div>
166175
{%- endblock data_html %}
167176

168177
{% block data_markdown scoped -%}
178+
{%- if resources.should_sanitize_html %}
179+
{%- set html_value=output.data['text/markdown'] | markdown2html | clean_html -%}
180+
{%- else %}
181+
{%- set html_value=output.data['text/markdown'] | markdown2html -%}
182+
{%- endif %}
169183
<div class="jp-RenderedHTMLCommon jp-RenderedMarkdown jp-OutputArea-output {{ extra_class }}" data-mime-type="text/markdown">
170-
{{ output.data['text/markdown'] | markdown2html }}
184+
{{ html_value }}
171185
</div>
172186
{%- endblock data_markdown %}
173187

@@ -266,10 +280,12 @@ jp-needs-dark-background
266280
{% set div_id = uuid4() %}
267281
{%- block data_javascript scoped %}
268282
<div id="{{ div_id }}" class="jp-RenderedJavaScript jp-OutputArea-output {{ extra_class }}" data-mime-type="application/javascript">
283+
{%- if not resources.should_sanitize_html %}
269284
<script type="text/javascript">
270285
var element = document.getElementById('{{ div_id }}');
271286
{{ output.data['application/javascript'] }}
272287
</script>
288+
{%- endif %}
273289
</div>
274290
{%- endblock -%}
275291

0 commit comments

Comments
 (0)