Skip to content

Commit ec1353c

Browse files
committed
Escape HTML entities
1 parent 16b26c6 commit ec1353c

File tree

1 file changed

+21
-4
lines changed

1 file changed

+21
-4
lines changed

Diff for: sphinxcontrib/htmlhelp/__init__.py

+21 -4
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,8 @@
44

55
import html
66
import os
7+
import re
8+
from html.entities import codepoint2name
79
from os import path
810
from pathlib import Path
911
from typing import TYPE_CHECKING, Any
@@ -21,7 +23,7 @@
2123
from sphinx.util.template import SphinxRenderer
2224

2325
if TYPE_CHECKING:
24-
from docutils.nodes import Element, Node, document
26+
from docutils.nodes import Element, Node
2527
from sphinx.application import Sphinx
2628
from sphinx.config import Config
2729

@@ -91,7 +93,7 @@ def chm_htmlescape(s: str, quote: bool = True) -> str:
9193

9294

9395
class ToCTreeVisitor(nodes.NodeVisitor):
94-
def __init__(self, document: document) -> None:
96+
def __init__(self, document: nodes.document) -> None:
9597
super().__init__(document)
9698
self.body: list[str] = []
9799
self.depth = 0
@@ -181,13 +183,25 @@ def update_page_context(
181183
) -> None:
182184
ctx['encoding'] = self.encoding
183185

186+
# escape the `body` part to 7-bit ASCII
187+
body = ctx.get("body")
188+
if body is not None:
189+
ctx["body"] = re.sub(r"[^\x00-\x7F]", self._escape, body)
190+
191+
@staticmethod
192+
def _escape(match: re.Match[str]) -> str:
193+
codepoint = ord(match.group(0))
194+
if codepoint in codepoint2name:
195+
return f"&{codepoint2name[codepoint]};"
196+
return f"&#{codepoint};"
197+
184198
def handle_finish(self) -> None:
185199
self.copy_stopword_list()
186200
self.build_project_file()
187201
self.build_toc_file()
188202
self.build_hhx(self.outdir, self.config.htmlhelp_basename)
189203

190-
def write_doc(self, docname: str, doctree: document) -> None:
204+
def write_doc(self, docname: str, doctree: nodes.document) -> None:
191205
for node in doctree.findall(nodes.reference):
192206
# add ``target=_blank`` attributes to external links
193207
if node.get('internal') is None and 'refuri' in node:
@@ -265,7 +279,7 @@ def build_toc_file(self) -> None:
265279
def build_hhx(self, outdir: str | os.PathLike[str], outname: str) -> None:
266280
logger.info(__('writing index file...'))
267281
index = IndexEntries(self.env).create_index(self)
268-
filename = path.join(outdir, outname + '.hhk')
282+
filename = Path(outdir, outname + '.hhk')
269283
with open(filename, 'w', encoding=self.encoding, errors='xmlcharrefreplace') as f:
270284
f.write('<UL>\n')
271285

@@ -299,6 +313,9 @@ def write_param(name: str, value: str) -> None:
299313
for title, (refs, subitems, _category_key) in group:
300314
write_index(title, refs, subitems)
301315
f.write('</UL>\n')
316+
# Fixup keywords (HTML escapes in keywords file)
317+
content = filename.read_bytes().replace(b'&#x27;', b'&#39;')
318+
filename.write_bytes(content)
302319

303320

304321
def default_htmlhelp_basename(config: Config) -> str:

0 commit comments

Comments
 (0)