|
4 | 4 |
|
5 | 5 | import html
|
6 | 6 | import os
|
| 7 | +import re |
| 8 | +from html.entities import codepoint2name |
7 | 9 | from os import path
|
8 | 10 | from pathlib import Path
|
9 | 11 | from typing import TYPE_CHECKING, Any
|
|
21 | 23 | from sphinx.util.template import SphinxRenderer
|
22 | 24 |
|
23 | 25 | if TYPE_CHECKING:
|
24 |
| - from docutils.nodes import Element, Node, document |
| 26 | + from docutils.nodes import Element, Node |
25 | 27 | from sphinx.application import Sphinx
|
26 | 28 | from sphinx.config import Config
|
27 | 29 |
|
@@ -91,7 +93,7 @@ def chm_htmlescape(s: str, quote: bool = True) -> str:
|
91 | 93 |
|
92 | 94 |
|
93 | 95 | class ToCTreeVisitor(nodes.NodeVisitor):
|
94 |
| - def __init__(self, document: document) -> None: |
| 96 | + def __init__(self, document: nodes.document) -> None: |
95 | 97 | super().__init__(document)
|
96 | 98 | self.body: list[str] = []
|
97 | 99 | self.depth = 0
|
@@ -181,13 +183,25 @@ def update_page_context(
|
181 | 183 | ) -> None:
|
182 | 184 | ctx['encoding'] = self.encoding
|
183 | 185 |
|
| 186 | + # escape the `body` part to 7-bit ASCII |
| 187 | + body = ctx.get("body") |
| 188 | + if body is not None: |
| 189 | + ctx["body"] = re.sub(r"[^\x00-\x7F]", self._escape, body) |
| 190 | + |
| 191 | + @staticmethod |
| 192 | + def _escape(match: re.Match[str]) -> str: |
| 193 | + codepoint = ord(match.group(0)) |
| 194 | + if codepoint in codepoint2name: |
| 195 | + return f"&{codepoint2name[codepoint]};" |
| 196 | + return f"&#{codepoint};" |
| 197 | + |
184 | 198 | def handle_finish(self) -> None:
|
185 | 199 | self.copy_stopword_list()
|
186 | 200 | self.build_project_file()
|
187 | 201 | self.build_toc_file()
|
188 | 202 | self.build_hhx(self.outdir, self.config.htmlhelp_basename)
|
189 | 203 |
|
190 |
| - def write_doc(self, docname: str, doctree: document) -> None: |
| 204 | + def write_doc(self, docname: str, doctree: nodes.document) -> None: |
191 | 205 | for node in doctree.findall(nodes.reference):
|
192 | 206 | # add ``target=_blank`` attributes to external links
|
193 | 207 | if node.get('internal') is None and 'refuri' in node:
|
@@ -265,7 +279,7 @@ def build_toc_file(self) -> None:
|
265 | 279 | def build_hhx(self, outdir: str | os.PathLike[str], outname: str) -> None:
|
266 | 280 | logger.info(__('writing index file...'))
|
267 | 281 | index = IndexEntries(self.env).create_index(self)
|
268 |
| - filename = path.join(outdir, outname + '.hhk') |
| 282 | + filename = Path(outdir, outname + '.hhk') |
269 | 283 | with open(filename, 'w', encoding=self.encoding, errors='xmlcharrefreplace') as f:
|
270 | 284 | f.write('<UL>\n')
|
271 | 285 |
|
@@ -299,6 +313,9 @@ def write_param(name: str, value: str) -> None:
|
299 | 313 | for title, (refs, subitems, _category_key) in group:
|
300 | 314 | write_index(title, refs, subitems)
|
301 | 315 | f.write('</UL>\n')
|
| 316 | + # Fixup keywords (HTML escapes in keywords file) |
| 317 | + content = filename.read_bytes().replace(b'&#x27;', b'&#39;') |
| 318 | + filename.write_bytes(content) |
302 | 319 |
|
303 | 320 |
|
304 | 321 | def default_htmlhelp_basename(config: Config) -> str:
|
|
0 commit comments