Skip to content

Commit e142023

Browse files
authored
refactor: Use a custom autoref HTML tag
PR-48: #48
1 parent 44605ec commit e142023

File tree

3 files changed

+145
-12
lines changed

3 files changed

+145
-12
lines changed

src/mkdocs_autorefs/plugin.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ class AutorefsPlugin(BasePlugin):
5252

5353
scan_toc: bool = True
5454
current_page: str | None = None
55+
legacy_refs: bool = True
5556

5657
def __init__(self) -> None:
5758
"""Initialize the object."""
@@ -211,7 +212,7 @@ def on_post_page(self, output: str, page: Page, **kwargs: Any) -> str: # noqa:
211212
log.debug(f"Fixing references in page {page.file.src_path}")
212213

213214
url_mapper = functools.partial(self.get_item_url, from_url=page.url, fallback=self.get_fallback_anchor)
214-
fixed_output, unmapped = fix_refs(output, url_mapper)
215+
fixed_output, unmapped = fix_refs(output, url_mapper, _legacy_refs=self.legacy_refs)
215216

216217
if unmapped and log.isEnabledFor(logging.WARNING):
217218
for ref in unmapped:

src/mkdocs_autorefs/references.py

+91-6
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import re
77
import warnings
88
from html import escape, unescape
9+
from html.parser import HTMLParser
910
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Match
1011
from urllib.parse import urlsplit
1112
from xml.etree.ElementTree import Element
@@ -44,7 +45,12 @@ def __getattr__(name: str) -> Any:
4445
rf"(?: class=(?P<class>{_ATTR_VALUE}))?(?P<attrs> [^<>]+)?>(?P<title>.*?)</span>",
4546
flags=re.DOTALL,
4647
)
47-
"""A regular expression to match mkdocs-autorefs' special reference markers
48+
"""Deprecated. Use [`AUTOREF_RE`][mkdocs_autorefs.references.AUTOREF_RE] instead."""
49+
50+
AUTOREF_RE = re.compile(r"<autoref (?P<attrs>.*?)>(?P<title>.*?)</autoref>", flags=re.DOTALL)
51+
"""The autoref HTML tag regular expression.
52+
53+
A regular expression to match mkdocs-autorefs' special reference markers
4854
in the [`on_post_page` hook][mkdocs_autorefs.plugin.AutorefsPlugin.on_post_page].
4955
"""
5056

@@ -135,8 +141,8 @@ def _make_tag(self, identifier: str, text: str) -> Element:
135141
Returns:
136142
A new element.
137143
"""
138-
el = Element("span")
139-
el.set("data-autorefs-identifier", identifier)
144+
el = Element("autoref")
145+
el.set("identifier", identifier)
140146
el.text = text
141147
return el
142148

@@ -167,7 +173,7 @@ def relative_url(url_a: str, url_b: str) -> str:
167173
return f"{relative}#{anchor}"
168174

169175

170-
def fix_ref(url_mapper: Callable[[str], str], unmapped: list[str]) -> Callable:
176+
def _legacy_fix_ref(url_mapper: Callable[[str], str], unmapped: list[str]) -> Callable:
171177
"""Return a `repl` function for [`re.sub`](https://docs.python.org/3/library/re.html#re.sub).
172178
173179
In our context, we match Markdown references and replace them with HTML links.
@@ -216,7 +222,84 @@ def inner(match: Match) -> str:
216222
return inner
217223

218224

219-
def fix_refs(html: str, url_mapper: Callable[[str], str]) -> tuple[str, list[str]]:
225+
class _AutorefsAttrs(dict):
226+
_handled_attrs: ClassVar[set[str]] = {"identifier", "optional", "hover", "class"}
227+
228+
@property
229+
def remaining(self) -> str:
230+
return " ".join(k if v is None else f'{k}="{v}"' for k, v in self.items() if k not in self._handled_attrs)
231+
232+
233+
class _HTMLAttrsParser(HTMLParser):
234+
def __init__(self):
235+
super().__init__()
236+
self.attrs = {}
237+
238+
def parse(self, html: str) -> _AutorefsAttrs:
239+
self.attrs.clear()
240+
self.feed(html)
241+
return _AutorefsAttrs(self.attrs)
242+
243+
def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: # noqa: ARG002
244+
self.attrs.update(attrs)
245+
246+
247+
_html_attrs_parser = _HTMLAttrsParser()
248+
249+
250+
def fix_ref(url_mapper: Callable[[str], str], unmapped: list[str]) -> Callable:
251+
"""Return a `repl` function for [`re.sub`](https://docs.python.org/3/library/re.html#re.sub).
252+
253+
In our context, we match Markdown references and replace them with HTML links.
254+
255+
When the matched reference's identifier was not mapped to an URL, we append the identifier to the outer
256+
`unmapped` list. It generally means the user is trying to cross-reference an object that was not collected
257+
and rendered, making it impossible to link to it. We catch this exception in the caller to issue a warning.
258+
259+
Arguments:
260+
url_mapper: A callable that gets an object's site URL by its identifier,
261+
such as [mkdocs_autorefs.plugin.AutorefsPlugin.get_item_url][].
262+
unmapped: A list to store unmapped identifiers.
263+
264+
Returns:
265+
The actual function accepting a [`Match` object](https://docs.python.org/3/library/re.html#match-objects)
266+
and returning the replacement strings.
267+
"""
268+
269+
def inner(match: Match) -> str:
270+
title = match["title"]
271+
attrs = _html_attrs_parser.parse(f"<a {match['attrs']}>")
272+
identifier: str = attrs["identifier"]
273+
optional = "optional" in attrs
274+
hover = "hover" in attrs
275+
276+
try:
277+
url = url_mapper(unescape(identifier))
278+
except KeyError:
279+
if optional:
280+
if hover:
281+
return f'<span title="{identifier}">{title}</span>'
282+
return title
283+
unmapped.append(identifier)
284+
if title == identifier:
285+
return f"[{identifier}][]"
286+
return f"[{title}][{identifier}]"
287+
288+
parsed = urlsplit(url)
289+
external = parsed.scheme or parsed.netloc
290+
classes = (attrs.get("class") or "").strip().split()
291+
classes = ["autorefs", "autorefs-external" if external else "autorefs-internal", *classes]
292+
class_attr = " ".join(classes)
293+
if remaining := attrs.remaining:
294+
remaining = f" {remaining}"
295+
if optional and hover:
296+
return f'<a class="{class_attr}" title="{identifier}" href="{escape(url)}"{remaining}>{title}</a>'
297+
return f'<a class="{class_attr}" href="{escape(url)}"{remaining}>{title}</a>'
298+
299+
return inner
300+
301+
302+
def fix_refs(html: str, url_mapper: Callable[[str], str], *, _legacy_refs: bool = True) -> tuple[str, list[str]]:
220303
"""Fix all references in the given HTML text.
221304
222305
Arguments:
@@ -228,7 +311,9 @@ def fix_refs(html: str, url_mapper: Callable[[str], str]) -> tuple[str, list[str
228311
The fixed HTML.
229312
"""
230313
unmapped: list[str] = []
231-
html = AUTO_REF_RE.sub(fix_ref(url_mapper, unmapped), html)
314+
html = AUTOREF_RE.sub(fix_ref(url_mapper, unmapped), html)
315+
if _legacy_refs:
316+
html = AUTO_REF_RE.sub(_legacy_fix_ref(url_mapper, unmapped), html)
232317
return html, unmapped
233318

234319

tests/test_references.py

+52-5
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ def test_ignore_reference_with_special_char() -> None:
212212
)
213213

214214

215-
def test_custom_required_reference() -> None:
215+
def test_legacy_custom_required_reference() -> None:
216216
"""Check that external HTML-based references are expanded or reported missing."""
217217
url_map = {"ok": "ok.html#ok"}
218218
source = "<span data-autorefs-identifier=bar>foo</span> <span data-autorefs-identifier=ok>ok</span>"
@@ -221,7 +221,16 @@ def test_custom_required_reference() -> None:
221221
assert unmapped == ["bar"]
222222

223223

224-
def test_custom_optional_reference() -> None:
224+
def test_custom_required_reference() -> None:
    """Expand a mapped `<autoref>` tag and report the unmapped one as missing."""
    known_urls = {"ok": "ok.html#ok"}
    html = "<autoref identifier=bar>foo</autoref> <autoref identifier=ok>ok</autoref>"
    expected_html = '[foo][bar] <a class="autorefs autorefs-internal" href="ok.html#ok">ok</a>'

    fixed, missing = fix_refs(html, known_urls.__getitem__)

    assert fixed == expected_html
    assert missing == ["bar"]
231+
232+
233+
def test_legacy_custom_optional_reference() -> None:
225234
"""Check that optional HTML-based references are expanded and never reported missing."""
226235
url_map = {"ok": "ok.html#ok"}
227236
source = '<span data-autorefs-optional="bar">foo</span> <span data-autorefs-optional=ok>ok</span>'
@@ -230,7 +239,16 @@ def test_custom_optional_reference() -> None:
230239
assert unmapped == []
231240

232241

233-
def test_custom_optional_hover_reference() -> None:
242+
def test_custom_optional_reference() -> None:
    """Expand optional `<autoref>` tags when mapped, and never report them missing."""
    known_urls = {"ok": "ok.html#ok"}
    html = '<autoref optional identifier="bar">foo</autoref> <autoref identifier=ok optional>ok</autoref>'
    expected_html = 'foo <a class="autorefs autorefs-internal" href="ok.html#ok">ok</a>'

    fixed, missing = fix_refs(html, known_urls.__getitem__)

    assert fixed == expected_html
    assert missing == []
249+
250+
251+
def test_legacy_custom_optional_hover_reference() -> None:
234252
"""Check that optional-hover HTML-based references are expanded and never reported missing."""
235253
url_map = {"ok": "ok.html#ok"}
236254
source = '<span data-autorefs-optional-hover="bar">foo</span> <span data-autorefs-optional-hover=ok>ok</span>'
@@ -242,7 +260,19 @@ def test_custom_optional_hover_reference() -> None:
242260
assert unmapped == []
243261

244262

245-
def test_external_references() -> None:
263+
def test_custom_optional_hover_reference() -> None:
    """Expand optional-hover `<autoref>` tags with a title, and never report them missing."""
    known_urls = {"ok": "ok.html#ok"}
    html = '<autoref optional hover identifier="bar">foo</autoref> <autoref optional identifier=ok hover>ok</autoref>'
    expected_html = (
        '<span title="bar">foo</span> <a class="autorefs autorefs-internal" title="ok" href="ok.html#ok">ok</a>'
    )

    fixed, missing = fix_refs(html, known_urls.__getitem__)

    assert fixed == expected_html
    assert missing == []
273+
274+
275+
def test_legacy_external_references() -> None:
246276
"""Check that external references are marked as such."""
247277
url_map = {"example": "https://example.com"}
248278
source = '<span data-autorefs-optional="example">example</span>'
@@ -251,6 +281,15 @@ def test_external_references() -> None:
251281
assert unmapped == []
252282

253283

284+
def test_external_references() -> None:
    """Mark links whose target URL is absolute with the external class."""
    known_urls = {"example": "https://example.com"}
    html = '<autoref optional identifier="example">example</autoref>'
    expected_html = '<a class="autorefs autorefs-external" href="https://example.com">example</a>'

    fixed, missing = fix_refs(html, known_urls.__getitem__)

    assert fixed == expected_html
    assert missing == []
291+
292+
254293
def test_register_markdown_anchors() -> None:
255294
"""Check that Markdown anchors are registered when enabled."""
256295
plugin = AutorefsPlugin()
@@ -333,9 +372,17 @@ def test_register_markdown_anchors_with_admonition() -> None:
333372
}
334373

335374

336-
def test_keep_data_attributes() -> None:
375+
def test_legacy_keep_data_attributes() -> None:
337376
"""Keep HTML data attributes from autorefs spans."""
338377
url_map = {"example": "https://e.com"}
339378
source = '<span data-autorefs-optional="example" class="hi ho" data-foo data-bar="0">e</span>'
340379
output, _ = fix_refs(source, url_map.__getitem__)
341380
assert output == '<a class="autorefs autorefs-external hi ho" href="https://e.com" data-foo data-bar="0">e</a>'
381+
382+
383+
def test_keep_data_attributes() -> None:
    """Forward extra HTML attributes of `<autoref>` tags to the generated link."""
    known_urls = {"example": "https://e.com"}
    html = '<autoref optional identifier="example" class="hi ho" data-foo data-bar="0">e</autoref>'
    expected_html = '<a class="autorefs autorefs-external hi ho" href="https://e.com" data-foo data-bar="0">e</a>'

    fixed, _ = fix_refs(html, known_urls.__getitem__)

    assert fixed == expected_html

0 commit comments

Comments
 (0)