Skip to content

Commit 13428f1

Browse files
committed
fix: Fallback to slugified title as id for non-exact, non-code references ([Hello World][] -> [hello-world][])
With a heading like `## Welcome`, we should be able to cross-reference it with `[Welcome][]`, without having to specify the actual, slugified identifier: `[Welcome][welcome]`. This is compliant with the original Markdown spec.

How does it work? When the base Markdown converter doesn't convert a reference, autorefs kicks in. It converts the yet-unresolved reference to an `autoref` HTML element. If an identifier was explicitly given, it creates a regular `autoref` element like before. If only a title was provided, then there are two scenarios:

- the title converts to a `code` HTML element, in which case we create a regular `autoref` again (important for API docs);
- the title does not convert to a `code` HTML element, in which case we add a slug to the `autoref` element.

`autoref` elements without a slug are handled like before. `autoref` elements with a slug will first try to find a URL for the initial identifier (which is the title), and if that fails, will try again with the slugified title. Slugification is done with the `toc` extension's `slugify` function.

Issue-58: #58
1 parent 418e770 commit 13428f1

File tree

2 files changed

+119
-18
lines changed

2 files changed

+119
-18
lines changed

src/mkdocs_autorefs/references.py

+38-15
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,13 @@
1717
import markupsafe
1818
from markdown.core import Markdown
1919
from markdown.extensions import Extension
20+
from markdown.extensions.toc import slugify
2021
from markdown.inlinepatterns import REFERENCE_RE, ReferenceInlineProcessor
2122
from markdown.treeprocessors import Treeprocessor
2223
from markdown.util import HTML_PLACEHOLDER_RE, INLINE_PLACEHOLDER_RE
2324

2425
if TYPE_CHECKING:
26+
from collections.abc import Iterable
2527
from pathlib import Path
2628
from re import Match
2729

@@ -120,7 +122,6 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: # noqa: D107
120122

121123
# Code based on
122124
# https://github.com/Python-Markdown/markdown/blob/8e7528fa5c98bf4652deb13206d6e6241d61630b/markdown/inlinepatterns.py#L780
123-
124125
def handleMatch(self, m: Match[str], data: str) -> tuple[Element | None, int | None, int | None]: # type: ignore[override] # noqa: N802
125126
"""Handle an element that matched.
126127
@@ -135,19 +136,19 @@ def handleMatch(self, m: Match[str], data: str) -> tuple[Element | None, int | N
135136
if not handled:
136137
return None, None, None
137138

138-
identifier, end, handled = self.evalId(data, index, text)
139+
identifier, slug, end, handled = self._eval_id(data, index, text)
139140
if not handled or identifier is None:
140141
return None, None, None
141142

142-
if re.search(r"[\x00-\x1f]", identifier):
143+
if slug is None and re.search(r"[\x00-\x1f]", identifier):
143144
# Do nothing if the matched reference contains control characters (from 0 to 31 included).
144145
# Specifically `\x01` is used by Python-Markdown HTML stash when there's inline formatting,
145146
# but references with Markdown formatting are not possible anyway.
146147
return None, m.start(0), end
147148

148-
return self._make_tag(identifier, text), m.start(0), end
149+
return self._make_tag(identifier, text, slug=slug), m.start(0), end
149150

150-
def evalId(self, data: str, index: int, text: str) -> tuple[str | None, int, bool]: # noqa: N802 (parent's casing)
151+
def _eval_id(self, data: str, index: int, text: str) -> tuple[str | None, str | None, int, bool]:
151152
"""Evaluate the id portion of `[ref][id]`.
152153
153154
If `[ref][]` use `[ref]`.
@@ -158,23 +159,28 @@ def evalId(self, data: str, index: int, text: str) -> tuple[str | None, int, boo
158159
text: The text to use when no identifier.
159160
160161
Returns:
161-
A tuple containing the identifier, its end position, and whether it matched.
162+
A tuple containing the identifier, its optional slug, its end position, and whether it matched.
162163
"""
163164
m = self.RE_LINK.match(data, pos=index)
164165
if not m:
165-
return None, index, False
166+
return None, None, index, False
166167

167-
identifier = m.group(1)
168-
if not identifier:
168+
if identifier := m.group(1):
169+
# An identifier was provided, match it exactly (later).
170+
slug = None
171+
else:
172+
# Only a title was provided, use it as identifier.
169173
identifier = text
170-
# Allow the entire content to be one placeholder, with the intent of catching things like [`Foo`][].
171-
# It doesn't catch [*Foo*][] though, just due to the priority order.
172-
# https://github.com/Python-Markdown/markdown/blob/1858c1b601ead62ed49646ae0d99298f41b1a271/markdown/inlinepatterns.py#L78
174+
175+
# Catch single stash entries, like the result of [`Foo`][].
173176
if match := INLINE_PLACEHOLDER_RE.fullmatch(identifier):
174177
stashed_nodes: dict[str, Element | str] = self.md.treeprocessors["inline"].stashed_nodes # type: ignore[attr-defined]
175178
el = stashed_nodes.get(match[1])
176179
if isinstance(el, Element) and el.tag == "code":
180+
# The title was wrapped in backticks, we only keep the content,
181+
# and tell autorefs to match the identifier exactly.
177182
identifier = "".join(el.itertext())
183+
slug = None
178184
# Special case: allow pymdownx.inlinehilite raw <code> snippets but strip them back to unhighlighted.
179185
if match := HTML_PLACEHOLDER_RE.fullmatch(identifier):
180186
stash_index = int(match.group(1))
@@ -183,9 +189,9 @@ def evalId(self, data: str, index: int, text: str) -> tuple[str | None, int, boo
183189
self.md.htmlStash.rawHtmlBlocks[stash_index] = escape(identifier)
184190

185191
end = m.end(0)
186-
return identifier, end, True
192+
return identifier, slug, end, True
187193

188-
def _make_tag(self, identifier: str, text: str) -> Element:
194+
def _make_tag(self, identifier: str, text: str, *, slug: str | None = None) -> Element:
189195
"""Create a tag that can be matched by `AUTO_REF_RE`.
190196
191197
Arguments:
@@ -201,6 +207,8 @@ def _make_tag(self, identifier: str, text: str) -> Element:
201207
el.attrib.update(self.hook.get_context().as_dict())
202208
el.set("identifier", identifier)
203209
el.text = text
210+
if slug:
211+
el.attrib["slug"] = slug
204212
return el
205213

206214

@@ -300,6 +308,7 @@ class _AutorefsAttrs(dict):
300308
"origin",
301309
"filepath",
302310
"lineno",
311+
"slug",
303312
}
304313

305314
@property
@@ -337,6 +346,15 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None
337346
_html_attrs_parser = _HTMLAttrsParser()
338347

339348

349+
def _find_url(identifiers: Iterable[str], url_mapper: Callable[[str], str]) -> str:
350+
for identifier in identifiers:
351+
try:
352+
return url_mapper(identifier)
353+
except KeyError:
354+
pass
355+
raise KeyError(f"None of the identifiers {identifiers} were found")
356+
357+
340358
def fix_ref(
341359
url_mapper: Callable[[str], str],
342360
unmapped: list[tuple[str, AutorefsHookInterface.Context | None]],
@@ -363,11 +381,14 @@ def inner(match: Match) -> str:
363381
title = match["title"]
364382
attrs = _html_attrs_parser.parse(f"<a {match['attrs']}>")
365383
identifier: str = attrs["identifier"]
384+
slug = attrs.get("slug", None)
366385
optional = "optional" in attrs
367386
hover = "hover" in attrs
368387

388+
identifiers = (identifier, slug) if slug else (identifier,)
389+
369390
try:
370-
url = url_mapper(unescape(identifier))
391+
url = _find_url(identifiers, url_mapper)
371392
except KeyError:
372393
if optional:
373394
if hover:
@@ -376,6 +397,8 @@ def inner(match: Match) -> str:
376397
unmapped.append((identifier, attrs.context))
377398
if title == identifier:
378399
return f"[{identifier}][]"
400+
if title == f"<code>{identifier}</code>" and not slug:
401+
return f"[<code>{identifier}</code>][]"
379402
return f"[{title}][{identifier}]"
380403

381404
parsed = urlsplit(url)

tests/test_references.py

+81-3
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ def test_missing_reference_with_markdown_text() -> None:
181181
run_references_test(
182182
url_map={"NotFoo": "foo.html#NotFoo"},
183183
source="[`Foo`][Foo]",
184-
output="<p>[<code>Foo</code>][Foo]</p>",
184+
output="<p>[<code>Foo</code>][]</p>",
185185
unmapped=[("Foo", None)],
186186
)
187187

@@ -201,8 +201,8 @@ def test_missing_reference_with_markdown_implicit() -> None:
201201
run_references_test(
202202
url_map={"Foo-bar": "foo.html#Foo-bar"},
203203
source="[*Foo-bar*][] and [`Foo`-bar][]",
204-
output="<p>[<em>Foo-bar</em>][*Foo-bar*] and [<code>Foo</code>-bar][]</p>",
205-
unmapped=[("*Foo-bar*", None)],
204+
output="<p>[<em>Foo-bar</em>][*Foo-bar*] and [<code>Foo</code>-bar][`Foo`-bar]</p>",
205+
unmapped=[("*Foo-bar*", None), ("`Foo`-bar", None)],
206206
)
207207

208208

@@ -405,3 +405,81 @@ def test_keep_data_attributes() -> None:
405405
source = '<autoref optional identifier="example" class="hi ho" data-foo data-bar="0">e</autoref>'
406406
output, _ = fix_refs(source, url_map.__getitem__)
407407
assert output == '<a class="autorefs autorefs-external hi ho" href="https://e.com" data-foo data-bar="0">e</a>'
408+
409+
410+
@pytest.mark.parametrize(
411+
("markdown_ref", "exact_expected"),
412+
[
413+
("[Foo][]", False),
414+
("[\\`Foo][]", False),
415+
("[\\`\\`Foo][]", False),
416+
("[\\`\\`Foo\\`][]", False),
417+
("[Foo\\`][]", False),
418+
("[Foo\\`\\`][]", False),
419+
("[\\`Foo\\`\\`][]", False),
420+
("[`Foo` `Bar`][]", False),
421+
("[Foo][Foo]", True),
422+
("[`Foo`][]", True),
423+
("[`Foo``Bar`][]", True),
424+
("[`Foo```Bar`][]", True),
425+
("[``Foo```Bar``][]", True),
426+
("[``Foo`Bar``][]", True),
427+
("[```Foo``Bar```][]", True),
428+
],
429+
)
430+
def test_mark_identifiers_as_exact(markdown_ref: str, exact_expected: bool) -> None:
431+
"""Mark code and explicit identifiers as exact (no `slug` attribute in autoref elements)."""
432+
plugin = AutorefsPlugin()
433+
md = markdown.Markdown(extensions=["attr_list", "toc", AutorefsExtension(plugin)])
434+
plugin.current_page = "page"
435+
output = md.convert(markdown_ref)
436+
if exact_expected:
437+
assert "slug=" not in output
438+
else:
439+
assert "slug=" in output
440+
441+
442+
def test_slugified_identifier_fallback() -> None:
443+
"""Fallback to the slugified identifier when no URL is found."""
444+
run_references_test(
445+
url_map={"hello-world": "https://e.com#a"},
446+
source='<autoref identifier="Hello World" slug="hello-world">Hello World</autoref>',
447+
output='<p><a class="autorefs autorefs-external" href="https://e.com#a">Hello World</a></p>',
448+
)
449+
run_references_test(
450+
url_map={"foo-bar": "https://e.com#a"},
451+
source="[*Foo*-bar][]",
452+
output='<p><a class="autorefs autorefs-external" href="https://e.com#a"><em>Foo</em>-bar</a></p>',
453+
)
454+
run_references_test(
455+
url_map={"foo-bar": "https://e.com#a"},
456+
source="[`Foo`-bar][]",
457+
output='<p><a class="autorefs autorefs-external" href="https://e.com#a"><code>Foo</code>-bar</a></p>',
458+
)
459+
460+
461+
def test_no_fallback_for_exact_identifiers() -> None:
462+
"""Do not fallback to the slugified identifier for exact identifiers."""
463+
run_references_test(
464+
url_map={"hello-world": "https://e.com"},
465+
source='<autoref identifier="Hello World"><code>Hello World</code></autoref>',
466+
output="<p>[<code>Hello World</code>][]</p>",
467+
unmapped=[("Hello World", None)],
468+
)
469+
470+
run_references_test(
471+
url_map={"hello-world": "https://e.com"},
472+
source='<autoref identifier="Hello World">Hello World</autoref>',
473+
output="<p>[Hello World][]</p>",
474+
unmapped=[("Hello World", None)],
475+
)
476+
477+
478+
def test_no_fallback_for_provided_identifiers() -> None:
479+
"""Do not slugify provided identifiers."""
480+
run_references_test(
481+
url_map={"hello-world": "foo.html#hello-world"},
482+
source="[Hello][Hello world]",
483+
output="<p>[Hello][Hello world]</p>",
484+
unmapped=[("Hello world", None)],
485+
)

0 commit comments

Comments
 (0)