Skip to content

Commit 1cea7ac

Browse files
committed
refactor: Record heading titles alongside URLs
This change makes autorefs record heading titles alongside URLs, but doesn't actually change the rendering logic. The rendering logic will be updated in a later change that relies on new title-related options. Issue-33: #33
1 parent ac77752 commit 1cea7ac

File tree

4 files changed

+48
-29
lines changed

4 files changed

+48
-29
lines changed

Diff for: src/mkdocs_autorefs/plugin.py

+18-6
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ def __init__(self) -> None:
111111
# This logic unfolds in `_get_item_url`.
112112
self._primary_url_map: dict[str, list[str]] = {}
113113
self._secondary_url_map: dict[str, list[str]] = {}
114+
self._title_map: dict[str, str] = {}
114115
self._abs_url_map: dict[str, str] = {}
115116
# YORE: Bump 2: Remove line.
116117
self._get_fallback_anchor: Callable[[str], tuple[str, ...]] | None = None
@@ -133,13 +134,22 @@ def get_fallback_anchor(self, value: Callable[[str], tuple[str, ...]] | None) ->
133134
stacklevel=2,
134135
)
135136

136-
def register_anchor(self, page: str, identifier: str, anchor: str | None = None, *, primary: bool = True) -> None:
137+
def register_anchor(
138+
self,
139+
page: str,
140+
identifier: str,
141+
anchor: str | None = None,
142+
*,
143+
title: str | None = None,
144+
primary: bool = True,
145+
) -> None:
137146
"""Register that an anchor corresponding to an identifier was encountered when rendering the page.
138147
139148
Arguments:
140149
page: The relative URL of the current page. Examples: `'foo/bar/'`, `'foo/index.html'`
141150
identifier: The identifier to register.
142151
anchor: The anchor on the page, without `#`. If not provided, defaults to the identifier.
152+
title: The title of the anchor (optional).
143153
primary: Whether this anchor is the primary one for the identifier.
144154
"""
145155
page_anchor = f"{page}#{anchor or identifier}"
@@ -148,7 +158,8 @@ def register_anchor(self, page: str, identifier: str, anchor: str | None = None,
148158
if page_anchor not in url_map[identifier]:
149159
url_map[identifier].append(page_anchor)
150160
else:
151-
url_map[identifier] = [page_anchor]
161+
if title and url not in self._title_map:
162+
self._title_map[url] = title
152163

153164
def register_url(self, identifier: str, url: str) -> None:
154165
"""Register that the identifier should be turned into a link to this URL.
@@ -240,7 +251,7 @@ def get_item_url(
240251
from_url: str | None = None,
241252
# YORE: Bump 2: Remove line.
242253
fallback: Callable[[str], Sequence[str]] | None = None,
243-
) -> str:
254+
) -> tuple[str, str | None]:
244255
"""Return a site-relative URL with anchor to the identifier, if it's present anywhere.
245256
246257
Arguments:
@@ -252,11 +263,12 @@ def get_item_url(
252263
"""
253264
# YORE: Bump 2: Replace `, fallback` with `` within line.
254265
url = self._get_item_url(identifier, from_url, fallback)
266+
title = self._title_map.get(url) or None
255267
if from_url is not None:
256268
parsed = urlsplit(url)
257269
if not parsed.scheme and not parsed.netloc:
258-
return relative_url(from_url, url)
259-
return url
270+
url = relative_url(from_url, url)
271+
return url, title
260272

261273
def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None:
262274
"""Instantiate our Markdown extension.
@@ -321,7 +333,7 @@ def map_urls(self, base_url: str, anchor: AnchorLink) -> None:
321333
base_url: The base URL to use as a prefix for each anchor's relative URL.
322334
anchor: The anchor to process and to recurse on.
323335
"""
324-
self.register_anchor(base_url, anchor.id, primary=True)
336+
self.register_anchor(base_url, anchor.id, title=anchor.title, primary=True)
325337
for child in anchor.children:
326338
self.map_urls(base_url, child)
327339

Diff for: src/mkdocs_autorefs/references.py

+17-13
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ def relative_url(url_a: str, url_b: str) -> str:
258258

259259
# YORE: Bump 2: Remove block.
260260
def _legacy_fix_ref(
261-
url_mapper: Callable[[str], str],
261+
url_mapper: Callable[[str], tuple[str, str | None]],
262262
unmapped: list[tuple[str, AutorefsHookInterface.Context | None]],
263263
) -> Callable:
264264
"""Return a `repl` function for [`re.sub`](https://docs.python.org/3/library/re.html#re.sub).
@@ -287,7 +287,7 @@ def inner(match: Match) -> str:
287287
classes = (match["class"] or "").strip('"').split()
288288

289289
try:
290-
url = url_mapper(unescape(identifier))
290+
url, _ = url_mapper(unescape(identifier))
291291
except KeyError:
292292
if kind == "autorefs-optional":
293293
return title
@@ -364,7 +364,10 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None
364364
_html_attrs_parser = _HTMLAttrsParser()
365365

366366

367-
def _find_url(identifiers: Iterable[str], url_mapper: Callable[[str], str]) -> str:
367+
def _find_url(
368+
identifiers: Iterable[str],
369+
url_mapper: Callable[[str], tuple[str, str | None]],
370+
) -> tuple[str, str | None]:
368371
for identifier in identifiers:
369372
try:
370373
return url_mapper(identifier)
@@ -374,7 +377,7 @@ def _find_url(identifiers: Iterable[str], url_mapper: Callable[[str], str]) -> s
374377

375378

376379
def fix_ref(
377-
url_mapper: Callable[[str], str],
380+
url_mapper: Callable[[str], tuple[str, str | None]],
378381
unmapped: list[tuple[str, AutorefsHookInterface.Context | None]],
379382
) -> Callable:
380383
"""Return a `repl` function for [`re.sub`](https://docs.python.org/3/library/re.html#re.sub).
@@ -406,7 +409,7 @@ def inner(match: Match) -> str:
406409
identifiers = (identifier, slug) if slug else (identifier,)
407410

408411
try:
409-
url = _find_url(identifiers, url_mapper)
412+
url, original_title = _find_url(identifiers, url_mapper)
410413
except KeyError:
411414
if optional:
412415
log.debug("Unresolved optional cross-reference: %s", identifier)
@@ -436,7 +439,7 @@ def inner(match: Match) -> str:
436439

437440
def fix_refs(
438441
html: str,
439-
url_mapper: Callable[[str], str],
442+
url_mapper: Callable[[str], tuple[str, str | None]],
440443
# YORE: Bump 2: Remove line.
441444
_legacy_refs: bool = True, # noqa: FBT001, FBT002
442445
) -> tuple[str, list[tuple[str, AutorefsHookInterface.Context | None]]]:
@@ -481,7 +484,7 @@ def run(self, root: Element) -> None: # noqa: D102
481484
self._scan_anchors(root, pending_anchors)
482485
pending_anchors.flush()
483486

484-
def _scan_anchors(self, parent: Element, pending_anchors: _PendingAnchors) -> None:
487+
def _scan_anchors(self, parent: Element, pending_anchors: _PendingAnchors, last_heading: str | None = None) -> None:
485488
for el in parent:
486489
if el.tag == "a":
487490
# We found an anchor. Record its id if it has one.
@@ -490,23 +493,24 @@ def _scan_anchors(self, parent: Element, pending_anchors: _PendingAnchors) -> No
490493
# If the element has text or a link, it's not an alias.
491494
# Non-whitespace text after the element interrupts the chain, aliases can't apply.
492495
if el.text or el.get("href") or (el.tail and el.tail.strip()):
493-
pending_anchors.flush()
496+
pending_anchors.flush(title=last_heading)
494497

495498
elif el.tag == "p":
496499
# A `p` tag is a no-op for our purposes, just recurse into it in the context
497500
# of the current collection of anchors.
498-
self._scan_anchors(el, pending_anchors)
501+
self._scan_anchors(el, pending_anchors, last_heading)
499502
# Non-whitespace text after the element interrupts the chain, aliases can't apply.
500503
if el.tail and el.tail.strip():
501504
pending_anchors.flush()
502505

503506
elif el.tag in self._htags:
504507
# If the element is a heading, that turns the pending anchors into aliases.
505-
pending_anchors.flush(el.get("id"))
508+
last_heading = el.text
509+
pending_anchors.flush(el.get("id"), title=last_heading)
506510

507511
else:
508512
# But if it's some other interruption, flush anchors anyway as non-aliases.
509-
pending_anchors.flush()
513+
pending_anchors.flush(title=last_heading)
510514
# Recurse into sub-elements, in a *separate* context.
511515
self.run(el)
512516

@@ -522,9 +526,9 @@ def __init__(self, plugin: AutorefsPlugin, current_page: str):
522526
def append(self, anchor: str) -> None:
523527
self.anchors.append(anchor)
524528

525-
def flush(self, alias_to: str | None = None) -> None:
529+
def flush(self, alias_to: str | None = None, title: str | None = None) -> None:
526530
for anchor in self.anchors:
527-
self.plugin.register_anchor(self.current_page, anchor, alias_to, primary=True)
531+
self.plugin.register_anchor(self.current_page, anchor, alias_to, title=title, primary=True)
528532
self.anchors.clear()
529533

530534

Diff for: tests/test_plugin.py

+11-8
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ def test_url_registration() -> None:
1616
plugin.register_anchor(identifier="foo", page="foo1.html", primary=True)
1717
plugin.register_url(identifier="bar", url="https://example.org/bar.html")
1818

19-
assert plugin.get_item_url("foo") == "foo1.html#foo"
20-
assert plugin.get_item_url("bar") == "https://example.org/bar.html"
19+
assert plugin.get_item_url("foo") == ("foo1.html#foo", None)
20+
assert plugin.get_item_url("bar") == ("https://example.org/bar.html", None)
2121
with pytest.raises(KeyError):
2222
plugin.get_item_url("baz")
2323

@@ -28,8 +28,8 @@ def test_url_registration_with_from_url() -> None:
2828
plugin.register_anchor(identifier="foo", page="foo1.html", primary=True)
2929
plugin.register_url(identifier="bar", url="https://example.org/bar.html")
3030

31-
assert plugin.get_item_url("foo", from_url="a/b.html") == "../foo1.html#foo"
32-
assert plugin.get_item_url("bar", from_url="a/b.html") == "https://example.org/bar.html"
31+
assert plugin.get_item_url("foo", from_url="a/b.html") == ("../foo1.html#foo", None)
32+
assert plugin.get_item_url("bar", from_url="a/b.html") == ("https://example.org/bar.html", None)
3333
with pytest.raises(KeyError):
3434
plugin.get_item_url("baz", from_url="a/b.html")
3535

@@ -42,11 +42,11 @@ def test_url_registration_with_fallback() -> None:
4242
plugin.register_url(identifier="bar", url="https://example.org/bar.html")
4343

4444
# URL map will be updated with baz -> foo1.html#foo
45-
assert plugin.get_item_url("baz", fallback=lambda _: ("foo",)) == "foo1.html#foo"
45+
assert plugin.get_item_url("baz", fallback=lambda _: ("foo",)) == ("foo1.html#foo", None)
4646
# as expected, baz is now known as foo1.html#foo
47-
assert plugin.get_item_url("baz", fallback=lambda _: ("bar",)) == "foo1.html#foo"
47+
assert plugin.get_item_url("baz", fallback=lambda _: ("bar",)) == ("foo1.html#foo", None)
4848
# unknown identifiers correctly fallback: qux -> https://example.org/bar.html
49-
assert plugin.get_item_url("qux", fallback=lambda _: ("bar",)) == "https://example.org/bar.html"
49+
assert plugin.get_item_url("qux", fallback=lambda _: ("bar",)) == ("https://example.org/bar.html", None)
5050

5151
with pytest.raises(KeyError):
5252
plugin.get_item_url("foobar", fallback=lambda _: ("baaaa",))
@@ -60,7 +60,10 @@ def test_dont_make_relative_urls_relative_again() -> None:
6060
plugin.register_anchor(identifier="foo.bar.baz", page="foo/bar/baz.html", primary=True)
6161

6262
for _ in range(2):
63-
assert plugin.get_item_url("foo.bar.baz", from_url="baz/bar/foo.html") == "../../foo/bar/baz.html#foo.bar.baz"
63+
assert plugin.get_item_url("foo.bar.baz", from_url="baz/bar/foo.html") == (
64+
"../../foo/bar/baz.html#foo.bar.baz",
65+
None,
66+
)
6467

6568

6669
@pytest.mark.parametrize(

Diff for: tests/test_references.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ def run_references_test(
6565
md = markdown.Markdown(extensions=[AutorefsExtension(), *extensions], extension_configs=extensions)
6666
content = md.convert(source)
6767

68-
def url_mapper(identifier: str) -> str:
69-
return relative_url(from_url, url_map[identifier])
68+
def url_mapper(identifier: str) -> tuple[str, str | None]:
69+
return relative_url(from_url, url_map[identifier]), None
7070

7171
actual_output, actual_unmapped = fix_refs(content, url_mapper)
7272
assert actual_output == output

0 commit comments

Comments
 (0)