Skip to content

Commit 3341add

Browse files
committed
feat: Add backlinks feature
This feature only allows recording and retrieving backlinks, for other plugins or systems to render them. Backlinks are not rendered directly by autorefs (though we could consider offering such a feature in the future). PR-65: #65 Issue-mkdocstrings-723: mkdocstrings/mkdocstrings#723 Issue-mkdocstrings-python-153: mkdocstrings/python#153
1 parent 08ea5fe commit 3341add

File tree

5 files changed

+297
-10
lines changed

5 files changed

+297
-10
lines changed

README.md

+45
Original file line numberDiff line numberDiff line change
@@ -182,3 +182,48 @@ You can also change the actual identifier of a heading, thanks again to the `att
182182
```
183183

184184
...though note that this will impact the URL anchor too (and therefore the permalink to the heading).
185+
186+
### Backlinks
187+
188+
The autorefs plugin supports recording backlinks, which other plugins or systems can then use to render backlinks in pages.
189+
190+
For example, when linking from page `foo/`, section `Section` to a heading with identifier `heading` thanks to a cross-reference `[Some heading][heading]`, the plugin will record that `foo/#section` references `heading`.
191+
192+
```md
193+
# Page foo
194+
195+
This is page foo.
196+
197+
## Section
198+
199+
This section references [some heading][heading].
200+
```
201+
202+
Other plugins or systems integrating with the autorefs plugin can then retrieve backlinks for a specific identifier:
203+
204+
```python
205+
backlinks = autorefs_plugin.get_backlinks("heading", from_url=page.url)
206+
```
207+
208+
The `get_backlinks` method returns a map of backlink types to sets of backlinks. A backlink is a tuple of navigation breadcrumbs, each breadcrumb having a title and URL.
209+
210+
```python
211+
print(backlinks)
212+
# {
213+
# "referenced-by": {
214+
# Backlink(
215+
# crumbs=(
216+
# BacklinkCrumb(title="Foo", url="foo/"),
217+
# BacklinkCrumb(title="Section", url="foo/#section"),
218+
# ),
219+
# ),
220+
# }
# }
221+
```
222+
223+
The default backlink type is `referenced-by`, but can be customized by other plugins or systems thanks to the `backlink-type` HTML data attribute on `autoref` elements. Such plugins and systems can also specify the anchor on the current page to use for the backlink with the `backlink-anchor` HTML data attribute on `autoref` elements.
224+
225+
```html
226+
<autoref identifier="heading" backlink-type="mentioned-by" backlink-anchor="section-paragraph">
227+
```
228+
229+
This feature is typically designed for use in [mkdocstrings](https://mkdocstrings.github.io/) handlers.

src/mkdocs_autorefs/backlinks.py

+81
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
"""Backlinks module."""
2+
3+
from __future__ import annotations
4+
5+
import logging
6+
from dataclasses import dataclass
7+
from typing import TYPE_CHECKING, ClassVar
8+
9+
from markdown.core import Markdown
10+
from markdown.treeprocessors import Treeprocessor
11+
12+
if TYPE_CHECKING:
13+
from xml.etree.ElementTree import Element
14+
15+
from markdown import Markdown
16+
17+
from mkdocs_autorefs.plugin import AutorefsPlugin
18+
19+
try:
20+
from mkdocs.plugins import get_plugin_logger
21+
22+
log = get_plugin_logger(__name__)
23+
except ImportError:
24+
# TODO: remove once support for MkDocs <1.5 is dropped
25+
log = logging.getLogger(f"mkdocs.plugins.{__name__}") # type: ignore[assignment]
26+
27+
28+
@dataclass(frozen=True, order=True)
class BacklinkCrumb:
    """One step in the navigation trail leading to a backlink.

    Instances are immutable, hashable and orderable (compared field by field,
    title first), so they can be stored in sets and sorted.
    """

    # Text displayed for this step of the trail.
    title: str
    # URL this step links to.
    url: str
34+
35+
36+
@dataclass(frozen=True, order=True)
class Backlink:
    """A backlink, represented by its trail of navigation breadcrumbs.

    Instances are immutable, hashable and orderable, so they can be stored in sets and sorted.
    """

    # The breadcrumbs forming the trail to the place that holds the reference.
    crumbs: tuple[BacklinkCrumb, ...]
41+
42+
43+
class BacklinksTreeProcessor(Treeprocessor):
    """Annotate `autoref` elements with `backlink-type` and `backlink-anchor` attributes.

    While walking the element tree, the processor remembers the ID of the last heading
    or Markdown anchor it met. Each `autoref` element then receives a `backlink-type`
    attribute (`referenced-by` unless one is already set) and, when such an ID is known,
    a `backlink-anchor` attribute holding it.

    These attributes are then used later to register backlinks.
    """

    name: str = "mkdocs-autorefs-backlinks"
    # ID used as the "last heading" at the start of each run.
    initial_id: str | None = None
    _htags: ClassVar[set[str]] = {"h1", "h2", "h3", "h4", "h5", "h6"}

    def __init__(self, plugin: AutorefsPlugin, md: Markdown | None = None) -> None:
        """Initialize the tree processor.

        Parameters:
            plugin: A reference to the autorefs plugin, whose `current_page` is checked on each run.
            md: The Markdown instance, forwarded to the parent class.
        """
        super().__init__(md)
        self.plugin = plugin
        self.last_heading_id: str | None = None

    def run(self, root: Element) -> None:
        """Annotate the autorefs found under `root`, unless the plugin has no current page."""
        if self.plugin.current_page is None:
            return
        # Start each document from the configured initial ID.
        self.last_heading_id = self.initial_id
        self._enhance_autorefs(root)

    def _enhance_autorefs(self, parent: Element) -> None:
        for child in parent:
            tag = child.tag

            if tag == "autoref":
                # Explicit attributes set upstream always win over the defaults.
                if "backlink-type" not in child.attrib:
                    child.set("backlink-type", "referenced-by")
                if "backlink-anchor" not in child.attrib and self.last_heading_id:
                    child.set("backlink-anchor", self.last_heading_id)
                continue

            if tag in self._htags:
                # Heading: its ID (possibly `None`) becomes the current anchor.
                self.last_heading_id = child.get("id")
                continue

            if tag != "a":
                # Any other element: look for autorefs further down.
                self._enhance_autorefs(child)
                continue

            # Markdown anchor: an `a` element without text, `href` or trailing text, but with an ID.
            has_content = child.text or child.get("href") or (child.tail and child.tail.strip())
            if has_content:
                continue
            anchor_id = child.get("id")
            if anchor_id:
                self.last_heading_id = anchor_id

src/mkdocs_autorefs/plugin.py

+66-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import contextlib
1313
import functools
1414
import logging
15+
from collections import defaultdict
1516
from pathlib import PurePosixPath as URL # noqa: N814
1617
from typing import TYPE_CHECKING, Any, Callable, Literal
1718
from urllib.parse import urlsplit
@@ -22,6 +23,7 @@
2223
from mkdocs.plugins import BasePlugin, event_priority
2324
from mkdocs.structure.pages import Page
2425

26+
from mkdocs_autorefs.backlinks import Backlink, BacklinkCrumb
2527
from mkdocs_autorefs.references import AutorefsExtension, fix_refs, relative_url
2628

2729
if TYPE_CHECKING:
@@ -30,6 +32,7 @@
3032
from jinja2.environment import Environment
3133
from mkdocs.config.defaults import MkDocsConfig
3234
from mkdocs.structure.files import Files
35+
from mkdocs.structure.nav import Section
3336
from mkdocs.structure.pages import Page
3437
from mkdocs.structure.toc import AnchorLink
3538

@@ -99,6 +102,7 @@ class AutorefsPlugin(BasePlugin[AutorefsConfig]):
99102
"""
100103

101104
scan_toc: bool = True
105+
record_backlinks: bool = False
102106
current_page: Page | None = None
103107
# YORE: Bump 2: Remove line.
104108
legacy_refs: bool = True
@@ -135,7 +139,9 @@ def __init__(self) -> None:
135139
self._primary_url_map: dict[str, list[str]] = {}
136140
self._secondary_url_map: dict[str, list[str]] = {}
137141
self._title_map: dict[str, str] = {}
142+
self._backlink_page_map: dict[str, Page] = {}
138143
self._abs_url_map: dict[str, str] = {}
144+
self._backlinks: dict[str, dict[str, set[str]]] = defaultdict(lambda: defaultdict(set))
139145
# YORE: Bump 2: Remove line.
140146
self._get_fallback_anchor: Callable[[str], tuple[str, ...]] | None = None
141147
# YORE: Bump 2: Remove line.
@@ -162,6 +168,56 @@ def get_fallback_anchor(self, value: Callable[[str], tuple[str, ...]] | None) ->
162168
stacklevel=2,
163169
)
164170

171+
def _record_backlink(self, identifier: str, backlink_type: str, backlink_anchor: str, page_url: str) -> None:
172+
"""Record a backlink.
173+
174+
Arguments:
175+
identifier: The target identifier.
176+
backlink_type: The type of backlink.
177+
backlink_anchor: The backlink target anchor.
178+
page_url: The URL of the page containing the backlink.
179+
"""
180+
# When we record backlinks, all identifiers have been registered.
181+
# If an identifier is not found in the primary or secondary URL maps, it's an absolute URL,
182+
# meaning it comes from an external source (typically an object inventory),
183+
# and we don't need to record backlinks for it.
184+
if identifier in self._primary_url_map or identifier in self._secondary_url_map:
185+
self._backlinks[identifier][backlink_type].add(f"{page_url}#{backlink_anchor}")
186+
187+
def get_backlinks(self, *identifiers: str, from_url: str) -> dict[str, set[Backlink]]:
    """Return the backlinks to the given identifiers, relative to the given URL.

    Duplicate identifiers are only considered once, and identifiers without
    recorded backlinks are ignored.

    Arguments:
        *identifiers: The identifiers to get backlinks for.
        from_url: The URL of the page where backlinks are rendered.

    Returns:
        A dictionary of backlinks, with the type of reference as key and a set of backlinks as value.
        Each backlink holds a tuple of breadcrumbs (title and URL) forming a navigation trail.
    """
    grouped: dict[str, set[Backlink]] = defaultdict(set)
    for ident in set(identifiers):
        recorded = self._backlinks.get(ident, {})
        for kind, urls in recorded.items():
            # Only touch the result when there is something to add,
            # so that no empty entry is created for a backlink type.
            if urls:
                grouped[kind].update(self._crumbs(from_url, url) for url in urls)
    return grouped
205+
206+
def _crumbs(self, from_url: str, backlink_url: str) -> Backlink:
    """Build the navigation trail of a backlink, with URLs made relative to `from_url`.

    The trail starts at the outermost navigation ancestor of the page holding the backlink,
    goes down to that page, and ends with the backlink anchor itself.
    Ancestors without a URL get their (empty) URL left untouched.

    Arguments:
        from_url: The URL of the page where backlinks are rendered.
        backlink_url: The recorded URL (page URL and anchor) of the backlink.

    Returns:
        The backlink with its breadcrumbs.

    Raises:
        KeyError: When no page was registered for `backlink_url`.
    """
    source_page: Page = self._backlink_page_map[backlink_url]
    anchor_title = self._title_map.get(backlink_url, "")

    # Built innermost-first (anchor, page, then ancestors) and reversed at the end.
    trail: list[BacklinkCrumb] = [
        BacklinkCrumb(anchor_title, relative_url(from_url, backlink_url)),
        BacklinkCrumb(source_page.title, relative_url(from_url, source_page.url + "#")),
    ]

    node: Page | Section | None = source_page.parent
    while node:
        node_url = getattr(node, "url", "")
        if node_url:
            node_url = relative_url(from_url, node_url + "#")
        trail.append(BacklinkCrumb(node.title, node_url))
        node = node.parent

    trail.reverse()
    return Backlink(tuple(trail))
220+
165221
def register_anchor(
166222
self,
167223
page: Page,
@@ -196,6 +252,8 @@ def register_anchor(
196252
url_map[identifier] = [url]
197253
if title and url not in self._title_map:
198254
self._title_map[url] = title
255+
if self.record_backlinks and url not in self._backlink_page_map:
256+
self._backlink_page_map[url] = page
199257

200258
def register_url(self, identifier: str, url: str) -> None:
201259
"""Register that the identifier should be turned into a link to this URL.
@@ -406,7 +464,7 @@ def map_urls(self, page: Page, anchor: AnchorLink) -> None:
406464

407465
@event_priority(-50) # Late, after mkdocstrings has finished loading inventories.
408466
def on_env(self, env: Environment, /, *, config: MkDocsConfig, files: Files) -> Environment: # noqa: ARG002
409-
"""Apply cross-references.
467+
"""Apply cross-references and collect backlinks.
410468
411469
Hook for the [`on_env` event](https://www.mkdocs.org/user-guide/plugins/#on_env).
412470
In this hook, we try to fix unresolved references of the form `[title][identifier]` or `[identifier][]`.
@@ -415,6 +473,9 @@ def on_env(self, env: Environment, /, *, config: MkDocsConfig, files: Files) ->
415473
416474
We log a warning for each reference that we couldn't map to an URL.
417475
476+
We also collect backlinks at the same time. We fix cross-refs and collect backlinks in a single pass
477+
for performance reasons (we don't want to run the regular expression on each page twice).
478+
418479
Arguments:
419480
env: The MkDocs environment.
420481
config: The MkDocs config object.
@@ -433,10 +494,14 @@ def on_env(self, env: Environment, /, *, config: MkDocsConfig, files: Files) ->
433494
from_url=file.page.url,
434495
fallback=self.get_fallback_anchor,
435496
)
497+
backlink_recorder = (
498+
functools.partial(self._record_backlink, page_url=file.page.url) if self.record_backlinks else None
499+
)
436500
# YORE: Bump 2: Replace `, _legacy_refs=self.legacy_refs` with `` within line.
437501
file.page.content, unmapped = fix_refs(
438502
file.page.content,
439503
url_mapper,
504+
record_backlink=backlink_recorder,
440505
link_titles=self._link_titles,
441506
strip_title_tags=self._strip_title_tags,
442507
_legacy_refs=self.legacy_refs,

src/mkdocs_autorefs/references.py

+41-9
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
from markdown.util import HTML_PLACEHOLDER_RE, INLINE_PLACEHOLDER_RE
2424
from markupsafe import Markup
2525

26+
from mkdocs_autorefs.backlinks import BacklinksTreeProcessor
27+
2628
if TYPE_CHECKING:
2729
from collections.abc import Iterable
2830
from pathlib import Path
@@ -328,6 +330,8 @@ class _AutorefsAttrs(dict):
328330
"filepath",
329331
"lineno",
330332
"slug",
333+
"backlink-type",
334+
"backlink-anchor",
331335
}
332336

333337
@property
@@ -416,6 +420,7 @@ def _strip_tags(html: str) -> str:
416420
def fix_ref(
417421
url_mapper: Callable[[str], tuple[str, str | None]],
418422
unmapped: list[tuple[str, AutorefsHookInterface.Context | None]],
423+
record_backlink: Callable[[str, str, str], None] | None = None,
419424
*,
420425
link_titles: bool | Literal["external"] = True,
421426
strip_title_tags: bool = False,
@@ -432,6 +437,7 @@ def fix_ref(
432437
url_mapper: A callable that gets an object's site URL by its identifier,
433438
such as [mkdocs_autorefs.plugin.AutorefsPlugin.get_item_url][].
434439
unmapped: A list to store unmapped identifiers.
440+
record_backlink: A callable to record backlinks.
435441
link_titles: How to set HTML titles on links. Always (`True`), never (`False`), or external-only (`"external"`).
436442
strip_title_tags: Whether to strip HTML tags from link titles.
437443
@@ -449,6 +455,13 @@ def inner(match: Match) -> str:
449455

450456
identifiers = (identifier, slug) if slug else (identifier,)
451457

458+
if (
459+
record_backlink
460+
and (backlink_type := attrs.get("backlink-type"))
461+
and (backlink_anchor := attrs.get("backlink-anchor"))
462+
):
463+
record_backlink(identifier, backlink_type, backlink_anchor)
464+
452465
try:
453466
url, original_title = _find_url(identifiers, url_mapper)
454467
except KeyError:
@@ -495,6 +508,7 @@ def fix_refs(
495508
html: str,
496509
url_mapper: Callable[[str], tuple[str, str | None]],
497510
*,
511+
record_backlink: Callable[[str, str, str], None] | None = None,
498512
link_titles: bool | Literal["external"] = True,
499513
strip_title_tags: bool = False,
500514
# YORE: Bump 2: Remove line.
@@ -506,6 +520,7 @@ def fix_refs(
506520
html: The text to fix.
507521
url_mapper: A callable that gets an object's site URL by its identifier,
508522
such as [mkdocs_autorefs.plugin.AutorefsPlugin.get_item_url][].
523+
record_backlink: A callable to record backlinks.
509524
link_titles: How to set HTML titles on links. Always (`True`), never (`False`), or external-only (`"external"`).
510525
strip_title_tags: Whether to strip HTML tags from link titles.
511526
@@ -514,7 +529,7 @@ def fix_refs(
514529
"""
515530
unmapped: list[tuple[str, AutorefsHookInterface.Context | None]] = []
516531
html = AUTOREF_RE.sub(
517-
fix_ref(url_mapper, unmapped, link_titles=link_titles, strip_title_tags=strip_title_tags),
532+
fix_ref(url_mapper, unmapped, record_backlink, link_titles=link_titles, strip_title_tags=strip_title_tags),
518533
html,
519534
)
520535

@@ -599,6 +614,11 @@ def _log_enabling_markdown_anchors() -> None:
599614
log.debug("Enabling Markdown anchors feature")
600615

601616

617+
@lru_cache
def _log_enabling_backlinks() -> None:
    """Log a debug message telling that the backlinks feature is being enabled.

    The function takes no argument and its result is cached,
    so the message is only emitted on the first call.
    """
    log.debug("Enabling backlinks feature")
620+
621+
602622
class AutorefsExtension(Extension):
603623
"""Markdown extension that transforms unresolved references into auto-references.
604624
@@ -627,7 +647,8 @@ def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent me
627647
628648
Add an instance of our [`AutorefsInlineProcessor`][mkdocs_autorefs.references.AutorefsInlineProcessor] to the Markdown parser.
629649
Also optionally add an instance of our [`AnchorScannerTreeProcessor`][mkdocs_autorefs.references.AnchorScannerTreeProcessor]
630-
to the Markdown parser if a reference to the autorefs plugin was passed to this extension.
650+
and [`BacklinksTreeProcessor`][mkdocs_autorefs.references.BacklinksTreeProcessor] to the Markdown parser
651+
if a reference to the autorefs plugin was passed to this extension.
631652
632653
Arguments:
633654
md: A `markdown.Markdown` instance.
@@ -637,10 +658,21 @@ def extendMarkdown(self, md: Markdown) -> None: # noqa: N802 (casing: parent me
637658
AutorefsInlineProcessor.name,
638659
priority=168, # Right after markdown.inlinepatterns.ReferenceInlineProcessor
639660
)
640-
if self.plugin is not None and self.plugin.scan_toc and "attr_list" in md.treeprocessors:
641-
_log_enabling_markdown_anchors()
642-
md.treeprocessors.register(
643-
AnchorScannerTreeProcessor(self.plugin, md),
644-
AnchorScannerTreeProcessor.name,
645-
priority=0,
646-
)
661+
if self.plugin is not None:
662+
# Markdown anchors require the `attr_list` extension.
663+
if self.plugin.scan_toc and "attr_list" in md.treeprocessors:
664+
_log_enabling_markdown_anchors()
665+
md.treeprocessors.register(
666+
AnchorScannerTreeProcessor(self.plugin, md),
667+
AnchorScannerTreeProcessor.name,
668+
priority=0,
669+
)
670+
# Backlinks require IDs on headings, which are either set by `toc`,
671+
# or manually by the user with `attr_list`.
672+
if self.plugin.record_backlinks and ("attr_list" in md.treeprocessors or "toc" in md.treeprocessors):
673+
_log_enabling_backlinks()
674+
md.treeprocessors.register(
675+
BacklinksTreeProcessor(self.plugin, md),
676+
BacklinksTreeProcessor.name,
677+
priority=0,
678+
)

0 commit comments

Comments
 (0)