6
6
import re
7
7
import warnings
8
8
from html import escape , unescape
9
+ from html .parser import HTMLParser
9
10
from typing import TYPE_CHECKING , Any , Callable , ClassVar , Match
10
11
from urllib .parse import urlsplit
11
12
from xml .etree .ElementTree import Element
@@ -44,7 +45,12 @@ def __getattr__(name: str) -> Any:
44
45
rf"(?: class=(?P<class>{ _ATTR_VALUE } ))?(?P<attrs> [^<>]+)?>(?P<title>.*?)</span>" ,
45
46
flags = re .DOTALL ,
46
47
)
47
- """A regular expression to match mkdocs-autorefs' special reference markers
48
+ """Deprecated. Use [`AUTOREF_RE`][mkdocs_autorefs.references.AUTOREF_RE] instead."""
49
+
50
+ AUTOREF_RE = re .compile (r"<autoref (?P<attrs>.*?)>(?P<title>.*?)</autoref>" , flags = re .DOTALL )
51
+ """The autoref HTML tag regular expression.
52
+
53
+ A regular expression to match mkdocs-autorefs' special reference markers
48
54
in the [`on_post_page` hook][mkdocs_autorefs.plugin.AutorefsPlugin.on_post_page].
49
55
"""
50
56
@@ -135,8 +141,8 @@ def _make_tag(self, identifier: str, text: str) -> Element:
135
141
Returns:
136
142
A new element.
137
143
"""
138
- el = Element ("span " )
139
- el .set ("data-autorefs- identifier" , identifier )
144
+ el = Element ("autoref " )
145
+ el .set ("identifier" , identifier )
140
146
el .text = text
141
147
return el
142
148
@@ -167,7 +173,7 @@ def relative_url(url_a: str, url_b: str) -> str:
167
173
return f"{ relative } #{ anchor } "
168
174
169
175
170
- def fix_ref (url_mapper : Callable [[str ], str ], unmapped : list [str ]) -> Callable :
176
+ def _legacy_fix_ref (url_mapper : Callable [[str ], str ], unmapped : list [str ]) -> Callable :
171
177
"""Return a `repl` function for [`re.sub`](https://docs.python.org/3/library/re.html#re.sub).
172
178
173
179
In our context, we match Markdown references and replace them with HTML links.
@@ -216,7 +222,84 @@ def inner(match: Match) -> str:
216
222
return inner
217
223
218
224
219
- def fix_refs (html : str , url_mapper : Callable [[str ], str ]) -> tuple [str , list [str ]]:
225
class _AutorefsAttrs(dict):
    """Mapping of the attributes parsed from an `<autoref>` tag.

    Keys are attribute names. Values are the attribute values, or `None`
    for attributes written without a value (e.g. `optional`).
    """

    # Attributes interpreted by `fix_ref` itself; they are never copied verbatim to the output tag.
    _handled_attrs: ClassVar[set[str]] = {"identifier", "optional", "hover", "class"}

    @property
    def remaining(self) -> str:
        """Serialize the attributes that are not handled specially.

        Returns:
            A space-separated string of `name="value"` pairs (or bare `name`
            for value-less attributes), ready to be inserted in an HTML start tag.
            Empty when there is nothing left to forward.
        """
        # Values come from `HTMLParser`, which decodes character references,
        # so they must be escaped again (quotes included) before being written
        # back inside a double-quoted attribute.
        return " ".join(
            name if value is None else f'{name}="{escape(str(value), quote=True)}"'
            for name, value in self.items()
            if name not in self._handled_attrs
        )
231
+
232
+
233
class _HTMLAttrsParser(HTMLParser):
    """HTML parser that only collects the attributes of the start tags it sees."""

    def __init__(self) -> None:
        super().__init__()
        # Attributes gathered by `handle_starttag` during the current `parse` call.
        self.attrs: dict[str, str | None] = {}

    def parse(self, html: str) -> _AutorefsAttrs:
        """Parse an HTML snippet and return the attributes of its start tag(s).

        Arguments:
            html: The HTML text to parse, typically a single start tag.

        Returns:
            The collected attributes, as a new `_AutorefsAttrs` mapping.
        """
        # The instance is reused across calls: discard any unprocessed data
        # left in the parser's buffer by a previous call (e.g. an incomplete
        # tag caused by an unbalanced quote), otherwise it would be prepended
        # to this call's input.
        self.reset()
        self.attrs.clear()
        self.feed(html)
        # Copy into a new mapping so the result is not affected by later calls.
        return _AutorefsAttrs(self.attrs)

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:  # noqa: ARG002
        """Record the attributes of a start tag (the tag name itself is ignored)."""
        self.attrs.update(attrs)
245
+
246
+
247
+ _html_attrs_parser = _HTMLAttrsParser ()
248
+
249
+
250
def fix_ref(url_mapper: Callable[[str], str], unmapped: list[str]) -> Callable:
    """Return a `repl` function for [`re.sub`](https://docs.python.org/3/library/re.html#re.sub).

    In our context, we match `<autoref>` HTML tags and replace them with HTML links.

    When the matched reference's identifier was not mapped to a URL, we append the identifier
    to the outer `unmapped` list. It generally means the user is trying to cross-reference
    an object that was not collected and rendered, making it impossible to link to it.
    The caller can inspect this list afterwards, for example to issue warnings.

    Arguments:
        url_mapper: A callable that gets an object's site URL by its identifier,
            such as [mkdocs_autorefs.plugin.AutorefsPlugin.get_item_url][].
            It must raise `KeyError` for unknown identifiers.
        unmapped: A list to store unmapped identifiers.

    Returns:
        The actual function accepting a [`Match` object](https://docs.python.org/3/library/re.html#match-objects)
        and returning the replacement strings.
    """

    def inner(match: Match) -> str:
        title = match["title"]
        # Wrap the raw attributes in a dummy start tag so the HTML parser can split them.
        attrs = _html_attrs_parser.parse(f"<a {match['attrs']}>")
        identifier: str = attrs["identifier"]
        # `optional` and `hover` are flags: only their presence matters.
        optional = "optional" in attrs
        hover = "hover" in attrs
        # The parser decodes character references in attribute values,
        # so the identifier must be escaped again before being emitted
        # inside a double-quoted HTML attribute.
        title_attr = escape(identifier)

        try:
            url = url_mapper(unescape(identifier))
        except KeyError:
            if optional:
                # Optional references degrade silently to their title.
                if hover:
                    return f'<span title="{title_attr}">{title}</span>'
                return title
            unmapped.append(identifier)
            # Restore the Markdown reference syntax so the failure is visible in the page.
            if title == identifier:
                return f"[{identifier}][]"
            return f"[{title}][{identifier}]"

        parsed = urlsplit(url)
        # A URL with a scheme or a network location points outside of the site.
        external = parsed.scheme or parsed.netloc
        classes = (attrs.get("class") or "").strip().split()
        classes = ["autorefs", "autorefs-external" if external else "autorefs-internal", *classes]
        # User-provided classes were decoded by the parser too: escape them again.
        class_attr = escape(" ".join(classes))
        if remaining := attrs.remaining:
            remaining = f" {remaining}"
        if optional and hover:
            return f'<a class="{class_attr}" title="{title_attr}" href="{escape(url)}"{remaining}>{title}</a>'
        return f'<a class="{class_attr}" href="{escape(url)}"{remaining}>{title}</a>'

    return inner
300
+
301
+
302
+ def fix_refs (html : str , url_mapper : Callable [[str ], str ], * , _legacy_refs : bool = True ) -> tuple [str , list [str ]]:
220
303
"""Fix all references in the given HTML text.
221
304
222
305
Arguments:
@@ -228,7 +311,9 @@ def fix_refs(html: str, url_mapper: Callable[[str], str]) -> tuple[str, list[str
228
311
The fixed HTML.
229
312
"""
230
313
unmapped : list [str ] = []
231
- html = AUTO_REF_RE .sub (fix_ref (url_mapper , unmapped ), html )
314
+ html = AUTOREF_RE .sub (fix_ref (url_mapper , unmapped ), html )
315
+ if _legacy_refs :
316
+ html = AUTO_REF_RE .sub (_legacy_fix_ref (url_mapper , unmapped ), html )
232
317
return html , unmapped
233
318
234
319
0 commit comments