|
5 | 5 | * file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
6 | 6 | */
|
7 | 7 |
|
8 |
| -//! Converts [Markdown](https://en.wikipedia.org/wiki/Markdown) to [BBCode](https://en.wikipedia.org/wiki/BBCode). |
| 8 | +//! Converts [Markdown](https://en.wikipedia.org/wiki/Markdown) to Godot-compatible [BBCode](https://en.wikipedia.org/wiki/BBCode). |
9 | 9 |
|
10 | 10 | use markdown::mdast as md;
|
11 | 11 | use markdown::{to_mdast, ParseOptions};
|
12 |
| -use std::collections::HashMap; |
| 12 | +use std::collections::{BTreeMap, HashMap}; |
13 | 13 |
|
14 |
| -pub fn to_bbcode(md: &str) -> String { |
| 14 | +/// Converts the provided Markdown string to BBCode suitable for Godot's docs renderer. |
| 15 | +/// Simulates any missing features (e.g. tables) with a best-effort approach. |
| 16 | +pub fn to_bbcode(md_text: &str) -> String { |
15 | 17 | // to_mdast() never errors with normal Markdown, so unwrap is safe.
|
16 |
| - let n = to_mdast(md, &ParseOptions::gfm()).unwrap(); |
| 18 | + let root = to_mdast(md_text, &ParseOptions::gfm()).unwrap(); |
17 | 19 |
|
18 |
| - let definitions = n |
| 20 | + // Collect link/image definitions (for reference-style links). |
| 21 | + let definitions = root |
19 | 22 | .children()
|
20 |
| - .unwrap() // root node always has children |
| 23 | + .expect("Markdown root node should always have children") |
21 | 24 | .iter()
|
22 |
| - .filter_map(|n| match n { |
| 25 | + .filter_map(|node| match node { |
23 | 26 | md::Node::Definition(def) => Some((&*def.identifier, &*def.url)),
|
24 | 27 | _ => None,
|
25 | 28 | })
|
26 | 29 | .collect::<HashMap<_, _>>();
|
27 | 30 |
|
28 |
| - walk_node(&n, &definitions).unwrap_or_default() |
29 |
| -} |
| 31 | + // Convert the root node to BBCode. |
| 32 | + let mut converter = BBCodeConverter::new(&definitions); |
| 33 | + let content = converter.walk_node(&root, 0).unwrap_or_default(); |
30 | 34 |
|
31 |
| -fn walk_node(node: &md::Node, definitions: &HashMap<&str, &str>) -> Option<String> { |
32 |
| - use md::Node::*; |
| 35 | + // Append footnotes at the bottom if any. |
| 36 | + if !converter.footnote_defs.is_empty() { |
| 37 | + let notes = converter |
| 38 | + .footnote_defs |
| 39 | + .iter() |
| 40 | + .map(|(idx, text)| format!("{} {}", BBCodeConverter::superscript(*idx), text)) |
| 41 | + .collect::<Vec<_>>() |
| 42 | + .join("[br]"); |
| 43 | + format!("{content}[br][br]{notes}") |
| 44 | + } else { |
| 45 | + content |
| 46 | + } |
| 47 | +} |
33 | 48 |
|
34 |
| - let bbcode = match node { |
35 |
| - Root(root) => walk_nodes(&root.children, definitions, "[br][br]"), |
/// Holds the state that is threaded through a Markdown AST -> Godot BBCode conversion:
/// the link definitions to resolve references against, and the footnotes seen so far.
pub struct BBCodeConverter<'a> {
    /// Reference-style link/image targets: definition identifier -> URL.
    link_reference_map: &'a HashMap<&'a str, &'a str>,

    /// Number assigned to each footnote label.
    footnote_map: HashMap<String, usize>,

    /// Rendered footnote bodies keyed by footnote number; the `BTreeMap` keeps them
    /// ordered by that number.
    footnote_defs: BTreeMap<usize, String>,

    /// The most recently assigned footnote number (0 while none has been assigned).
    current_footnote_index: usize,
}
42 | 63 |
|
43 |
| - Image(md::Image { url, .. }) => format!("[img]{url}[/img]",), |
/// Joins `strings` with `sep`, or returns `None` when the slice is empty.
///
/// A slice holding only empty strings still counts as non-empty and yields `Some`.
fn join_if_not_empty(strings: &[String], sep: &str) -> Option<String> {
    let has_items = !strings.is_empty();
    has_items.then(|| strings.join(sep))
}
44 | 73 |
|
45 |
| - ImageReference(image) => { |
46 |
| - format!( |
47 |
| - "[img]{}[/img]", |
48 |
| - definitions.get(&&*image.identifier).unwrap() |
49 |
| - ) |
| 74 | +impl<'a> BBCodeConverter<'a> { |
| 75 | + /// Creates a new converter with the provided link/image definitions. |
| 76 | + pub fn new(link_reference_map: &'a HashMap<&'a str, &'a str>) -> Self { |
| 77 | + Self { |
| 78 | + link_reference_map, |
| 79 | + footnote_map: HashMap::new(), |
| 80 | + footnote_defs: BTreeMap::new(), |
| 81 | + current_footnote_index: 0, |
50 | 82 | }
|
| 83 | + } |
51 | 84 |
|
52 |
| - Link(md::Link { url, children, .. }) => { |
53 |
| - format!("[url={url}]{}[/url]", walk_nodes(children, definitions, "")) |
54 |
| - } |
| 85 | + /// Walk an AST node and return its BBCode. Returns `None` if the node should be |
| 86 | + /// ignored. |
| 87 | + /// |
| 88 | + /// `level` is used for nesting (e.g. lists). |
| 89 | + pub fn walk_node(&mut self, node: &md::Node, level: usize) -> Option<String> { |
| 90 | + use md::Node::*; |
55 | 91 |
|
56 |
| - LinkReference(md::LinkReference { |
57 |
| - identifier, |
58 |
| - children, |
59 |
| - .. |
60 |
| - }) => format!( |
61 |
| - "[url={}]{}[/url]", |
62 |
| - definitions.get(&&**identifier).unwrap(), |
63 |
| - walk_nodes(children, definitions, "") |
64 |
| - ), |
| 92 | + let result = match node { |
| 93 | + // Root node: treat children as top-level blocks. |
| 94 | + // We join each block with [br][br], a double line break. |
| 95 | + Root(md::Root { children, .. }) => { |
| 96 | + let block_strs: Vec<_> = children |
| 97 | + .iter() |
| 98 | + .filter_map(|child| self.walk_node(child, level)) |
| 99 | + .collect(); |
65 | 100 |
|
66 |
| - Strong(strong) => format!("[b]{}[/b]", walk_nodes(&strong.children, definitions, "")), |
| 101 | + join_if_not_empty(&block_strs, "[br][br]")? |
| 102 | + } |
67 | 103 |
|
68 |
| - Text(text) => text.value.clone(), |
| 104 | + // Paragraph: gather inline children as a single line. |
| 105 | + Paragraph(md::Paragraph { children, .. }) => self.walk_inline_nodes(children, level), |
69 | 106 |
|
70 |
| - // TODO: more langs? |
71 |
| - Code(md::Code { value, .. }) => format!("[codeblock]{value}[/codeblock]"), |
| 107 | + // Inline code -> [code]...[/code] |
| 108 | + InlineCode(md::InlineCode { value, .. }) => format!("[code]{value}[/code]"), |
72 | 109 |
|
73 |
| - Paragraph(paragraph) => walk_nodes(¶graph.children, definitions, ""), |
| 110 | + // Strikethrough -> [s]...[/s] |
| 111 | + Delete(md::Delete { children, .. }) => { |
| 112 | + let inner = self.walk_inline_nodes(children, level); |
| 113 | + format!("[s]{inner}[/s]") |
| 114 | + } |
74 | 115 |
|
75 |
| - // BBCode supports lists, but docs don't. |
76 |
| - List(_) | Blockquote(_) | FootnoteReference(_) | FootnoteDefinition(_) | Table(_) => { |
77 |
| - String::new() |
78 |
| - } |
| 116 | + // Italic -> [i]...[/i] |
| 117 | + Emphasis(md::Emphasis { children, .. }) => { |
| 118 | + let inner = self.walk_inline_nodes(children, level); |
| 119 | + format!("[i]{inner}[/i]") |
| 120 | + } |
79 | 121 |
|
80 |
| - Html(html) => html.value.clone(), |
| 122 | + // Bold -> [b]...[/b] |
| 123 | + Strong(md::Strong { children, .. }) => { |
| 124 | + let inner = self.walk_inline_nodes(children, level); |
| 125 | + format!("[b]{inner}[/b]") |
| 126 | + } |
81 | 127 |
|
82 |
| - _ => walk_nodes(node.children()?, definitions, ""), |
83 |
| - }; |
| 128 | + // Plain text -> just the text, with newlines replaced by spaces. |
| 129 | + Text(md::Text { value, .. }) => value.replace("\n", " "), |
84 | 130 |
|
85 |
| - Some(bbcode) |
86 |
| -} |
| 131 | + // Heading -> single line, "fake" heading with [b]...[/b] |
| 132 | + Heading(md::Heading { children, .. }) => { |
| 133 | + let inner = self.walk_inline_nodes(children, level); |
| 134 | + format!("[b]{inner}[/b]") |
| 135 | + } |
87 | 136 |
|
88 |
| -/// Calls [`walk_node`] over every node it receives, joining them with the supplied separator. |
89 |
| -fn walk_nodes(nodes: &[md::Node], definitions: &HashMap<&str, &str>, separator: &str) -> String { |
90 |
| - nodes |
91 |
| - .iter() |
92 |
| - .filter_map(|n| walk_node(n, definitions)) |
93 |
| - .collect::<Vec<_>>() |
94 |
| - .join(separator) |
| 137 | + // Blockquote -> each child is effectively a block. We gather them with a single |
| 138 | + // [br] in between, then prefix each resulting line with "> ". |
| 139 | + Blockquote(md::Blockquote { children, .. }) => { |
| 140 | + let child_blocks: Vec<_> = children |
| 141 | + .iter() |
| 142 | + .filter_map(|child| self.walk_node(child, level)) |
| 143 | + .collect(); |
| 144 | + let content = child_blocks.join("[br]"); // Each child is a block. |
| 145 | + |
| 146 | + // Prefix each line with "> ". |
| 147 | + let mut out = String::new(); |
| 148 | + for (i, line) in content.split("[br]").enumerate() { |
| 149 | + if i > 0 { |
| 150 | + out.push_str("[br]"); |
| 151 | + } |
| 152 | + out.push_str("> "); |
| 153 | + out.push_str(line); |
| 154 | + } |
| 155 | + out |
| 156 | + } |
| 157 | + |
| 158 | + // Code block -> [codeblock lang=??]...[/codeblock] |
| 159 | + Code(md::Code { value, lang, .. }) => { |
| 160 | + let maybe_lang = lang |
| 161 | + .as_ref() |
| 162 | + .map(|l| format!(" lang={l}")) |
| 163 | + .unwrap_or_default(); |
| 164 | + format!("[codeblock{maybe_lang}]{value}[/codeblock]") |
| 165 | + } |
| 166 | + |
| 167 | + // List -> each item is on its own line with indentation. |
| 168 | + // For ordered lists, we use a counter we increment for each item. |
| 169 | + // For unordered lists, we use '•'. |
| 170 | + List(md::List { |
| 171 | + ordered, |
| 172 | + start, |
| 173 | + children, |
| 174 | + .. |
| 175 | + }) => { |
| 176 | + let indent = " ".repeat(level * 4); |
| 177 | + let mut counter = start.unwrap_or(0); |
| 178 | + |
| 179 | + let mut lines = Vec::new(); |
| 180 | + for item_node in children.iter() { |
| 181 | + if let md::Node::ListItem(item) = item_node { |
| 182 | + // Converts the item's children. These may be paragraphs or sub-lists, etc. |
| 183 | + // We join multiple paragraphs in the same item with [br]. |
| 184 | + let item_str = self.walk_nodes_as_block(&item.children, level + 1); |
| 185 | + let bullet = if *ordered { |
| 186 | + counter += 1; |
| 187 | + format!("{counter}.") |
| 188 | + } else { |
| 189 | + "•".to_string() |
| 190 | + }; |
| 191 | + let checkbox = match item.checked { |
| 192 | + Some(true) => "[x] ", |
| 193 | + Some(false) => "[ ] ", |
| 194 | + None => "", |
| 195 | + }; |
| 196 | + |
| 197 | + lines.push(format!("{indent}{bullet} {checkbox}{item_str}")); |
| 198 | + } |
| 199 | + } |
| 200 | + |
| 201 | + join_if_not_empty(&lines, "[br]")? |
| 202 | + } |
| 203 | + |
| 204 | + // Footnote reference -> a superscript number. |
| 205 | + FootnoteReference(md::FootnoteReference { label, .. }) => { |
| 206 | + if let Some(label) = label { |
| 207 | + let idx = *self.footnote_map.entry(label.clone()).or_insert_with(|| { |
| 208 | + self.current_footnote_index += 1; |
| 209 | + self.current_footnote_index |
| 210 | + }); |
| 211 | + Self::superscript(idx) |
| 212 | + } else { |
| 213 | + return None; |
| 214 | + } |
| 215 | + } |
| 216 | + |
| 217 | + // Footnote definition -> keep track of it, but produce no output here. |
| 218 | + FootnoteDefinition(md::FootnoteDefinition { |
| 219 | + label, children, .. |
| 220 | + }) => { |
| 221 | + if let Some(label) = label { |
| 222 | + let idx = *self.footnote_map.entry(label.clone()).or_insert_with(|| { |
| 223 | + self.current_footnote_index += 1; |
| 224 | + self.current_footnote_index |
| 225 | + }); |
| 226 | + let def_content = self.walk_nodes_as_block(children, level); |
| 227 | + self.footnote_defs.insert(idx, def_content); |
| 228 | + } |
| 229 | + |
| 230 | + return None; |
| 231 | + } |
| 232 | + |
| 233 | + // Image -> [url=URL]URL[/url] |
| 234 | + Image(md::Image { url, .. }) => format!("[url={url}]{url}[/url]"), |
| 235 | + |
| 236 | + // Reference-style image -> [url=URL]URL[/url] |
| 237 | + ImageReference(md::ImageReference { identifier, .. }) => { |
| 238 | + let url = self.link_reference_map.get(&**identifier).unwrap_or(&""); |
| 239 | + format!("[url={url}]{url}[/url]") |
| 240 | + } |
| 241 | + |
| 242 | + // Explicit link -> [url=URL]...[/url] |
| 243 | + Link(md::Link { url, children, .. }) => { |
| 244 | + let inner = self.walk_inline_nodes(children, level); |
| 245 | + format!("[url={url}]{inner}[/url]") |
| 246 | + } |
| 247 | + |
| 248 | + // Reference-style link -> [url=URL]...[/url] |
| 249 | + LinkReference(md::LinkReference { |
| 250 | + identifier, |
| 251 | + children, |
| 252 | + .. |
| 253 | + }) => { |
| 254 | + let url = self.link_reference_map.get(&**identifier).unwrap_or(&""); |
| 255 | + let inner = self.walk_inline_nodes(children, level); |
| 256 | + format!("[url={url}]{inner}[/url]") |
| 257 | + } |
| 258 | + |
| 259 | + // Table: approximate by reading rows as block lines. |
| 260 | + Table(md::Table { children, .. }) => { |
| 261 | + let rows: Vec<String> = children |
| 262 | + .iter() |
| 263 | + .filter_map(|row| self.walk_node(row, level)) |
| 264 | + .collect(); |
| 265 | + |
| 266 | + join_if_not_empty(&rows, "[br]")? |
| 267 | + } |
| 268 | + |
| 269 | + // TableRow -> gather cells separated by " | ". |
| 270 | + md::Node::TableRow(md::TableRow { children, .. }) => { |
| 271 | + let cells: Vec<String> = children |
| 272 | + .iter() |
| 273 | + .filter_map(|cell| self.walk_node(cell, level)) |
| 274 | + .collect(); |
| 275 | + cells.join(" | ") |
| 276 | + } |
| 277 | + |
| 278 | + // TableCell -> treat as inline. |
| 279 | + md::Node::TableCell(md::TableCell { children, .. }) => { |
| 280 | + self.walk_inline_nodes(children, level) |
| 281 | + } |
| 282 | + |
| 283 | + // Raw HTML -> output as-is. |
| 284 | + Html(md::Html { value, .. }) => value.clone(), |
| 285 | + |
| 286 | + // Hard line break -> single line break, with indentation if needed. |
| 287 | + Break(_) => format!("[br]{}", " ".repeat(level * 4)), |
| 288 | + |
| 289 | + // Fallback: just walk children. |
| 290 | + _ => { |
| 291 | + let children = node.children()?; |
| 292 | + self.walk_inline_nodes(children, level) |
| 293 | + } |
| 294 | + }; |
| 295 | + |
| 296 | + Some(result) |
| 297 | + } |
| 298 | + |
| 299 | + /// Collects multiple sibling nodes that might be block-level (list items, etc.), |
| 300 | + /// joining them with `[br]`. Ignores nodes that return `None`. If all nodes return |
| 301 | + /// `None`, returns an empty string, as if the block was empty, since this function |
| 302 | + /// is called when we expect a block of content, even if it's empty. |
| 303 | + fn walk_nodes_as_block(&mut self, nodes: &[md::Node], level: usize) -> String { |
| 304 | + let mut pieces = Vec::new(); |
| 305 | + for node in nodes { |
| 306 | + if let Some(s) = self.walk_node(node, level) { |
| 307 | + pieces.push(s); |
| 308 | + } |
| 309 | + } |
| 310 | + pieces.join("[br]") |
| 311 | + } |
| 312 | + |
| 313 | + /// Gathers children as an inline sequence: no forced breaks between them. Ignores |
| 314 | + /// nodes that return `None`. If all nodes return `None`, returns an empty string, |
| 315 | + /// as if the block was empty, since this function is called when we expect a block |
| 316 | + /// of content, even if it's empty. |
| 317 | + fn walk_inline_nodes(&mut self, children: &[md::Node], level: usize) -> String { |
| 318 | + let mut out = String::new(); |
| 319 | + for child in children { |
| 320 | + if let Some(s) = self.walk_node(child, level) { |
| 321 | + out.push_str(&s); |
| 322 | + } |
| 323 | + } |
| 324 | + out |
| 325 | + } |
| 326 | + |
/// Renders `idx` in decimal using Unicode superscript digits (e.g. 123 -> ¹²³, 0 -> ⁰).
pub fn superscript(idx: usize) -> String {
    const SUPERSCRIPT_DIGITS: [char; 10] = ['⁰', '¹', '²', '³', '⁴', '⁵', '⁶', '⁷', '⁸', '⁹'];

    // Peel off decimal digits from least to most significant, then reverse them.
    // The loop body runs at least once, so zero still yields a single digit.
    let mut reversed = Vec::new();
    let mut remaining = idx;
    loop {
        reversed.push(SUPERSCRIPT_DIGITS[remaining % 10]);
        remaining /= 10;
        if remaining == 0 {
            break;
        }
    }

    reversed.into_iter().rev().collect()
}
95 | 335 | }
|
0 commit comments