diff --git a/Cargo.lock b/Cargo.lock index e43f712a6e23..500a150b57b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1046,7 +1046,6 @@ dependencies = [ "arrayvec", "cov-mark", "parser", - "ra-ap-rustc_lexer", "rustc-hash", "smallvec", "span", @@ -1326,6 +1325,7 @@ dependencies = [ "base-db", "indexmap", "la-arena 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "mbe", "paths", "rustc-hash", "serde", @@ -2218,6 +2218,7 @@ name = "tt" version = "0.0.0" dependencies = [ "arrayvec", + "ra-ap-rustc_lexer", "smol_str", "stdx", "text-size", diff --git a/crates/hir-expand/src/attrs.rs b/crates/hir-expand/src/attrs.rs index 4fce7c1fde13..49a104fa118e 100644 --- a/crates/hir-expand/src/attrs.rs +++ b/crates/hir-expand/src/attrs.rs @@ -5,9 +5,10 @@ use base_db::CrateId; use cfg::CfgExpr; use either::Either; use intern::{sym, Interned}; + use mbe::{ - desugar_doc_comment_text, syntax_node_to_token_tree, token_to_literal, DelimiterKind, - DocCommentDesugarMode, Punct, + desugar_doc_comment_text, syntax_node_to_token_tree, DelimiterKind, DocCommentDesugarMode, + Punct, }; use smallvec::{smallvec, SmallVec}; use span::{Span, SyntaxContextId}; @@ -20,7 +21,7 @@ use crate::{ db::ExpandDatabase, mod_path::ModPath, span_map::SpanMapRef, - tt::{self, Subtree}, + tt::{self, token_to_literal, Subtree}, InFile, }; diff --git a/crates/hir-expand/src/lib.rs b/crates/hir-expand/src/lib.rs index c4921da61004..3460d1ca3d11 100644 --- a/crates/hir-expand/src/lib.rs +++ b/crates/hir-expand/src/lib.rs @@ -59,7 +59,7 @@ pub use span::{HirFileId, MacroCallId, MacroFileId}; pub mod tt { pub use span::Span; - pub use tt::{DelimiterKind, IdentIsRaw, LitKind, Spacing}; + pub use tt::{token_to_literal, DelimiterKind, IdentIsRaw, LitKind, Spacing}; pub type Delimiter = ::tt::Delimiter; pub type DelimSpan = ::tt::DelimSpan; diff --git a/crates/mbe/Cargo.toml b/crates/mbe/Cargo.toml index 7ce8aadfb361..1002de2104a0 100644 --- a/crates/mbe/Cargo.toml +++ b/crates/mbe/Cargo.toml @@ -17,7 +17,6 @@ rustc-hash.workspace = true smallvec.workspace = true tracing.workspace = true arrayvec.workspace = true -ra-ap-rustc_lexer.workspace = true # local deps syntax.workspace = true @@ -30,7 +29,7 @@ span.workspace = true test-utils.workspace = true [features] -in-rust-tree = ["parser/in-rust-tree", "syntax/in-rust-tree"] +in-rust-tree = ["parser/in-rust-tree", "tt/in-rust-tree", "syntax/in-rust-tree"] [lints] workspace = true diff --git a/crates/mbe/src/lib.rs b/crates/mbe/src/lib.rs index 8ab9269e9523..44b056a1acf5 100644 --- a/crates/mbe/src/lib.rs +++ b/crates/mbe/src/lib.rs @@ -6,13 +6,6 @@ //! The tests for this functionality live in another crate: //! `hir_def::macro_expansion_tests::mbe`. -#![cfg_attr(feature = "in-rust-tree", feature(rustc_private))] - -#[cfg(not(feature = "in-rust-tree"))] -extern crate ra_ap_rustc_lexer as rustc_lexer; -#[cfg(feature = "in-rust-tree")] -extern crate rustc_lexer; - mod expander; mod parser; mod syntax_bridge; @@ -36,7 +29,7 @@ pub use tt::{Delimiter, DelimiterKind, Punct}; pub use crate::syntax_bridge::{ desugar_doc_comment_text, parse_exprs_with_sep, parse_to_token_tree, parse_to_token_tree_static_span, syntax_node_to_token_tree, syntax_node_to_token_tree_modified, - token_to_literal, token_tree_to_syntax_node, DocCommentDesugarMode, SpanMapper, + token_tree_to_syntax_node, DocCommentDesugarMode, SpanMapper, }; pub use crate::syntax_bridge::dummy_test_span_utils::*; diff --git a/crates/mbe/src/syntax_bridge.rs b/crates/mbe/src/syntax_bridge.rs index 3feddba21061..4d66464932b8 100644 --- a/crates/mbe/src/syntax_bridge.rs +++ b/crates/mbe/src/syntax_bridge.rs @@ -4,7 +4,7 @@ use std::fmt; use rustc_hash::{FxHashMap, FxHashSet}; use span::{Edition, SpanAnchor, SpanData, SpanMap}; -use stdx::{format_to, itertools::Itertools, never, non_empty_vec::NonEmptyVec}; +use stdx::{format_to, never, non_empty_vec::NonEmptyVec}; use syntax::{ ast::{self, make::tokens::doc_comment}, format_smolstr, AstToken, Parse, PreorderWithTokens, SmolStr, SyntaxElement, @@ -14,6 +14,7 @@ use syntax::{ use tt::{ buffer::{Cursor, TokenBuffer}, iter::TtIter, + token_to_literal, }; use crate::to_parser_input::to_parser_input; @@ -400,56 +401,6 @@ where } } -pub fn token_to_literal(text: SmolStr, span: S) -> tt::Literal -where - S: Copy, -{ - use rustc_lexer::LiteralKind; - - let token = rustc_lexer::tokenize(&text).next_tuple(); - let Some((rustc_lexer::Token { - kind: rustc_lexer::TokenKind::Literal { kind, suffix_start }, - .. - },)) = token - else { - return tt::Literal { span, text, kind: tt::LitKind::Err(()), suffix: None }; - }; - - let (kind, start_offset, end_offset) = match kind { - LiteralKind::Int { .. } => (tt::LitKind::Integer, 0, 0), - LiteralKind::Float { .. } => (tt::LitKind::Float, 0, 0), - LiteralKind::Char { terminated } => (tt::LitKind::Char, 1, terminated as usize), - LiteralKind::Byte { terminated } => (tt::LitKind::Byte, 2, terminated as usize), - LiteralKind::Str { terminated } => (tt::LitKind::Str, 1, terminated as usize), - LiteralKind::ByteStr { terminated } => (tt::LitKind::ByteStr, 2, terminated as usize), - LiteralKind::CStr { terminated } => (tt::LitKind::CStr, 2, terminated as usize), - LiteralKind::RawStr { n_hashes } => ( - tt::LitKind::StrRaw(n_hashes.unwrap_or_default()), - 2 + n_hashes.unwrap_or_default() as usize, - 1 + n_hashes.unwrap_or_default() as usize, - ), - LiteralKind::RawByteStr { n_hashes } => ( - tt::LitKind::ByteStrRaw(n_hashes.unwrap_or_default()), - 3 + n_hashes.unwrap_or_default() as usize, - 1 + n_hashes.unwrap_or_default() as usize, - ), - LiteralKind::RawCStr { n_hashes } => ( - tt::LitKind::CStrRaw(n_hashes.unwrap_or_default()), - 3 + n_hashes.unwrap_or_default() as usize, - 1 + n_hashes.unwrap_or_default() as usize, - ), - }; - - let (lit, suffix) = text.split_at(suffix_start as usize); - let lit = &lit[start_offset..lit.len() - end_offset]; - let suffix = match suffix { - "" | "_" => None, - suffix => Some(Box::new(suffix.into())), - }; - - tt::Literal { span, text: lit.into(), kind, suffix } -} - fn is_single_token_op(kind: SyntaxKind) -> bool { matches!( kind, diff --git a/crates/proc-macro-api/Cargo.toml b/crates/proc-macro-api/Cargo.toml index 7f633d91ecc8..889eefa8b5c6 100644 --- a/crates/proc-macro-api/Cargo.toml +++ b/crates/proc-macro-api/Cargo.toml @@ -28,6 +28,8 @@ span.workspace = true # InternIds for the syntax context base-db.workspace = true la-arena.workspace = true +# only here to parse via token_to_literal +mbe.workspace = true [lints] workspace = true diff --git a/crates/proc-macro-api/src/msg.rs b/crates/proc-macro-api/src/msg.rs index b5f3d0c3aacd..65835048173a 100644 --- a/crates/proc-macro-api/src/msg.rs +++ b/crates/proc-macro-api/src/msg.rs @@ -197,7 +197,7 @@ mod tests { .into(), ), TokenTree::Leaf(Leaf::Literal(Literal { - text: "\"Foo\"".into(), + text: "Foo".into(), span: Span { range: TextRange::at(TextSize::new(10), TextSize::of("\"Foo\"")), anchor, @@ -263,32 +263,35 @@ mod tests { #[test] fn test_proc_macro_rpc_works() { let tt = fixture_token_tree(); - let mut span_data_table = Default::default(); - let task = ExpandMacro { - data: ExpandMacroData { - macro_body: FlatTree::new(&tt, CURRENT_API_VERSION, &mut span_data_table), - macro_name: Default::default(), - attributes: None, - has_global_spans: ExpnGlobals { - serialize: true, - def_site: 0, - call_site: 0, - mixed_site: 0, + for v in RUST_ANALYZER_SPAN_SUPPORT..=CURRENT_API_VERSION { + let mut span_data_table = Default::default(); + let task = ExpandMacro { + data: ExpandMacroData { + macro_body: FlatTree::new(&tt, v, &mut span_data_table), + macro_name: Default::default(), + attributes: None, + has_global_spans: ExpnGlobals { + serialize: true, + def_site: 0, + call_site: 0, + mixed_site: 0, + }, + span_data_table: Vec::new(), }, - span_data_table: Vec::new(), - }, - lib: Utf8PathBuf::from_path_buf(std::env::current_dir().unwrap()).unwrap(), - env: Default::default(), - current_dir: Default::default(), - }; + lib: Utf8PathBuf::from_path_buf(std::env::current_dir().unwrap()).unwrap(), + env: Default::default(), + current_dir: Default::default(), + }; - let json = serde_json::to_string(&task).unwrap(); - // println!("{}", json); - let back: ExpandMacro = serde_json::from_str(&json).unwrap(); + let json = serde_json::to_string(&task).unwrap(); + // println!("{}", json); + let back: ExpandMacro = serde_json::from_str(&json).unwrap(); - assert_eq!( - tt, - back.data.macro_body.to_subtree_resolved(CURRENT_API_VERSION, &span_data_table) - ); + assert_eq!( + tt, + back.data.macro_body.to_subtree_resolved(v, &span_data_table), + "version: {v}" + ); + } } } diff --git a/crates/proc-macro-api/src/msg/flat.rs b/crates/proc-macro-api/src/msg/flat.rs index 7f5afdb7270f..3d962e99d92c 100644 --- a/crates/proc-macro-api/src/msg/flat.rs +++ b/crates/proc-macro-api/src/msg/flat.rs @@ -141,6 +141,7 @@ impl FlatTree { ident: Vec::new(), token_tree: Vec::new(), text: Vec::new(), + version, }; w.write(subtree); @@ -178,6 +179,7 @@ impl FlatTree { ident: Vec::new(), token_tree: Vec::new(), text: Vec::new(), + version, }; w.write(subtree); @@ -228,6 +230,7 @@ impl FlatTree { token_tree: self.token_tree, text: self.text, span_data_table, + version, } .read() } @@ -253,6 +256,7 @@ impl FlatTree { token_tree: self.token_tree, text: self.text, span_data_table: &(), + version, } .read() } @@ -386,8 +390,9 @@ impl InternableSpan for Span { struct Writer<'a, 'span, S: InternableSpan> { work: VecDeque<(usize, &'a tt::Subtree)>, - string_table: FxHashMap<&'a str, u32>, + string_table: FxHashMap, u32>, span_data_table: &'span mut S::Table, + version: u32, subtree: Vec, literal: Vec, @@ -425,9 +430,15 @@ impl<'a, 'span, S: InternableSpan> Writer<'a, 'span, S> { tt::TokenTree::Leaf(leaf) => match leaf { tt::Leaf::Literal(lit) => { let idx = self.literal.len() as u32; - let text = self.intern(&lit.text); let id = self.token_id_of(lit.span); - let suffix = lit.suffix.as_ref().map(|s| self.intern(s)).unwrap_or(!0); + let (text, suffix) = if self.version >= EXTENDED_LEAF_DATA { + ( + self.intern(&lit.text), + lit.suffix.as_ref().map(|s| self.intern(s)).unwrap_or(!0), + ) + } else { + (self.intern_owned(format!("{lit}")), !0) + }; self.literal.push(LiteralRepr { id, text, @@ -456,13 +467,15 @@ impl<'a, 'span, S: InternableSpan> Writer<'a, 'span, S> { } tt::Leaf::Ident(ident) => { let idx = self.ident.len() as u32; - let text = self.intern(&ident.text); let id = self.token_id_of(ident.span); - self.ident.push(IdentRepr { - id, - text, - is_raw: ident.is_raw == tt::IdentIsRaw::Yes, - }); + let text = if self.version >= EXTENDED_LEAF_DATA { + self.intern(&ident.text) + } else if ident.is_raw.yes() { + self.intern_owned(format!("r#{}", ident.text,)) + } else { + self.intern(&ident.text) + }; + self.ident.push(IdentRepr { id, text, is_raw: ident.is_raw.yes() }); idx << 2 | 0b11 } }, @@ -484,15 +497,25 @@ impl<'a, 'span, S: InternableSpan> Writer<'a, 'span, S> { pub(crate) fn intern(&mut self, text: &'a str) -> u32 { let table = &mut self.text; - *self.string_table.entry(text).or_insert_with(|| { + *self.string_table.entry(text.into()).or_insert_with(|| { let idx = table.len(); table.push(text.to_owned()); idx as u32 }) } + + pub(crate) fn intern_owned(&mut self, text: String) -> u32 { + let table = &mut self.text; + *self.string_table.entry(text.clone().into()).or_insert_with(|| { + let idx = table.len(); + table.push(text); + idx as u32 + }) + } } struct Reader<'span, S: InternableSpan> { + version: u32, subtree: Vec, literal: Vec, punct: Vec, @@ -528,30 +551,36 @@ impl<'span, S: InternableSpan> Reader<'span, S> { 0b01 => { use tt::LitKind::*; let repr = &self.literal[idx]; - tt::Leaf::Literal(tt::Literal { - text: self.text[repr.text as usize].as_str().into(), - span: read_span(repr.id), - kind: match u16::to_le_bytes(repr.kind) { - [0, _] => Err(()), - [1, _] => Byte, - [2, _] => Char, - [3, _] => Integer, - [4, _] => Float, - [5, _] => Str, - [6, r] => StrRaw(r), - [7, _] => ByteStr, - [8, r] => ByteStrRaw(r), - [9, _] => CStr, - [10, r] => CStrRaw(r), - _ => unreachable!(), - }, - suffix: if repr.suffix != !0 { - Some(Box::new( - self.text[repr.suffix as usize].as_str().into(), - )) - } else { - None - }, + let text = self.text[repr.text as usize].as_str(); + let span = read_span(repr.id); + tt::Leaf::Literal(if self.version >= EXTENDED_LEAF_DATA { + tt::Literal { + text: text.into(), + span, + kind: match u16::to_le_bytes(repr.kind) { + [0, _] => Err(()), + [1, _] => Byte, + [2, _] => Char, + [3, _] => Integer, + [4, _] => Float, + [5, _] => Str, + [6, r] => StrRaw(r), + [7, _] => ByteStr, + [8, r] => ByteStrRaw(r), + [9, _] => CStr, + [10, r] => CStrRaw(r), + _ => unreachable!(), + }, + suffix: if repr.suffix != !0 { + Some(Box::new( + self.text[repr.suffix as usize].as_str().into(), + )) + } else { + None + }, + } + } else { + tt::token_to_literal(text.into(), span) }) .into() } @@ -566,14 +595,23 @@ impl<'span, S: InternableSpan> Reader<'span, S> { } 0b11 => { let repr = &self.ident[idx]; + let text = self.text[repr.text as usize].as_str(); + let (is_raw, text) = if self.version >= EXTENDED_LEAF_DATA { + ( + if repr.is_raw { + tt::IdentIsRaw::Yes + } else { + tt::IdentIsRaw::No + }, + text, + ) + } else { + tt::IdentIsRaw::split_from_symbol(text) + }; tt::Leaf::Ident(tt::Ident { - text: self.text[repr.text as usize].as_str().into(), + text: text.into(), span: read_span(repr.id), - is_raw: if repr.is_raw { - tt::IdentIsRaw::Yes - } else { - tt::IdentIsRaw::No - }, + is_raw, }) .into() } diff --git a/crates/proc-macro-srv/Cargo.toml b/crates/proc-macro-srv/Cargo.toml index 735f781c4391..065701c05cc7 100644 --- a/crates/proc-macro-srv/Cargo.toml +++ b/crates/proc-macro-srv/Cargo.toml @@ -34,7 +34,7 @@ proc-macro-test.path = "./proc-macro-test" [features] sysroot-abi = [] -in-rust-tree = ["mbe/in-rust-tree", "sysroot-abi"] +in-rust-tree = ["mbe/in-rust-tree", "tt/in-rust-tree","sysroot-abi"] [lints] workspace = true diff --git a/crates/tt/Cargo.toml b/crates/tt/Cargo.toml index 1311e2ddf89e..1900635b995e 100644 --- a/crates/tt/Cargo.toml +++ b/crates/tt/Cargo.toml @@ -17,6 +17,10 @@ smol_str.workspace = true text-size.workspace = true stdx.workspace = true +ra-ap-rustc_lexer.workspace = true + +[features] +in-rust-tree = [] [lints] workspace = true diff --git a/crates/tt/src/lib.rs b/crates/tt/src/lib.rs index 24fd0abada5f..c328b3f8a3cc 100644 --- a/crates/tt/src/lib.rs +++ b/crates/tt/src/lib.rs @@ -2,14 +2,21 @@ //! input and output) of macros. It closely mirrors `proc_macro` crate's //! `TokenTree`. +#![cfg_attr(feature = "in-rust-tree", feature(rustc_private))] + +#[cfg(not(feature = "in-rust-tree"))] +extern crate ra_ap_rustc_lexer as rustc_lexer; +#[cfg(feature = "in-rust-tree")] +extern crate rustc_lexer; + pub mod buffer; pub mod iter; use std::fmt; -use stdx::impl_from; +use stdx::{impl_from, itertools::Itertools as _}; -pub use smol_str::SmolStr; +pub use smol_str::{format_smolstr, SmolStr}; pub use text_size::{TextRange, TextSize}; #[derive(Clone, PartialEq, Debug)] @@ -196,6 +203,56 @@ pub struct Literal { pub suffix: Option>, } +pub fn token_to_literal(text: SmolStr, span: S) -> Literal +where + S: Copy, +{ + use rustc_lexer::LiteralKind; + + let token = rustc_lexer::tokenize(&text).next_tuple(); + let Some((rustc_lexer::Token { + kind: rustc_lexer::TokenKind::Literal { kind, suffix_start }, + .. + },)) = token + else { + return Literal { span, text, kind: LitKind::Err(()), suffix: None }; + }; + + let (kind, start_offset, end_offset) = match kind { + LiteralKind::Int { .. } => (LitKind::Integer, 0, 0), + LiteralKind::Float { .. } => (LitKind::Float, 0, 0), + LiteralKind::Char { terminated } => (LitKind::Char, 1, terminated as usize), + LiteralKind::Byte { terminated } => (LitKind::Byte, 2, terminated as usize), + LiteralKind::Str { terminated } => (LitKind::Str, 1, terminated as usize), + LiteralKind::ByteStr { terminated } => (LitKind::ByteStr, 2, terminated as usize), + LiteralKind::CStr { terminated } => (LitKind::CStr, 2, terminated as usize), + LiteralKind::RawStr { n_hashes } => ( + LitKind::StrRaw(n_hashes.unwrap_or_default()), + 2 + n_hashes.unwrap_or_default() as usize, + 1 + n_hashes.unwrap_or_default() as usize, + ), + LiteralKind::RawByteStr { n_hashes } => ( + LitKind::ByteStrRaw(n_hashes.unwrap_or_default()), + 3 + n_hashes.unwrap_or_default() as usize, + 1 + n_hashes.unwrap_or_default() as usize, + ), + LiteralKind::RawCStr { n_hashes } => ( + LitKind::CStrRaw(n_hashes.unwrap_or_default()), + 3 + n_hashes.unwrap_or_default() as usize, + 1 + n_hashes.unwrap_or_default() as usize, + ), + }; + + let (lit, suffix) = text.split_at(suffix_start as usize); + let lit = &lit[start_offset..lit.len() - end_offset]; + let suffix = match suffix { + "" | "_" => None, + suffix => Some(Box::new(suffix.into())), + }; + + Literal { span, text: lit.into(), kind, suffix } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct Punct { pub char: char,