Fix tt::Punct's spacing calculation #13548


Merged
merged 4 commits · Nov 11, 2022
10 changes: 5 additions & 5 deletions crates/hir-def/src/macro_expansion_tests/mbe/matching.rs
@@ -94,11 +94,11 @@ macro_rules! m {
     ($($s:stmt)*) => (stringify!($($s |)*);)
 }
 stringify!(;
-|;
-|92|;
-|let x = 92|;
+| ;
+|92| ;
+|let x = 92| ;
 |loop {}
-|;
+| ;
 |);
 "#]],
 );
@@ -118,7 +118,7 @@ m!(.. .. ..);
 macro_rules! m {
     ($($p:pat)*) => (stringify!($($p |)*);)
 }
-stringify!(.. .. ..|);
+stringify!(.. .. .. |);
 "#]],
 );
 }
6 changes: 3 additions & 3 deletions crates/hir-def/src/macro_expansion_tests/proc_macros.rs
@@ -82,14 +82,14 @@ fn attribute_macro_syntax_completion_2() {
 #[proc_macros::identity_when_valid]
 fn foo() { bar.; blub }
 "#,
-        expect![[r##"
+        expect![[r#"
 #[proc_macros::identity_when_valid]
 fn foo() { bar.; blub }
 
 fn foo() {
-    bar.;
+    bar. ;
     blub
-}"##]],
+}"#]],
     );
 }
 
23 changes: 10 additions & 13 deletions crates/hir-expand/src/fixup.rs
@@ -293,14 +293,10 @@ pub(crate) fn reverse_fixups(
     undo_info: &SyntaxFixupUndoInfo,
 ) {
     tt.token_trees.retain(|tt| match tt {
-        tt::TokenTree::Leaf(leaf) => {
-            token_map.synthetic_token_id(leaf.id()).is_none()
-                || token_map.synthetic_token_id(leaf.id()) != Some(EMPTY_ID)
-        }
-        tt::TokenTree::Subtree(st) => st.delimiter.map_or(true, |d| {
-            token_map.synthetic_token_id(d.id).is_none()
-                || token_map.synthetic_token_id(d.id) != Some(EMPTY_ID)
-        }),
+        tt::TokenTree::Leaf(leaf) => token_map.synthetic_token_id(leaf.id()) != Some(EMPTY_ID),
+        tt::TokenTree::Subtree(st) => {
+            st.delimiter.map_or(true, |d| token_map.synthetic_token_id(d.id) != Some(EMPTY_ID))
+        }
     });
     tt.token_trees.iter_mut().for_each(|tt| match tt {
         tt::TokenTree::Subtree(tt) => reverse_fixups(tt, token_map, undo_info),
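
One note on the `retain` simplification above: it is behavior-preserving, because for any `Option` value, `None` already compares unequal to `Some(EMPTY_ID)`, so the removed `is_none()` arms were redundant. A minimal standalone check (with a stand-in `EMPTY_ID`, not the real constant from `fixup.rs`):

```rust
fn main() {
    const EMPTY_ID: u32 = u32::MAX; // stand-in for fixup.rs's EMPTY_ID

    for id in [None, Some(EMPTY_ID), Some(42)] {
        // Old predicate, with the redundant `is_none()` arm.
        let old = id.is_none() || id != Some(EMPTY_ID);
        // New predicate from this PR.
        let new = id != Some(EMPTY_ID);
        assert_eq!(old, new);
    }
}
```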
@@ -339,9 +335,8 @@ mod tests {
 
     // the fixed-up tree should be syntactically valid
     let (parse, _) = mbe::token_tree_to_syntax_node(&tt, ::mbe::TopEntryPoint::MacroItems);
-    assert_eq!(
-        parse.errors(),
-        &[],
+    assert!(
+        parse.errors().is_empty(),
         "parse has syntax errors. parse tree:\n{:#?}",
         parse.syntax_node()
     );
@@ -468,12 +463,13 @@ fn foo() {
 }
 "#,
         expect![[r#"
-fn foo () {a .__ra_fixup}
+fn foo () {a . __ra_fixup}
 "#]],
     )
 }
 
 #[test]
+#[ignore]
 fn incomplete_field_expr_2() {
     check(
         r#"
@@ -488,6 +484,7 @@ fn foo () {a .__ra_fixup ;}
 }
 
 #[test]
+#[ignore]
Contributor Author commented:

These two tests fail in this assertion. This is because dummy ident insertion (to fix up the syntax error) changes the spacing of the puncts before it with this patch. Although we could technically record and reverse the change so as to preserve the spacing of the puncts in invalid syntax, it'd be somewhat complex, and I'm wondering how essential this invariant of preserving the original token tree is with respect to punct spacing.

Member commented:

Hm, it seems odd to me that we check whether spacing is preserved. Macros already don't care about spacing, so I don't think the tests should either. Not sure how simple it'd be, but can we just check the tokens for equality, ignoring any whitespace tokens in between?

Contributor Author commented:

Yeah, that's what I was thinking. I'll see how I can "fix" the tests without complicating things much.

cc @flodiebold since you're the original author (#11444), in case you have anything to add.
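
For illustration, one shape the suggested whitespace-insensitive check could take. This is a sketch over rendered strings with a hypothetical helper name, not rust-analyzer's actual `tt` types or test utilities; real code would more likely compare the two token trees leaf by leaf:

```rust
/// Compare two rendered token streams while ignoring all whitespace, so a
/// test asserts on the tokens themselves rather than on exact spacing.
/// Hypothetical helper, not part of rust-analyzer's test suite.
fn eq_ignoring_whitespace(a: &str, b: &str) -> bool {
    let strip = |s: &str| s.chars().filter(|c| !c.is_whitespace()).collect::<String>();
    strip(a) == strip(b)
}

fn main() {
    // These two expectations differ only in punct spacing, so a
    // spacing-insensitive test would accept both.
    assert!(eq_ignoring_whitespace(
        "fn foo () {a .__ra_fixup}",
        "fn foo () {a . __ra_fixup}",
    ));
    // Genuinely different tokens still fail the check.
    assert!(!eq_ignoring_whitespace("let x = 1;", "let y = 1;"));
}
```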

 fn incomplete_field_expr_3() {
     check(
         r#"
@@ -525,7 +522,7 @@ fn foo() {
 }
 "#,
         expect![[r#"
-fn foo () {let x = a .__ra_fixup ;}
+fn foo () {let x = a . __ra_fixup ;}
 "#]],
     )
 }
@@ -541,7 +538,7 @@ fn foo() {
 }
 "#,
         expect![[r#"
-fn foo () {a .b ; bar () ;}
+fn foo () {a . b ; bar () ;}
 "#]],
     )
 }
89 changes: 62 additions & 27 deletions crates/mbe/src/syntax_bridge.rs
@@ -12,6 +12,9 @@ use tt::buffer::{Cursor, TokenBuffer};
 
 use crate::{to_parser_input::to_parser_input, tt_iter::TtIter, TokenMap};
 
+#[cfg(test)]
+mod tests;
+
 /// Convert the syntax node to a `TokenTree` (what macro
 /// will consume).
 pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> (tt::Subtree, TokenMap) {
@@ -35,7 +38,7 @@ pub fn syntax_node_to_token_tree_with_modifications(
     append: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
 ) -> (tt::Subtree, TokenMap, u32) {
     let global_offset = node.text_range().start();
-    let mut c = Convertor::new(node, global_offset, existing_token_map, next_id, replace, append);
+    let mut c = Converter::new(node, global_offset, existing_token_map, next_id, replace, append);
     let subtree = convert_tokens(&mut c);
     c.id_alloc.map.shrink_to_fit();
     always!(c.replace.is_empty(), "replace: {:?}", c.replace);
@@ -100,7 +103,7 @@ pub fn parse_to_token_tree(text: &str) -> Option<(tt::Subtree, TokenMap)> {
         return None;
     }
 
-    let mut conv = RawConvertor {
+    let mut conv = RawConverter {
         lexed,
         pos: 0,
         id_alloc: TokenIdAlloc {
@@ -148,7 +151,7 @@ pub fn parse_exprs_with_sep(tt: &tt::Subtree, sep: char) -> Vec<tt::Subtree> {
     res
 }
 
-fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
+fn convert_tokens<C: TokenConverter>(conv: &mut C) -> tt::Subtree {
     struct StackEntry {
         subtree: tt::Subtree,
         idx: usize,
@@ -228,7 +231,7 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
         }
 
         let spacing = match conv.peek().map(|next| next.kind(conv)) {
-            Some(kind) if !kind.is_trivia() => tt::Spacing::Joint,
+            Some(kind) if is_single_token_op(kind) => tt::Spacing::Joint,
             _ => tt::Spacing::Alone,
         };
         let char = match token.to_char(conv) {
@@ -307,6 +310,35 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
     }
 }
 
+fn is_single_token_op(kind: SyntaxKind) -> bool {
+    matches!(
+        kind,
+        EQ | L_ANGLE
+            | R_ANGLE
+            | BANG
+            | AMP
+            | PIPE
+            | TILDE
+            | AT
+            | DOT
+            | COMMA
+            | SEMICOLON
+            | COLON
+            | POUND
+            | DOLLAR
+            | QUESTION
+            | PLUS
+            | MINUS
+            | STAR
+            | SLASH
+            | PERCENT
+            | CARET
+            // LIFETIME_IDENT will be split into a sequence of `'` (a single quote) and an
+            // identifier.
+            | LIFETIME_IDENT
+    )
+}
+
 /// Returns the textual content of a doc comment block as a quoted string
 /// That is, strips leading `///` (or `/**`, etc)
 /// and strips the ending `*/`
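
To make the new spacing rule concrete: a punct now gets `tt::Spacing::Joint` only when the token immediately after it is itself a single-token operator, whereas it previously got `Joint` whenever the next token was any non-trivia token. The following is a simplified, self-contained sketch of that decision over plain chars (not the real `SyntaxKind`-based code above):

```rust
#[derive(Debug, PartialEq, Clone, Copy)]
enum Spacing {
    Alone,
    Joint,
}

// Simplified stand-in for `is_single_token_op`: punctuation that lexes as a
// single token, so a preceding punct may be glued onto it.
fn is_single_token_op(c: char) -> bool {
    "=<>!&|~@.,;:#$?+-*/%^".contains(c)
}

// Mirrors the patched decision: `Joint` only before a single-token operator.
fn spacing_for(next: Option<char>) -> Spacing {
    match next {
        Some(c) if is_single_token_op(c) => Spacing::Joint,
        _ => Spacing::Alone,
    }
}

fn main() {
    // `>` followed by `=` (as in `>=`) stays joint...
    assert_eq!(spacing_for(Some('=')), Spacing::Joint);
    // ...but a punct followed by an identifier is now `Alone`, which is where
    // the `bar. ;` and `a . __ra_fixup` expectation changes come from.
    assert_eq!(spacing_for(Some('b')), Spacing::Alone);
    assert_eq!(spacing_for(None), Spacing::Alone);
}
```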
@@ -425,8 +457,8 @@ impl TokenIdAlloc {
     }
 }
 
-/// A raw token (straight from lexer) convertor
-struct RawConvertor<'a> {
+/// A raw token (straight from lexer) converter
+struct RawConverter<'a> {
     lexed: parser::LexedStr<'a>,
     pos: usize,
     id_alloc: TokenIdAlloc,
@@ -442,7 +474,7 @@ trait SrcToken<Ctx>: std::fmt::Debug {
     fn synthetic_id(&self, ctx: &Ctx) -> Option<SyntheticTokenId>;
 }
 
-trait TokenConvertor: Sized {
+trait TokenConverter: Sized {
     type Token: SrcToken<Self>;
 
     fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>>;
@@ -454,25 +486,25 @@ trait TokenConvertor: Sized {
     fn id_alloc(&mut self) -> &mut TokenIdAlloc;
 }
 
-impl<'a> SrcToken<RawConvertor<'a>> for usize {
-    fn kind(&self, ctx: &RawConvertor<'a>) -> SyntaxKind {
+impl<'a> SrcToken<RawConverter<'a>> for usize {
+    fn kind(&self, ctx: &RawConverter<'a>) -> SyntaxKind {
         ctx.lexed.kind(*self)
     }
 
-    fn to_char(&self, ctx: &RawConvertor<'a>) -> Option<char> {
+    fn to_char(&self, ctx: &RawConverter<'a>) -> Option<char> {
         ctx.lexed.text(*self).chars().next()
     }
 
-    fn to_text(&self, ctx: &RawConvertor<'_>) -> SmolStr {
+    fn to_text(&self, ctx: &RawConverter<'_>) -> SmolStr {
         ctx.lexed.text(*self).into()
     }
 
-    fn synthetic_id(&self, _ctx: &RawConvertor<'a>) -> Option<SyntheticTokenId> {
+    fn synthetic_id(&self, _ctx: &RawConverter<'a>) -> Option<SyntheticTokenId> {
         None
     }
 }
 
-impl<'a> TokenConvertor for RawConvertor<'a> {
+impl<'a> TokenConverter for RawConverter<'a> {
     type Token = usize;
 
     fn convert_doc_comment(&self, &token: &usize) -> Option<Vec<tt::TokenTree>> {
@@ -504,7 +536,7 @@ impl<'a> TokenConvertor for RawConvertor<'a> {
     }
 }
 
-struct Convertor {
+struct Converter {
     id_alloc: TokenIdAlloc,
     current: Option<SyntaxToken>,
     current_synthetic: Vec<SyntheticToken>,
@@ -515,19 +547,19 @@ struct Convertor {
     punct_offset: Option<(SyntaxToken, TextSize)>,
 }
 
-impl Convertor {
+impl Converter {
     fn new(
         node: &SyntaxNode,
         global_offset: TextSize,
         existing_token_map: TokenMap,
         next_id: u32,
         mut replace: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
         mut append: FxHashMap<SyntaxElement, Vec<SyntheticToken>>,
-    ) -> Convertor {
+    ) -> Converter {
         let range = node.text_range();
         let mut preorder = node.preorder_with_tokens();
         let (first, synthetic) = Self::next_token(&mut preorder, &mut replace, &mut append);
-        Convertor {
+        Converter {
             id_alloc: { TokenIdAlloc { map: existing_token_map, global_offset, next_id } },
             current: first,
             current_synthetic: synthetic,
@@ -590,39 +622,39 @@ impl SynToken {
     }
 }
 
-impl SrcToken<Convertor> for SynToken {
-    fn kind(&self, _ctx: &Convertor) -> SyntaxKind {
+impl SrcToken<Converter> for SynToken {
+    fn kind(&self, ctx: &Converter) -> SyntaxKind {
         match self {
             SynToken::Ordinary(token) => token.kind(),
-            SynToken::Punch(token, _) => token.kind(),
+            SynToken::Punch(..) => SyntaxKind::from_char(self.to_char(ctx).unwrap()).unwrap(),
             SynToken::Synthetic(token) => token.kind,
         }
     }
-    fn to_char(&self, _ctx: &Convertor) -> Option<char> {
+    fn to_char(&self, _ctx: &Converter) -> Option<char> {
         match self {
             SynToken::Ordinary(_) => None,
             SynToken::Punch(it, i) => it.text().chars().nth((*i).into()),
             SynToken::Synthetic(token) if token.text.len() == 1 => token.text.chars().next(),
             SynToken::Synthetic(_) => None,
         }
     }
-    fn to_text(&self, _ctx: &Convertor) -> SmolStr {
+    fn to_text(&self, _ctx: &Converter) -> SmolStr {
         match self {
             SynToken::Ordinary(token) => token.text().into(),
             SynToken::Punch(token, _) => token.text().into(),
             SynToken::Synthetic(token) => token.text.clone(),
         }
     }
 
-    fn synthetic_id(&self, _ctx: &Convertor) -> Option<SyntheticTokenId> {
+    fn synthetic_id(&self, _ctx: &Converter) -> Option<SyntheticTokenId> {
         match self {
             SynToken::Synthetic(token) => Some(token.id),
             _ => None,
         }
     }
 }
 
-impl TokenConvertor for Convertor {
+impl TokenConverter for Converter {
     type Token = SynToken;
     fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>> {
         convert_doc_comment(token.token()?)
@@ -651,7 +683,7 @@ impl TokenConvertor for Convertor {
         }
 
         let curr = self.current.clone()?;
-        if !&self.range.contains_range(curr.text_range()) {
+        if !self.range.contains_range(curr.text_range()) {
             return None;
         }
         let (new_current, new_synth) =
@@ -809,12 +841,15 @@ impl<'a> TtTreeSink<'a> {
         let next = last.bump();
         if let (
             Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(curr), _)),
-            Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(_), _)),
+            Some(tt::buffer::TokenTreeRef::Leaf(tt::Leaf::Punct(next), _)),
         ) = (last.token_tree(), next.token_tree())
         {
             // Note: We always assume the semi-colon would be the last token in
             // other parts of RA such that we don't add whitespace here.
-            if curr.spacing == tt::Spacing::Alone && curr.char != ';' {
+            //
+            // When `next` is a `Punct` of `'`, that's a part of a lifetime identifier so we don't
+            // need to add whitespace either.
+            if curr.spacing == tt::Spacing::Alone && curr.char != ';' && next.char != '\'' {
                 self.inner.token(WHITESPACE, " ");
                 self.text_pos += TextSize::of(' ');
             }
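
Sketching the effect of that rendering rule standalone (an assumed simplification of `TtTreeSink`'s behavior, not its real API): between two consecutive puncts, a space is emitted only when the first is `Alone`, it is not a statement-ending `;`, and the second does not begin a lifetime.

```rust
#[derive(Clone, Copy, PartialEq)]
enum Spacing {
    Alone,
    Joint,
}

struct Punct {
    char: char,
    spacing: Spacing,
}

// Simplified version of the condition in the hunk above.
fn needs_space(curr: &Punct, next: &Punct) -> bool {
    curr.spacing == Spacing::Alone && curr.char != ';' && next.char != '\''
}

fn main() {
    let dot = Punct { char: '.', spacing: Spacing::Alone };
    let semi = Punct { char: ';', spacing: Spacing::Alone };
    let quote = Punct { char: '\'', spacing: Spacing::Alone };

    assert!(needs_space(&dot, &dot)); // ordinary Alone puncts get a space
    assert!(!needs_space(&semi, &dot)); // `;` is assumed to end the line elsewhere in RA
    assert!(!needs_space(&dot, &quote)); // `'` starts a lifetime identifier
}
```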