From a78ed54b647b9f9c7e781c2b07b2ff24f62ccc1b Mon Sep 17 00:00:00 2001 From: Julian <juliandomke@outlook.de> Date: Sat, 12 Apr 2025 16:08:11 +0200 Subject: [PATCH 01/15] simplify --- crates/pgt_statement_splitter/src/parser.rs | 99 +++++++++---------- .../src/parser/common.rs | 10 +- .../pgt_statement_splitter/src/parser/dml.rs | 2 +- 3 files changed, 55 insertions(+), 56 deletions(-) diff --git a/crates/pgt_statement_splitter/src/parser.rs b/crates/pgt_statement_splitter/src/parser.rs index 4cdf1fc6..b985de0e 100644 --- a/crates/pgt_statement_splitter/src/parser.rs +++ b/crates/pgt_statement_splitter/src/parser.rs @@ -13,18 +13,20 @@ use crate::diagnostics::SplitDiagnostic; /// Main parser that exposes the `cstree` api, and collects errors and statements /// It is modelled after a Pratt Parser. For a gentle introduction to Pratt Parsing, see https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html pub struct Parser { - /// The ranges of the statements - ranges: Vec<(usize, usize)>, + stmt_ranges: Vec<(usize, usize)>, + /// The syntax errors accumulated during parsing errors: Vec<SplitDiagnostic>, + /// The start of the current statement, if any current_stmt_start: Option<usize>, + /// The tokens to parse pub tokens: Vec<Token>, eof_token: Token, - next_pos: usize, + current_pos: usize, } /// Result of Building @@ -46,35 +48,29 @@ impl Parser { )); // next_pos should be the initialised with the first valid token already - let mut next_pos = 0; - loop { - let token = tokens.get(next_pos).unwrap_or(&eof_token); - - if is_irrelevant_token(token) { - next_pos += 1; - } else { - break; - } + let mut current_pos = 0; + while is_irrelevant_token(tokens.get(current_pos).unwrap_or(&eof_token)) { + current_pos += 1; } Self { - ranges: Vec::new(), + stmt_ranges: Vec::new(), eof_token, errors: Vec::new(), current_stmt_start: None, tokens, - next_pos, + current_pos, } } pub fn finish(self) -> Parse { Parse { ranges: self - .ranges + .stmt_ranges .iter() - .map(|(start, end)| { - let from = self.tokens.get(*start); - let to = self.tokens.get(*end).unwrap_or(&self.eof_token); + .map(|(start_token_pos, end_token_pos)| { + let from = self.tokens.get(*start_token_pos); + let to = self.tokens.get(*end_token_pos).unwrap_or(&self.eof_token); TextRange::new(from.unwrap().span.start(), to.span.end()) }) @@ -90,15 +86,25 @@ impl Parser { "cannot start statement within statement at {:?}", self.tokens.get(self.current_stmt_start.unwrap()) ); - self.current_stmt_start = Some(self.next_pos); + self.current_stmt_start = Some(self.current_pos); } /// Close statement pub fn close_stmt(&mut self) { - assert!(self.next_pos > 0); + assert!( + matches!(self.current_stmt_start, Some(_)), + "Must start statement before closing it." + ); + + let start_token_pos = self.current_stmt_start.unwrap(); + + assert!( + self.current_pos > start_token_pos, + "Must close the statement on a token that's later than the start token." + ); // go back the positions until we find the first relevant token - let mut end_token_pos = self.next_pos - 1; + let mut end_token_pos = self.current_pos - 1; loop { let token = self.tokens.get(end_token_pos); @@ -106,17 +112,14 @@ impl Parser { break; } - if !is_irrelevant_token(token.unwrap()) { + if is_relevant(token.unwrap()) { break; } end_token_pos -= 1; } - self.ranges.push(( - self.current_stmt_start.expect("Expected active statement"), - end_token_pos, - )); + self.stmt_ranges.push((start_token_pos, end_token_pos)); self.current_stmt_start = None; } @@ -124,23 +127,23 @@ impl Parser { fn advance(&mut self) -> &Token { let mut first_relevant_token = None; loop { - let token = self.tokens.get(self.next_pos).unwrap_or(&self.eof_token); + let token = self.tokens.get(self.current_pos).unwrap_or(&self.eof_token); // we need to continue with next_pos until the next relevant token after we already // found the first one - if !is_irrelevant_token(token) { + if is_relevant(token) { if let Some(t) = first_relevant_token { return t; } first_relevant_token = Some(token); } - self.next_pos += 1; + self.current_pos += 1; } } - fn peek(&self) -> &Token { - match self.tokens.get(self.next_pos) { + fn current(&self) -> &Token { + match self.tokens.get(self.current_pos) { Some(token) => token, None => &self.eof_token, } @@ -148,22 +151,15 @@ impl Parser { /// Look ahead to the next relevant token fn look_ahead(&self) -> Option<&Token> { - // we need to look ahead to the next relevant token - let mut look_ahead_pos = self.next_pos + 1; - loop { - let token = self.tokens.get(look_ahead_pos)?; - - if !is_irrelevant_token(token) { - return Some(token); - } - - look_ahead_pos += 1; - } + self.tokens + .iter() + .skip(self.current_pos + 1) + .find(|t| is_relevant(t)) } fn look_back(&self) -> Option<&Token> { // we need to look back to the last relevant token - let mut look_back_pos = self.next_pos - 1; + let mut look_back_pos = self.current_pos - 1; loop { let token = self.tokens.get(look_back_pos); @@ -171,7 +167,7 @@ impl Parser { return None; } - if !is_irrelevant_token(token.unwrap()) { + if is_relevant(token.unwrap()) { return token; } @@ -179,10 +175,9 @@ impl Parser { } } - /// checks if the current token is of `kind` and advances if true - /// returns true if the current token is of `kind` - pub fn eat(&mut self, kind: SyntaxKind) -> bool { - if self.peek().kind == kind { + /// Returns `true` when it advanced, `false` if it didn't + pub fn advance_if_kind(&mut self, kind: SyntaxKind) -> bool { + if self.current().kind == kind { self.advance(); true } else { @@ -191,13 +186,13 @@ impl Parser { } pub fn expect(&mut self, kind: SyntaxKind) { - if self.eat(kind) { + if self.advance_if_kind(kind) { return; } self.errors.push(SplitDiagnostic::new( format!("Expected {:#?}", kind), - self.peek().span, + self.current().span, )); } } @@ -217,3 +212,7 @@ fn is_irrelevant_token(t: &Token) -> bool { WHITESPACE_TOKENS.contains(&t.kind) && (t.kind != SyntaxKind::Newline || t.text.chars().count() == 1) } + +fn is_relevant(t: &Token) -> bool { + !is_irrelevant_token(t) +} diff --git a/crates/pgt_statement_splitter/src/parser/common.rs b/crates/pgt_statement_splitter/src/parser/common.rs index af3dc6cc..d327477d 100644 --- a/crates/pgt_statement_splitter/src/parser/common.rs +++ b/crates/pgt_statement_splitter/src/parser/common.rs @@ -9,7 +9,7 @@ use super::{ pub fn source(p: &mut Parser) { loop { - match p.peek() { + match p.current() { Token { kind: SyntaxKind::Eof, .. @@ -33,7 +33,7 @@ pub fn source(p: &mut Parser) { pub(crate) fn statement(p: &mut Parser) { p.start_stmt(); - match p.peek().kind { + match p.current().kind { SyntaxKind::With => { cte(p); } @@ -68,7 +68,7 @@ pub(crate) fn parenthesis(p: &mut Parser) { let mut depth = 1; loop { - match p.peek().kind { + match p.current().kind { SyntaxKind::Ascii40 => { p.advance(); depth += 1; @@ -91,7 +91,7 @@ pub(crate) fn case(p: &mut Parser) { p.expect(SyntaxKind::Case); loop { - match p.peek().kind { + match p.current().kind { SyntaxKind::EndP => { p.advance(); break; @@ -105,7 +105,7 @@ pub(crate) fn case(p: &mut Parser) { pub(crate) fn unknown(p: &mut Parser, exclude: &[SyntaxKind]) { loop { - match p.peek() { + match p.current() { Token { kind: SyntaxKind::Ascii59, .. diff --git a/crates/pgt_statement_splitter/src/parser/dml.rs b/crates/pgt_statement_splitter/src/parser/dml.rs index a45f6c40..d1dc0bdd 100644 --- a/crates/pgt_statement_splitter/src/parser/dml.rs +++ b/crates/pgt_statement_splitter/src/parser/dml.rs @@ -13,7 +13,7 @@ pub(crate) fn cte(p: &mut Parser) { p.expect(SyntaxKind::As); parenthesis(p); - if !p.eat(SyntaxKind::Ascii44) { + if !p.advance_if_kind(SyntaxKind::Ascii44) { break; } } From ed246e0e948322e6722ed4be8c2425f39d001a31 Mon Sep 17 00:00:00 2001 From: Julian <juliandomke@outlook.de> Date: Sat, 12 Apr 2025 16:10:01 +0200 Subject: [PATCH 02/15] simplify 2 --- crates/pgt_statement_splitter/src/parser.rs | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/crates/pgt_statement_splitter/src/parser.rs b/crates/pgt_statement_splitter/src/parser.rs index b985de0e..d9dcde9b 100644 --- a/crates/pgt_statement_splitter/src/parser.rs +++ b/crates/pgt_statement_splitter/src/parser.rs @@ -158,21 +158,11 @@ impl Parser { } fn look_back(&self) -> Option<&Token> { - // we need to look back to the last relevant token - let mut look_back_pos = self.current_pos - 1; - loop { - let token = self.tokens.get(look_back_pos); - - if look_back_pos == 0 || token.is_none() { - return None; - } - - if is_relevant(token.unwrap()) { - return token; - } - - look_back_pos -= 1; - } + self.tokens + .iter() + .take(self.current_pos) + .rev() + .find(|t| is_relevant(t)) } /// Returns `true` when it advanced, `false` if it didn't From b55298586c218eedfb130db3fe0c910b861a2484 Mon Sep 17 00:00:00 2001 From: Julian <juliandomke@outlook.de> Date: Sat, 12 Apr 2025 16:16:47 +0200 Subject: [PATCH 03/15] simplify 3 --- crates/pgt_statement_splitter/src/parser.rs | 25 +++++++-------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/crates/pgt_statement_splitter/src/parser.rs b/crates/pgt_statement_splitter/src/parser.rs index d9dcde9b..d7d4e4c9 100644 --- a/crates/pgt_statement_splitter/src/parser.rs +++ b/crates/pgt_statement_splitter/src/parser.rs @@ -103,21 +103,13 @@ impl Parser { "Must close the statement on a token that's later than the start token." ); - // go back the positions until we find the first relevant token - let mut end_token_pos = self.current_pos - 1; - loop { - let token = self.tokens.get(end_token_pos); - - if end_token_pos == 0 || token.is_none() { - break; - } - - if is_relevant(token.unwrap()) { - break; - } - - end_token_pos -= 1; - } + // find last relevant token before current position + let (end_token_pos, _) = self + .tokens + .iter() + .enumerate() + .rfind(|(i, t)| is_relevant(t) && i < &self.current_pos) + .unwrap(); self.stmt_ranges.push((start_token_pos, end_token_pos)); @@ -161,8 +153,7 @@ impl Parser { self.tokens .iter() .take(self.current_pos) - .rev() - .find(|t| is_relevant(t)) + .rfind(|t| is_relevant(t)) } /// Returns `true` when it advanced, `false` if it didn't From 3000182c4037813e9dcff891072ed3eac2ab1674 Mon Sep 17 00:00:00 2001 From: Julian <juliandomke@outlook.de> Date: Sat, 12 Apr 2025 17:02:41 +0200 Subject: [PATCH 04/15] ok --- crates/pgt_statement_splitter/src/parser.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/pgt_statement_splitter/src/parser.rs b/crates/pgt_statement_splitter/src/parser.rs index d7d4e4c9..aabdb0a8 100644 --- a/crates/pgt_statement_splitter/src/parser.rs +++ b/crates/pgt_statement_splitter/src/parser.rs @@ -108,7 +108,8 @@ impl Parser { .tokens .iter() .enumerate() - .rfind(|(i, t)| is_relevant(t) && i < &self.current_pos) + .take(self.current_pos) + .rfind(|(_, t)| is_relevant(t)) .unwrap(); self.stmt_ranges.push((start_token_pos, end_token_pos)); From 64f2bb5339f3881a830335e9a4bb7347a9d9f2e7 Mon Sep 17 00:00:00 2001 From: Julian <juliandomke@outlook.de> Date: Sat, 12 Apr 2025 17:19:14 +0200 Subject: [PATCH 05/15] ffs --- crates/pgt_statement_splitter/src/parser.rs | 73 ++++++++++++++++----- 1 file changed, 58 insertions(+), 15 deletions(-) diff --git a/crates/pgt_statement_splitter/src/parser.rs b/crates/pgt_statement_splitter/src/parser.rs index aabdb0a8..1dd73b19 100644 --- a/crates/pgt_statement_splitter/src/parser.rs +++ b/crates/pgt_statement_splitter/src/parser.rs @@ -53,6 +53,8 @@ impl Parser { current_pos += 1; } + println!("We are starting at {}", current_pos); + Self { stmt_ranges: Vec::new(), eof_token, @@ -118,21 +120,16 @@ impl Parser { } fn advance(&mut self) -> &Token { - let mut first_relevant_token = None; - loop { - let token = self.tokens.get(self.current_pos).unwrap_or(&self.eof_token); - - // we need to continue with next_pos until the next relevant token after we already - // found the first one - if is_relevant(token) { - if let Some(t) = first_relevant_token { - return t; - } - first_relevant_token = Some(token); - } - - self.current_pos += 1; - } + let (pos, token) = self + .tokens + .iter() + .enumerate() + .skip(self.current_pos + 1) + .find(|(_, t)| is_relevant(t)) + .unwrap_or((self.tokens.len(), &self.eof_token)); + + self.current_pos = pos; + token } fn current(&self) -> &Token { @@ -198,3 +195,49 @@ fn is_irrelevant_token(t: &Token) -> bool { fn is_relevant(t: &Token) -> bool { !is_irrelevant_token(t) } + +#[cfg(test)] +mod tests { + use pgt_lexer::SyntaxKind; + + use crate::parser::Parser; + + #[test] + fn advance_works_as_expected() { + let sql = r#" + create table users ( + id serial primary key, + name text, + email text + ); + "#; + let tokens = pgt_lexer::lex(sql).unwrap(); + + let mut parser = Parser::new(tokens); + + let expected = vec![ + (SyntaxKind::Create, 2), + (SyntaxKind::Table, 4), + (SyntaxKind::Ident, 6), + (SyntaxKind::Ascii40, 8), + (SyntaxKind::Ident, 11), + (SyntaxKind::Ident, 13), + (SyntaxKind::Primary, 15), + (SyntaxKind::Key, 17), + (SyntaxKind::Ascii44, 18), + (SyntaxKind::NameP, 21), + (SyntaxKind::TextP, 23), + (SyntaxKind::Ascii44, 24), + (SyntaxKind::Ident, 27), + (SyntaxKind::TextP, 29), + (SyntaxKind::Ascii41, 32), + (SyntaxKind::Ascii59, 33), + ]; + + for (kind, pos) in expected { + assert_eq!(parser.current().kind, kind); + assert_eq!(parser.current_pos, pos); + parser.advance(); + } + } +} From 2e0805a6ab0f5db3eb063449dac57eddc647eff6 Mon Sep 17 00:00:00 2001 From: Julian <juliandomke@outlook.de> Date: Sat, 12 Apr 2025 17:20:26 +0200 Subject: [PATCH 06/15] comment --- crates/pgt_statement_splitter/src/parser.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/pgt_statement_splitter/src/parser.rs b/crates/pgt_statement_splitter/src/parser.rs index 1dd73b19..6b78d49f 100644 --- a/crates/pgt_statement_splitter/src/parser.rs +++ b/crates/pgt_statement_splitter/src/parser.rs @@ -164,6 +164,8 @@ impl Parser { } } + /// Will advance if the `kind` matches the current token. + /// Otherwise, will add a diagnostic to the internal `errors`. pub fn expect(&mut self, kind: SyntaxKind) { if self.advance_if_kind(kind) { return; From 9ac96385544c6da2ef26ee72656c6af7dbbb5497 Mon Sep 17 00:00:00 2001 From: Julian <juliandomke@outlook.de> Date: Sat, 12 Apr 2025 17:30:43 +0200 Subject: [PATCH 07/15] ok............ --- crates/pgt_statement_splitter/src/parser.rs | 36 ++++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/crates/pgt_statement_splitter/src/parser.rs b/crates/pgt_statement_splitter/src/parser.rs index 6b78d49f..df22acaa 100644 --- a/crates/pgt_statement_splitter/src/parser.rs +++ b/crates/pgt_statement_splitter/src/parser.rs @@ -106,13 +106,7 @@ impl Parser { ); // find last relevant token before current position - let (end_token_pos, _) = self - .tokens - .iter() - .enumerate() - .take(self.current_pos) - .rfind(|(_, t)| is_relevant(t)) - .unwrap(); + let (end_token_pos, _) = self.find_last_relevant().unwrap(); self.stmt_ranges.push((start_token_pos, end_token_pos)); @@ -120,6 +114,7 @@ impl Parser { } fn advance(&mut self) -> &Token { + // can't reuse `find_next_relevant` because of Mr. Borrow Checker let (pos, token) = self .tokens .iter() @@ -139,19 +134,12 @@ impl Parser { } } - /// Look ahead to the next relevant token fn look_ahead(&self) -> Option<&Token> { - self.tokens - .iter() - .skip(self.current_pos + 1) - .find(|t| is_relevant(t)) + self.find_next_relevant().map(|t| t.1) } fn look_back(&self) -> Option<&Token> { - self.tokens - .iter() - .take(self.current_pos) - .rfind(|t| is_relevant(t)) + self.find_last_relevant().map(|it| it.1) } /// Returns `true` when it advanced, `false` if it didn't @@ -176,6 +164,22 @@ impl Parser { self.current().span, )); } + + fn find_last_relevant(&self) -> Option<(usize, &Token)> { + self.tokens + .iter() + .enumerate() + .take(self.current_pos) + .rfind(|(_, t)| is_relevant(t)) + } + + fn find_next_relevant(&self) -> Option<(usize, &Token)> { + self.tokens + .iter() + .enumerate() + .skip(self.current_pos + 1) + .find(|(_, t)| is_relevant(t)) + } } #[cfg(windows)] From 41a927e897e6564c06d381f9bb16559c65f97028 Mon Sep 17 00:00:00 2001 From: Julian <juliandomke@outlook.de> Date: Sat, 12 Apr 2025 17:35:07 +0200 Subject: [PATCH 08/15] tidying up --- crates/pgt_statement_splitter/src/parser.rs | 60 +++++++------------ .../pgt_statement_splitter/src/parser/dml.rs | 4 +- 2 files changed, 23 insertions(+), 41 deletions(-) diff --git a/crates/pgt_statement_splitter/src/parser.rs b/crates/pgt_statement_splitter/src/parser.rs index df22acaa..b862fc21 100644 --- a/crates/pgt_statement_splitter/src/parser.rs +++ b/crates/pgt_statement_splitter/src/parser.rs @@ -43,7 +43,7 @@ impl Parser { let eof_token = Token::eof(usize::from( tokens .last() - .map(|t| t.span.start()) + .map(|t| t.span.end()) .unwrap_or(TextSize::from(0)), )); @@ -53,8 +53,6 @@ impl Parser { current_pos += 1; } - println!("We are starting at {}", current_pos); - Self { stmt_ranges: Vec::new(), eof_token, @@ -81,7 +79,6 @@ impl Parser { } } - /// Start statement pub fn start_stmt(&mut self) { assert!( self.current_stmt_start.is_none(), @@ -91,7 +88,6 @@ impl Parser { self.current_stmt_start = Some(self.current_pos); } - /// Close statement pub fn close_stmt(&mut self) { assert!( matches!(self.current_stmt_start, Some(_)), @@ -105,7 +101,6 @@ impl Parser { "Must close the statement on a token that's later than the start token." ); - // find last relevant token before current position let (end_token_pos, _) = self.find_last_relevant().unwrap(); self.stmt_ranges.push((start_token_pos, end_token_pos)); @@ -113,8 +108,15 @@ impl Parser { self.current_stmt_start = None; } + fn current(&self) -> &Token { + match self.tokens.get(self.current_pos) { + Some(token) => token, + None => &self.eof_token, + } + } + fn advance(&mut self) -> &Token { - // can't reuse `find_next_relevant` because of Mr. Borrow Checker + // can't reuse any `find_next_relevant` logic because of Mr. Borrow Checker let (pos, token) = self .tokens .iter() @@ -127,42 +129,28 @@ impl Parser { token } - fn current(&self) -> &Token { - match self.tokens.get(self.current_pos) { - Some(token) => token, - None => &self.eof_token, - } - } - fn look_ahead(&self) -> Option<&Token> { - self.find_next_relevant().map(|t| t.1) + self.tokens + .iter() + .skip(self.current_pos + 1) + .find(|t| is_relevant(t)) } fn look_back(&self) -> Option<&Token> { self.find_last_relevant().map(|it| it.1) } - /// Returns `true` when it advanced, `false` if it didn't - pub fn advance_if_kind(&mut self, kind: SyntaxKind) -> bool { - if self.current().kind == kind { - self.advance(); - true - } else { - false - } - } - /// Will advance if the `kind` matches the current token. /// Otherwise, will add a diagnostic to the internal `errors`. pub fn expect(&mut self, kind: SyntaxKind) { - if self.advance_if_kind(kind) { - return; + if self.current().kind == kind { + self.advance(); + } else { + self.errors.push(SplitDiagnostic::new( + format!("Expected {:#?}", kind), + self.current().span, + )); } - - self.errors.push(SplitDiagnostic::new( - format!("Expected {:#?}", kind), - self.current().span, - )); } fn find_last_relevant(&self) -> Option<(usize, &Token)> { @@ -172,14 +160,6 @@ impl Parser { .take(self.current_pos) .rfind(|(_, t)| is_relevant(t)) } - - fn find_next_relevant(&self) -> Option<(usize, &Token)> { - self.tokens - .iter() - .enumerate() - .skip(self.current_pos + 1) - .find(|(_, t)| is_relevant(t)) - } } #[cfg(windows)] diff --git a/crates/pgt_statement_splitter/src/parser/dml.rs b/crates/pgt_statement_splitter/src/parser/dml.rs index d1dc0bdd..015c50b6 100644 --- a/crates/pgt_statement_splitter/src/parser/dml.rs +++ b/crates/pgt_statement_splitter/src/parser/dml.rs @@ -13,7 +13,9 @@ pub(crate) fn cte(p: &mut Parser) { p.expect(SyntaxKind::As); parenthesis(p); - if !p.advance_if_kind(SyntaxKind::Ascii44) { + if p.current().kind == SyntaxKind::Ascii44 { + p.advance(); + } else { break; } } From dc0ab6e319c1d982e4365c66f95e6c927d3341bc Mon Sep 17 00:00:00 2001 From: Julian <juliandomke@outlook.de> Date: Sat, 12 Apr 2025 17:35:33 +0200 Subject: [PATCH 09/15] =?UTF-8?q?comment=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/pgt_statement_splitter/src/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/pgt_statement_splitter/src/parser.rs b/crates/pgt_statement_splitter/src/parser.rs index b862fc21..995e7265 100644 --- a/crates/pgt_statement_splitter/src/parser.rs +++ b/crates/pgt_statement_splitter/src/parser.rs @@ -47,7 +47,7 @@ impl Parser { .unwrap_or(TextSize::from(0)), )); - // next_pos should be the initialised with the first valid token already + // Place `current_pos` on the first relevant token let mut current_pos = 0; while is_irrelevant_token(tokens.get(current_pos).unwrap_or(&eof_token)) { current_pos += 1; From 4252abefddefa7689009179d6efd71faa82fdceb Mon Sep 17 00:00:00 2001 From: Julian <juliandomke@outlook.de> Date: Sat, 12 Apr 2025 17:36:26 +0200 Subject: [PATCH 10/15] ok --- crates/pgt_statement_splitter/src/parser.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/pgt_statement_splitter/src/parser.rs b/crates/pgt_statement_splitter/src/parser.rs index 995e7265..7ad2e419 100644 --- a/crates/pgt_statement_splitter/src/parser.rs +++ b/crates/pgt_statement_splitter/src/parser.rs @@ -21,8 +21,7 @@ pub struct Parser { /// The start of the current statement, if any current_stmt_start: Option<usize>, - /// The tokens to parse - pub tokens: Vec<Token>, + tokens: Vec<Token>, eof_token: Token, @@ -32,7 +31,7 @@ pub struct Parser { /// Result of Building #[derive(Debug)] pub struct Parse { - /// The ranges of the errors + /// The ranges of the parsed statements pub ranges: Vec<TextRange>, /// The syntax errors accumulated during parsing pub errors: Vec<SplitDiagnostic>, From 5383d943e3634b5e6ca8e6d31b082b0490466612 Mon Sep 17 00:00:00 2001 From: Julian <juliandomke@outlook.de> Date: Sat, 12 Apr 2025 17:37:23 +0200 Subject: [PATCH 11/15] comment --- crates/pgt_statement_splitter/src/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/pgt_statement_splitter/src/parser.rs b/crates/pgt_statement_splitter/src/parser.rs index 7ad2e419..1caacf86 100644 --- a/crates/pgt_statement_splitter/src/parser.rs +++ b/crates/pgt_statement_splitter/src/parser.rs @@ -13,12 +13,12 @@ use crate::diagnostics::SplitDiagnostic; /// Main parser that exposes the `cstree` api, and collects errors and statements /// It is modelled after a Pratt Parser. For a gentle introduction to Pratt Parsing, see https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html pub struct Parser { + // Ranges are defined by the indices of the start/end tokens stmt_ranges: Vec<(usize, usize)>, /// The syntax errors accumulated during parsing errors: Vec<SplitDiagnostic>, - /// The start of the current statement, if any current_stmt_start: Option<usize>, tokens: Vec<Token>, From 8a2865731e588dfc4be68878328ae14db725e22b Mon Sep 17 00:00:00 2001 From: Julian <juliandomke@outlook.de> Date: Sat, 12 Apr 2025 17:37:46 +0200 Subject: [PATCH 12/15] more --- crates/pgt_statement_splitter/src/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/pgt_statement_splitter/src/parser.rs b/crates/pgt_statement_splitter/src/parser.rs index 1caacf86..2611aa99 100644 --- a/crates/pgt_statement_splitter/src/parser.rs +++ b/crates/pgt_statement_splitter/src/parser.rs @@ -13,7 +13,7 @@ use crate::diagnostics::SplitDiagnostic; /// Main parser that exposes the `cstree` api, and collects errors and statements /// It is modelled after a Pratt Parser. For a gentle introduction to Pratt Parsing, see https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html pub struct Parser { - // Ranges are defined by the indices of the start/end tokens + /// The statement ranges are defined by the indices of the start/end tokens stmt_ranges: Vec<(usize, usize)>, /// The syntax errors accumulated during parsing From 466e1e2ab601a4a42cbeaed66bce18bf909ac38f Mon Sep 17 00:00:00 2001 From: Julian <juliandomke@outlook.de> Date: Sat, 12 Apr 2025 17:41:31 +0200 Subject: [PATCH 13/15] ok --- crates/pgt_statement_splitter/src/lib.rs | 6 +++--- crates/pgt_statement_splitter/src/parser.rs | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/crates/pgt_statement_splitter/src/lib.rs b/crates/pgt_statement_splitter/src/lib.rs index 68f5daaf..63e68cd2 100644 --- a/crates/pgt_statement_splitter/src/lib.rs +++ b/crates/pgt_statement_splitter/src/lib.rs @@ -4,10 +4,10 @@ pub mod diagnostics; mod parser; -use parser::{Parse, Parser, source}; +use parser::{Parser, ParserResult, source}; use pgt_lexer::diagnostics::ScanError; -pub fn split(sql: &str) -> Result<Parse, Vec<ScanError>> { +pub fn split(sql: &str) -> Result<ParserResult, Vec<ScanError>> { let tokens = pgt_lexer::lex(sql)?; let mut parser = Parser::new(tokens); @@ -28,7 +28,7 @@ mod tests { struct Tester { input: String, - parse: Parse, + parse: ParserResult, } impl From<&str> for Tester { diff --git a/crates/pgt_statement_splitter/src/parser.rs b/crates/pgt_statement_splitter/src/parser.rs index 2611aa99..820a20a5 100644 --- a/crates/pgt_statement_splitter/src/parser.rs +++ b/crates/pgt_statement_splitter/src/parser.rs @@ -28,9 +28,9 @@ pub struct Parser { current_pos: usize, } -/// Result of Building +/// #[derive(Debug)] -pub struct Parse { +pub struct ParserResult { /// The ranges of the parsed statements pub ranges: Vec<TextRange>, /// The syntax errors accumulated during parsing @@ -62,8 +62,8 @@ impl Parser { } } - pub fn finish(self) -> Parse { - Parse { + pub fn finish(self) -> ParserResult { + ParserResult { ranges: self .stmt_ranges .iter() From 1ebe9b536da06569e8a15b9b17051616a18ba299 Mon Sep 17 00:00:00 2001 From: Julian <juliandomke@outlook.de> Date: Sat, 12 Apr 2025 17:42:12 +0200 Subject: [PATCH 14/15] ok --- crates/pgt_statement_splitter/src/parser.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/pgt_statement_splitter/src/parser.rs b/crates/pgt_statement_splitter/src/parser.rs index 820a20a5..baf118b5 100644 --- a/crates/pgt_statement_splitter/src/parser.rs +++ b/crates/pgt_statement_splitter/src/parser.rs @@ -28,7 +28,6 @@ pub struct Parser { current_pos: usize, } -/// #[derive(Debug)] pub struct ParserResult { /// The ranges of the parsed statements @@ -89,7 +88,7 @@ impl Parser { pub fn close_stmt(&mut self) { assert!( - matches!(self.current_stmt_start, Some(_)), + self.current_stmt_start.is_some(), "Must start statement before closing it." ); From f4129e003734b3ae5975c11404877e3862ae9696 Mon Sep 17 00:00:00 2001 From: Julian <juliandomke@outlook.de> Date: Sat, 12 Apr 2025 17:52:45 +0200 Subject: [PATCH 15/15] end test --- crates/pgt_statement_splitter/src/parser.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/pgt_statement_splitter/src/parser.rs b/crates/pgt_statement_splitter/src/parser.rs index baf118b5..93390aa0 100644 --- a/crates/pgt_statement_splitter/src/parser.rs +++ b/crates/pgt_statement_splitter/src/parser.rs @@ -196,6 +196,7 @@ mod tests { ); "#; let tokens = pgt_lexer::lex(sql).unwrap(); + let total_num_tokens = tokens.len(); let mut parser = Parser::new(tokens); @@ -223,5 +224,8 @@ mod tests { assert_eq!(parser.current_pos, pos); parser.advance(); } + + assert_eq!(parser.current().kind, SyntaxKind::Eof); + assert_eq!(parser.current_pos, total_num_tokens); } }