Skip to content

Commit 4d97205

Browse files
committed
chore: add comments
1 parent e65f8de commit 4d97205

File tree

3 files changed

+15
-10
lines changed

3 files changed

+15
-10
lines changed

crates/parser/src/lib.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,17 @@
33
//! This crate provides a parser for the Postgres SQL dialect.
44
//! It is based on the pg_query.rs crate, which is a wrapper around the PostgreSQL query parser.
55
//! The main `Parser` struct parses a source file and individual statements.
6-
//! The `Parse` struct contains the resulting concrete syntax tree, syntax errors, and the abtract syntax tree, which is a list of pg_query statements and their positions.
6+
//! The `Parse` result struct contains the resulting concrete syntax tree, syntax errors, and the abstract syntax tree, which is a list of pg_query statements and their positions.
77
//!
88
//! The idea is to offload the heavy lifting to the same parser that the PostgreSQL server uses,
9-
//! and just fill in the gaps to be able to build both cst and ast from a a source file that
9+
//! and just fill in the gaps to be able to build both cst and ast from a source file that
1010
//! potentially contains erroneous statements.
1111
//!
1212
//! The main drawbacks of the PostgreSQL query parser mitigated by this parser are:
1313
//! - it only parses a full source text, and if there is any syntax error in a file, it will not parse anything and return an error.
14-
//! - it does not parse whitespaces and newlines, so it is not possible to build a concrete syntax tree build a concrete syntax tree.
14+
//! - it does not parse whitespaces and newlines, and it only returns ast nodes. The concrete syntax tree has to be reverse-engineered.
1515
//!
16-
//! To see how these drawbacks are mitigated, see the `statement.rs` and the `source_file.rs` module.
16+
//! To see how these drawbacks are mitigated, see the `statement_parser.rs` and the `source_parser.rs` modules.
1717
1818
mod ast_node;
1919
mod estimate_node_range;

crates/parser/src/statement_parser.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,7 @@ use crate::{
99
syntax_kind_codegen::SyntaxKind,
1010
};
1111

12-
/// A super simple lexer for sql statements.
13-
///
14-
/// One weakness of pg_query.rs is that it does not parse whitespace or newlines. We use a very
15-
/// simple lexer to fill the gaps.
12+
/// Super simple lexer that only catches the tokens that libpg_query ignores.
1613
#[derive(Logos, Debug, PartialEq)]
1714
pub enum StatementToken {
1815
// comments and whitespaces
@@ -27,7 +24,7 @@ pub enum StatementToken {
2724
}
2825

2926
impl StatementToken {
30-
/// Creates a `SyntaxKind` from a `StatementToken`.
27+
/// Create a `SyntaxKind` from a `StatementToken`.
3128
pub fn syntax_kind(&self) -> SyntaxKind {
3229
match self {
3330
StatementToken::Whitespace => SyntaxKind::Whitespace,
@@ -39,6 +36,13 @@ impl StatementToken {
3936
}
4037

4138
impl Parser {
39+
/// Parse a single statement passed in `text`. If `at_offset` is `Some`, the statement is assumed to be at that offset in the source file.
40+
///
41+
/// On a high level, the parser works as follows:
42+
/// - 1. Collect all information from pg_query.rs and `StatementToken` lexer
43+
/// - 2. Derive as much information as possible from the collected information
44+
/// - 3. Collect AST node and errors, if any
45+
/// - 4. Walk the statement token by token, and reverse-engineer the concrete syntax tree
4246
pub fn parse_statement_at(&mut self, text: &str, at_offset: Option<u32>) {
4347
// 1. Collect as much information as possible from pg_query.rs and `StatementToken` lexer
4448

@@ -98,6 +102,7 @@ impl Parser {
98102
let mut statement_token_lexer = StatementToken::lexer(&text);
99103

100104
// 2. Setup data structures required for the parsing algorithm
105+
101106
// A buffer for tokens that are not applied immediately to the cst
102107
let mut token_buffer: VecDeque<(SyntaxKind, String)> = VecDeque::new();
103108
// Keeps track of currently open nodes. Latest opened is last.

crates/parser/src/syntax_error.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use std::fmt;
22

33
use cstree::text::{TextRange, TextSize};
44

5-
/// Represents the result of unsuccessful tokenization, parsing
5+
/// Represents the result of unsuccessful tokenization, parsing,
66
/// or tree validation.
77
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
88
pub struct SyntaxError(String, TextRange);

0 commit comments

Comments
 (0)