diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index ab3be35c1..74e8cb55c 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -81,7 +81,8 @@ pub use self::query::{
     TableSampleBucket, TableSampleKind, TableSampleMethod, TableSampleModifier,
     TableSampleQuantity, TableSampleSeed, TableSampleSeedModifier, TableSampleUnit, TableVersion,
     TableWithJoins, Top, TopQuantity, UpdateTableFromKind, ValueTableMode, Values,
-    WildcardAdditionalOptions, With, WithFill,
+    WildcardAdditionalOptions, With, WithFill, XmlNamespaceDefinition, XmlPassingArgument,
+    XmlPassingClause, XmlTableColumn, XmlTableColumnOption,
 };

 pub use self::trigger::{
diff --git a/src/ast/query.rs b/src/ast/query.rs
index abc115a0d..982985ec3 100644
--- a/src/ast/query.rs
+++ b/src/ast/query.rs
@@ -1271,6 +1271,37 @@ pub enum TableFactor {
         symbols: Vec<SymbolDefinition>,
         alias: Option<TableAlias>,
     },
+    /// The `XMLTABLE` table-valued function.
+    /// Part of the SQL standard, supported by PostgreSQL, Oracle, and DB2.
+    ///
+    /// <https://www.postgresql.org/docs/15/functions-xml.html#FUNCTIONS-XML-PROCESSING>
+    ///
+    /// ```sql
+    /// SELECT xmltable.*
+    /// FROM xmldata,
+    /// XMLTABLE('//ROWS/ROW'
+    ///     PASSING data
+    ///     COLUMNS id int PATH '@id',
+    ///     ordinality FOR ORDINALITY,
+    ///     "COUNTRY_NAME" text,
+    ///     country_id text PATH 'COUNTRY_ID',
+    ///     size_sq_km float PATH 'SIZE[@unit = "sq_km"]',
+    ///     size_other text PATH 'concat(SIZE[@unit!="sq_km"], " ", SIZE[@unit!="sq_km"]/@unit)',
+    ///     premier_name text PATH 'PREMIER_NAME' DEFAULT 'not specified'
+    /// );
+    /// ```
+    XmlTable {
+        /// Optional XMLNAMESPACES clause (empty if not present)
+        namespaces: Vec<XmlNamespaceDefinition>,
+        /// The row-generating XPath expression.
+        row_expression: Expr,
+        /// The PASSING clause specifying the document expression.
+        passing: XmlPassingClause,
+        /// The columns to be extracted from each generated row.
+        columns: Vec<XmlTableColumn>,
+        /// The alias for the table.
+        alias: Option<TableAlias>,
+    },
 }

 /// The table sample modifier options
@@ -1936,6 +1967,31 @@ impl fmt::Display for TableFactor {
                 }
                 Ok(())
             }
+            TableFactor::XmlTable {
+                row_expression,
+                passing,
+                columns,
+                alias,
+                namespaces,
+            } => {
+                write!(f, "XMLTABLE(")?;
+                if !namespaces.is_empty() {
+                    write!(
+                        f,
+                        "XMLNAMESPACES({}), ",
+                        display_comma_separated(namespaces)
+                    )?;
+                }
+                write!(
+                    f,
+                    "{row_expression}{passing} COLUMNS {columns})",
+                    columns = display_comma_separated(columns)
+                )?;
+                if let Some(alias) = alias {
+                    write!(f, " AS {alias}")?;
+                }
+                Ok(())
+            }
         }
     }
 }
@@ -3082,3 +3138,137 @@ pub enum UpdateTableFromKind {
     /// For Example: `UPDATE SET t1.name='aaa' FROM t1`
     AfterSet(Vec<TableWithJoins>),
 }
+
+/// Defines the options for an XmlTable column: Named or ForOrdinality
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub enum XmlTableColumnOption {
+    /// A named column with a type, optional path, and default value.
+    NamedInfo {
+        /// The type of the column to be extracted.
+        r#type: DataType,
+        /// The path to the column to be extracted. If None, defaults to the column name.
+        path: Option<Expr>,
+        /// Default value if path does not match
+        default: Option<Expr>,
+        /// Whether the column is nullable (NULL=true, NOT NULL=false)
+        nullable: bool,
+    },
+    /// The FOR ORDINALITY marker
+    ForOrdinality,
+}
+
+/// A single column definition in XMLTABLE
+///
+/// ```sql
+/// COLUMNS
+///     id int PATH '@id',
+///     ordinality FOR ORDINALITY,
+///     "COUNTRY_NAME" text,
+///     country_id text PATH 'COUNTRY_ID',
+///     size_sq_km float PATH 'SIZE[@unit = "sq_km"]',
+///     size_other text PATH 'concat(SIZE[@unit!="sq_km"], " ", SIZE[@unit!="sq_km"]/@unit)',
+///     premier_name text PATH 'PREMIER_NAME' DEFAULT 'not specified'
+/// ```
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub struct XmlTableColumn {
+    /// The name of the column.
+    pub name: Ident,
+    /// Column options: type/path/default or FOR ORDINALITY
+    pub option: XmlTableColumnOption,
+}
+
+impl fmt::Display for XmlTableColumn {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.name)?;
+        match &self.option {
+            XmlTableColumnOption::NamedInfo {
+                r#type,
+                path,
+                default,
+                nullable,
+            } => {
+                write!(f, " {}", r#type)?;
+                if let Some(p) = path {
+                    write!(f, " PATH {}", p)?;
+                }
+                if let Some(d) = default {
+                    write!(f, " DEFAULT {}", d)?;
+                }
+                if !*nullable {
+                    write!(f, " NOT NULL")?;
+                }
+                Ok(())
+            }
+            XmlTableColumnOption::ForOrdinality => {
+                write!(f, " FOR ORDINALITY")
+            }
+        }
+    }
+}
+
+/// Argument passed in the XMLTABLE PASSING clause
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub struct XmlPassingArgument {
+    /// The expression being passed.
+    pub expr: Expr,
+    /// Optional alias, written as `AS <alias>` after the expression.
+    pub alias: Option<Ident>,
+    /// Whether `BY VALUE` was written before the expression.
+    pub by_value: bool,
+}
+
+impl fmt::Display for XmlPassingArgument {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        if self.by_value {
+            write!(f, "BY VALUE ")?;
+        }
+        write!(f, "{}", self.expr)?;
+        if let Some(alias) = &self.alias {
+            write!(f, " AS {}", alias)?;
+        }
+        Ok(())
+    }
+}
+
+/// The PASSING clause for XMLTABLE
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub struct XmlPassingClause {
+    /// The passed arguments; empty when no `PASSING` clause is present.
+    pub arguments: Vec<XmlPassingArgument>,
+}
+
+impl fmt::Display for XmlPassingClause {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        if !self.arguments.is_empty() {
+            write!(f, " PASSING {}", display_comma_separated(&self.arguments))?;
+        }
+        Ok(())
+    }
+}
+
+/// Represents a single XML namespace definition in the XMLNAMESPACES clause.
+///
+/// `namespace_uri AS namespace_name`
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub struct XmlNamespaceDefinition {
+    /// The namespace URI (a text expression).
+    pub uri: Expr,
+    /// The alias for the namespace (a simple identifier).
+    pub name: Ident,
+}
+
+impl fmt::Display for XmlNamespaceDefinition {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{} AS {}", self.uri, self.name)
+    }
+}
diff --git a/src/ast/spans.rs b/src/ast/spans.rs
index a241fdf4d..27d52c26f 100644
--- a/src/ast/spans.rs
+++ b/src/ast/spans.rs
@@ -1909,6 +1909,7 @@ impl Spanned for TableFactor {
                     .chain(alias.as_ref().map(|alias| alias.span())),
             ),
             TableFactor::JsonTable { .. } => Span::empty(),
+            TableFactor::XmlTable { .. } => Span::empty(),
             TableFactor::Pivot {
                 table,
                 aggregate_functions,
diff --git a/src/keywords.rs b/src/keywords.rs
index a5400a5b0..4eaad7ed2 100644
--- a/src/keywords.rs
+++ b/src/keywords.rs
@@ -654,6 +654,7 @@ define_keywords!(
     PARTITION,
     PARTITIONED,
     PARTITIONS,
+    PASSING,
     PASSWORD,
     PAST,
     PATH,
@@ -989,6 +990,8 @@ define_keywords!(
     WORK,
     WRITE,
     XML,
+    XMLNAMESPACES,
+    XMLTABLE,
     XOR,
     YEAR,
     YEARS,
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index a9ddd1837..77466b97e 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -11992,6 +11992,7 @@ impl<'a> Parser<'a> {
                         | TableFactor::Function { alias, .. }
                         | TableFactor::UNNEST { alias, .. }
                         | TableFactor::JsonTable { alias, .. }
+                        | TableFactor::XmlTable { alias, .. }
                         | TableFactor::OpenJsonTable { alias, .. }
                         | TableFactor::TableFunction { alias, .. }
                         | TableFactor::Pivot { alias, .. }
@@ -12107,6 +12108,9 @@ impl<'a> Parser<'a> {
         } else if self.parse_keyword_with_tokens(Keyword::OPENJSON, &[Token::LParen]) {
             self.prev_token();
             self.parse_open_json_table_factor()
+        } else if self.parse_keyword_with_tokens(Keyword::XMLTABLE, &[Token::LParen]) {
+            self.prev_token();
+            self.parse_xml_table_factor()
         } else {
             let name = self.parse_object_name(true)?;

@@ -12339,6 +12343,110 @@ impl<'a> Parser<'a> {
         })
     }

+    /// Parses the body of an `XMLTABLE(...)` table factor, starting at the opening
+    /// parenthesis, followed by an optional table alias.
+    fn parse_xml_table_factor(&mut self) -> Result<TableFactor, ParserError> {
+        self.expect_token(&Token::LParen)?;
+        let namespaces = if self.parse_keyword(Keyword::XMLNAMESPACES) {
+            self.expect_token(&Token::LParen)?;
+            let namespaces = self.parse_comma_separated(Parser::parse_xml_namespace_definition)?;
+            self.expect_token(&Token::RParen)?;
+            self.expect_token(&Token::Comma)?;
+            namespaces
+        } else {
+            vec![]
+        };
+        let row_expression = self.parse_expr()?;
+        let passing = self.parse_xml_passing_clause()?;
+        self.expect_keyword_is(Keyword::COLUMNS)?;
+        let columns = self.parse_comma_separated(Parser::parse_xml_table_column)?;
+        self.expect_token(&Token::RParen)?;
+        let alias = self.maybe_parse_table_alias()?;
+        Ok(TableFactor::XmlTable {
+            namespaces,
+            row_expression,
+            passing,
+            columns,
+            alias,
+        })
+    }
+
+    /// Parses one `<uri> AS <name>` entry of an `XMLNAMESPACES(...)` list.
+    fn parse_xml_namespace_definition(&mut self) -> Result<XmlNamespaceDefinition, ParserError> {
+        let uri = self.parse_expr()?;
+        self.expect_keyword_is(Keyword::AS)?;
+        let name = self.parse_identifier()?;
+        Ok(XmlNamespaceDefinition { uri, name })
+    }
+
+    /// Parses one `COLUMNS` entry: `<name> FOR ORDINALITY` or
+    /// `<name> <type> [PATH <expr>] [DEFAULT <expr>] [NOT NULL | NULL]`.
+    fn parse_xml_table_column(&mut self) -> Result<XmlTableColumn, ParserError> {
+        let name = self.parse_identifier()?;
+
+        let option = if self.parse_keyword(Keyword::FOR) {
+            self.expect_keyword_is(Keyword::ORDINALITY)?;
+            XmlTableColumnOption::ForOrdinality
+        } else {
+            let r#type = self.parse_data_type()?;
+            let mut path = None;
+            let mut default = None;
+
+            if self.parse_keyword(Keyword::PATH) {
+                path = Some(self.parse_expr()?);
+            }
+
+            if self.parse_keyword(Keyword::DEFAULT) {
+                default = Some(self.parse_expr()?);
+            }
+
+            let not_null = self.parse_keywords(&[Keyword::NOT, Keyword::NULL]);
+            if !not_null {
+                // NULL is the default but can be specified explicitly
+                let _ = self.parse_keyword(Keyword::NULL);
+            }
+
+            XmlTableColumnOption::NamedInfo {
+                r#type,
+                path,
+                default,
+                nullable: !not_null,
+            }
+        };
+        Ok(XmlTableColumn { name, option })
+    }
+
+    /// Parses an optional `PASSING [BY VALUE] <expr> [AS <alias>] [, ...]` clause.
+    /// Returns an empty argument list when `PASSING` is absent.
+    fn parse_xml_passing_clause(&mut self) -> Result<XmlPassingClause, ParserError> {
+        let mut arguments = vec![];
+        if self.parse_keyword(Keyword::PASSING) {
+            loop {
+                let by_value = self.parse_keyword(Keyword::BY);
+                if by_value {
+                    // `BY` must be followed by `VALUE`; report the mismatch instead of
+                    // silently discarding the error and recording `by_value: false`.
+                    self.expect_keyword_is(Keyword::VALUE)?;
+                }
+                let expr = self.parse_expr()?;
+                let alias = if self.parse_keyword(Keyword::AS) {
+                    Some(self.parse_identifier()?)
+                } else {
+                    None
+                };
+                arguments.push(XmlPassingArgument {
+                    expr,
+                    alias,
+                    by_value,
+                });
+                if !self.consume_token(&Token::Comma) {
+                    break;
+                }
+            }
+        }
+        Ok(XmlPassingClause { arguments })
+    }
+
     fn parse_match_recognize(&mut self, table: TableFactor) -> Result<TableFactor, ParserError> {
         self.expect_token(&Token::LParen)?;

diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index be848a603..15b9bef66 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -11729,6 +11729,54 @@ fn test_group_by_grouping_sets() {
     );
 }

+#[test]
+fn test_xmltable() {
+    all_dialects()
+        .verified_only_select("SELECT * FROM XMLTABLE('/root' PASSING data COLUMNS element TEXT)");
+
+    // Minimal meaningful working example: returns a single row with a single column named y containing the value z
+    all_dialects().verified_only_select(
+        "SELECT y FROM XMLTABLE('/X' PASSING 'z' COLUMNS y TEXT)",
+    );
+
+    // Test using subqueries
+    all_dialects().verified_only_select("SELECT y FROM XMLTABLE((SELECT '/X') PASSING (SELECT CAST('z' AS xml)) COLUMNS y TEXT PATH (SELECT 'y'))");
+
+    // NOT NULL
+    all_dialects().verified_only_select(
+        "SELECT y FROM XMLTABLE('/X' PASSING '' COLUMNS y TEXT NOT NULL)",
+    );
+
+    all_dialects().verified_only_select("SELECT * FROM XMLTABLE('/root/row' PASSING xmldata COLUMNS id INT PATH '@id', name TEXT PATH 'name/text()', value FLOAT PATH 'value')");
+
+    all_dialects().verified_only_select("SELECT * FROM XMLTABLE('//ROWS/ROW' PASSING data COLUMNS row_num FOR ORDINALITY, id INT PATH '@id', name TEXT PATH 'NAME' DEFAULT 'unnamed')");
+
+    // Example from https://www.postgresql.org/docs/15/functions-xml.html#FUNCTIONS-XML-PROCESSING
+    all_dialects().verified_only_select(
+        "SELECT xmltable.* FROM xmldata, XMLTABLE('//ROWS/ROW' PASSING data COLUMNS id INT PATH '@id', ordinality FOR ORDINALITY, \"COUNTRY_NAME\" TEXT, country_id TEXT PATH 'COUNTRY_ID', size_sq_km FLOAT PATH 'SIZE[@unit = \"sq_km\"]', size_other TEXT PATH 'concat(SIZE[@unit!=\"sq_km\"], \" \", SIZE[@unit!=\"sq_km\"]/@unit)', premier_name TEXT PATH 'PREMIER_NAME' DEFAULT 'not specified')"
+    );
+
+    // Example from DB2 docs without explicit PASSING clause: https://www.ibm.com/docs/en/db2/12.1.0?topic=xquery-simple-column-name-passing-xmlexists-xmlquery-xmltable
+    all_dialects().verified_only_select(
+        "SELECT X.* FROM T1, XMLTABLE('$CUSTLIST/customers/customerinfo' COLUMNS \"Cid\" BIGINT PATH '@Cid', \"Info\" XML PATH 'document{.}', \"History\" XML PATH 'NULL') AS X"
+    );
+
+    // Example from PostgreSQL with XMLNAMESPACES
+    all_dialects().verified_only_select(
+        "SELECT xmltable.* FROM XMLTABLE(XMLNAMESPACES('http://example.com/myns' AS x, 'http://example.com/b' AS \"B\"), '/x:example/x:item' PASSING (SELECT data FROM xmldata) COLUMNS foo INT PATH '@foo', bar INT PATH '@B:bar')"
+    );
+
+    // BY VALUE is preserved on round-trip
+    all_dialects().verified_only_select(
+        "SELECT y FROM XMLTABLE('/X' PASSING BY VALUE data COLUMNS y TEXT)",
+    );
+
+    // BY must be followed by VALUE
+    assert!(all_dialects()
+        .parse_sql_statements("SELECT y FROM XMLTABLE('/X' PASSING BY REF data COLUMNS y TEXT)")
+        .is_err());
+}
+
 #[test]
 fn test_match_recognize() {
     use MatchRecognizePattern::*;