Skip to content

Commit 018992b

Browse files
committed
Add support for Snowflake identifier function
1 parent ed8757f commit 018992b

File tree

7 files changed

+247
-50
lines changed

7 files changed

+247
-50
lines changed

src/ast/mod.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,12 +344,14 @@ impl fmt::Display for ObjectName {
344344
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
345345
pub enum ObjectNamePart {
346346
Identifier(Ident),
347+
Function(ObjectNamePartFunction),
347348
}
348349

349350
impl ObjectNamePart {
350351
pub fn as_ident(&self) -> Option<&Ident> {
351352
match self {
352353
ObjectNamePart::Identifier(ident) => Some(ident),
354+
ObjectNamePart::Function(_) => None,
353355
}
354356
}
355357
}
@@ -358,10 +360,30 @@ impl fmt::Display for ObjectNamePart {
358360
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
359361
match self {
360362
ObjectNamePart::Identifier(ident) => write!(f, "{ident}"),
363+
ObjectNamePart::Function(func) => write!(f, "{func}"),
361364
}
362365
}
363366
}
364367

368+
/// An object name part that consists of a function that dynamically
369+
/// constructs identifiers.
370+
///
371+
/// - [Snowflake](https://docs.snowflake.com/en/sql-reference/identifier-literal)
372+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
373+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
374+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
375+
pub struct ObjectNamePartFunction {
376+
pub name: Ident,
377+
pub args: Vec<FunctionArg>,
378+
}
379+
380+
impl fmt::Display for ObjectNamePartFunction {
381+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
382+
write!(f, "{}(", self.name)?;
383+
write!(f, "{})", display_comma_separated(&self.args))
384+
}
385+
}
386+
365387
/// Represents an Array Expression, either
366388
/// `ARRAY[..]`, or `[..]`
367389
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]

src/ast/spans.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1671,6 +1671,10 @@ impl Spanned for ObjectNamePart {
16711671
fn span(&self) -> Span {
16721672
match self {
16731673
ObjectNamePart::Identifier(ident) => ident.span,
1674+
ObjectNamePart::Function(func) => func
1675+
.name
1676+
.span
1677+
.union(&union_spans(func.args.iter().map(|i| i.span()))),
16741678
}
16751679
}
16761680
}

src/dialect/mod.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ pub use self::postgresql::PostgreSqlDialect;
4949
pub use self::redshift::RedshiftSqlDialect;
5050
pub use self::snowflake::SnowflakeDialect;
5151
pub use self::sqlite::SQLiteDialect;
52-
use crate::ast::{ColumnOption, Expr, GranteesType, Statement};
52+
use crate::ast::{ColumnOption, Expr, GranteesType, Ident, Statement};
5353
pub use crate::keywords;
5454
use crate::keywords::Keyword;
5555
use crate::parser::{Parser, ParserError};
@@ -1076,6 +1076,15 @@ pub trait Dialect: Debug + Any {
10761076
fn supports_comma_separated_drop_column_list(&self) -> bool {
10771077
false
10781078
}
1079+
1080+
/// Returns true if the dialect considers the specified ident as a function
1081+
/// that returns an identifier. Typically used to generate identifiers
1082+
/// programmatically.
1083+
///
1084+
/// - [Snowflake](https://docs.snowflake.com/en/sql-reference/identifier-literal)
1085+
fn is_identifier_generating_function_name(&self, _ident: &Ident) -> bool {
1086+
false
1087+
}
10791088
}
10801089

10811090
/// This represents the operators for which precedence must be defined

src/dialect/snowflake.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,15 @@ impl Dialect for SnowflakeDialect {
367367
fn supports_comma_separated_drop_column_list(&self) -> bool {
368368
true
369369
}
370+
371+
fn is_identifier_generating_function_name(&self, ident: &Ident) -> bool {
372+
ident.quote_style.is_none() && ident.value.to_lowercase() == "identifier"
373+
}
374+
375+
// For example: `SELECT IDENTIFIER('alias1').* FROM tbl AS alias1`
376+
fn supports_select_expr_star(&self) -> bool {
377+
true
378+
}
370379
}
371380

372381
fn parse_file_staging_command(kw: Keyword, parser: &mut Parser) -> Result<Statement, ParserError> {

src/parser/mod.rs

Lines changed: 78 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -10353,70 +10353,84 @@ impl<'a> Parser<'a> {
1035310353
}
1035410354
}
1035510355

10356-
/// Parse a possibly qualified, possibly quoted identifier, optionally allowing for wildcards,
10356+
/// Parse a possibly qualified, possibly quoted identifier, e.g.
10357+
/// `foo` or `myschema."table"`
10358+
///
10359+
/// The `in_table_clause` parameter indicates whether the object name is a table in a FROM, JOIN,
10360+
/// or similar table clause. Currently, this is used only to support unquoted hyphenated identifiers
10361+
/// in this context on BigQuery.
10362+
pub fn parse_object_name(&mut self, in_table_clause: bool) -> Result<ObjectName, ParserError> {
10363+
self.parse_object_name_inner(in_table_clause, false)
10364+
}
10365+
10366+
/// Parse a possibly qualified, possibly quoted identifier, e.g.
10367+
/// `foo` or `myschema."table"`
10368+
///
10369+
/// The `in_table_clause` parameter indicates whether the object name is a table in a FROM, JOIN,
10370+
/// or similar table clause. Currently, this is used only to support unquoted hyphenated identifiers
10371+
/// in this context on BigQuery.
10372+
///
10373+
/// The `allow_wildcards` parameter indicates whether to allow for wildcards in the object name
1035710374
/// e.g. *, *.*, `foo`.*, or "foo"."bar"
10358-
fn parse_object_name_with_wildcards(
10375+
fn parse_object_name_inner(
1035910376
&mut self,
1036010377
in_table_clause: bool,
1036110378
allow_wildcards: bool,
1036210379
) -> Result<ObjectName, ParserError> {
10363-
let mut idents = vec![];
10364-
10380+
let mut parts = vec![];
1036510381
if dialect_of!(self is BigQueryDialect) && in_table_clause {
1036610382
loop {
1036710383
let (ident, end_with_period) = self.parse_unquoted_hyphenated_identifier()?;
10368-
idents.push(ident);
10384+
parts.push(ObjectNamePart::Identifier(ident));
1036910385
if !self.consume_token(&Token::Period) && !end_with_period {
1037010386
break;
1037110387
}
1037210388
}
1037310389
} else {
1037410390
loop {
10375-
let ident = if allow_wildcards && self.peek_token().token == Token::Mul {
10391+
if allow_wildcards && self.peek_token().token == Token::Mul {
1037610392
let span = self.next_token().span;
10377-
Ident {
10393+
parts.push(ObjectNamePart::Identifier(Ident {
1037810394
value: Token::Mul.to_string(),
1037910395
quote_style: None,
1038010396
span,
10397+
}));
10398+
} else if let Some(func_part) =
10399+
self.maybe_parse(|parser| parser.parse_object_name_function_part())?
10400+
{
10401+
parts.push(ObjectNamePart::Function(func_part));
10402+
} else if dialect_of!(self is BigQueryDialect) && in_table_clause {
10403+
let (ident, end_with_period) = self.parse_unquoted_hyphenated_identifier()?;
10404+
parts.push(ObjectNamePart::Identifier(ident));
10405+
if !self.consume_token(&Token::Period) && !end_with_period {
10406+
break;
1038110407
}
10408+
} else if self.dialect.supports_object_name_double_dot_notation()
10409+
&& parts.len() == 1
10410+
&& matches!(self.peek_token().token, Token::Period)
10411+
{
10412+
// Empty string here means default schema
10413+
parts.push(ObjectNamePart::Identifier(Ident::new("")));
1038210414
} else {
10383-
if self.dialect.supports_object_name_double_dot_notation()
10384-
&& idents.len() == 1
10385-
&& self.consume_token(&Token::Period)
10386-
{
10387-
// Empty string here means default schema
10388-
idents.push(Ident::new(""));
10389-
}
10390-
self.parse_identifier()?
10391-
};
10392-
idents.push(ident);
10415+
let ident = self.parse_identifier()?;
10416+
parts.push(ObjectNamePart::Identifier(ident));
10417+
}
10418+
1039310419
if !self.consume_token(&Token::Period) {
1039410420
break;
1039510421
}
1039610422
}
1039710423
}
10398-
Ok(ObjectName::from(idents))
10399-
}
10400-
10401-
/// Parse a possibly qualified, possibly quoted identifier, e.g.
10402-
/// `foo` or `myschema."table"`
10403-
///
10404-
/// The `in_table_clause` parameter indicates whether the object name is a table in a FROM, JOIN,
10405-
/// or similar table clause. Currently, this is used only to support unquoted hyphenated identifiers
10406-
/// in this context on BigQuery.
10407-
pub fn parse_object_name(&mut self, in_table_clause: bool) -> Result<ObjectName, ParserError> {
10408-
let ObjectName(mut idents) =
10409-
self.parse_object_name_with_wildcards(in_table_clause, false)?;
1041010424

1041110425
// BigQuery accepts any number of quoted identifiers of a table name.
1041210426
// https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_identifiers
1041310427
if dialect_of!(self is BigQueryDialect)
10414-
&& idents.iter().any(|part| {
10428+
&& parts.iter().any(|part| {
1041510429
part.as_ident()
1041610430
.is_some_and(|ident| ident.value.contains('.'))
1041710431
})
1041810432
{
10419-
idents = idents
10433+
parts = parts
1042010434
.into_iter()
1042110435
.flat_map(|part| match part.as_ident() {
1042210436
Some(ident) => ident
@@ -10435,7 +10449,23 @@ impl<'a> Parser<'a> {
1043510449
.collect()
1043610450
}
1043710451

10438-
Ok(ObjectName(idents))
10452+
Ok(ObjectName(parts))
10453+
}
10454+
10455+
fn parse_object_name_function_part(&mut self) -> Result<ObjectNamePartFunction, ParserError> {
10456+
let name = self.parse_identifier()?;
10457+
if self.dialect.is_identifier_generating_function_name(&name) {
10458+
self.expect_token(&Token::LParen)?;
10459+
let args: Vec<FunctionArg> =
10460+
self.parse_comma_separated0(Self::parse_function_args, Token::RParen)?;
10461+
self.expect_token(&Token::RParen)?;
10462+
Ok(ObjectNamePartFunction { name, args })
10463+
} else {
10464+
self.expected(
10465+
"dialect specific identifier-generating function",
10466+
self.peek_token(),
10467+
)
10468+
}
1043910469
}
1044010470

1044110471
/// Parse identifiers
@@ -13938,25 +13968,25 @@ impl<'a> Parser<'a> {
1393813968
schemas: self.parse_comma_separated(|p| p.parse_object_name(false))?,
1393913969
})
1394013970
} else if self.parse_keywords(&[Keyword::RESOURCE, Keyword::MONITOR]) {
13941-
Some(GrantObjects::ResourceMonitors(self.parse_comma_separated(
13942-
|p| p.parse_object_name_with_wildcards(false, true),
13943-
)?))
13971+
Some(GrantObjects::ResourceMonitors(
13972+
self.parse_comma_separated(|p| p.parse_object_name(false))?,
13973+
))
1394413974
} else if self.parse_keywords(&[Keyword::COMPUTE, Keyword::POOL]) {
13945-
Some(GrantObjects::ComputePools(self.parse_comma_separated(
13946-
|p| p.parse_object_name_with_wildcards(false, true),
13947-
)?))
13975+
Some(GrantObjects::ComputePools(
13976+
self.parse_comma_separated(|p| p.parse_object_name(false))?,
13977+
))
1394813978
} else if self.parse_keywords(&[Keyword::FAILOVER, Keyword::GROUP]) {
13949-
Some(GrantObjects::FailoverGroup(self.parse_comma_separated(
13950-
|p| p.parse_object_name_with_wildcards(false, true),
13951-
)?))
13979+
Some(GrantObjects::FailoverGroup(
13980+
self.parse_comma_separated(|p| p.parse_object_name(false))?,
13981+
))
1395213982
} else if self.parse_keywords(&[Keyword::REPLICATION, Keyword::GROUP]) {
13953-
Some(GrantObjects::ReplicationGroup(self.parse_comma_separated(
13954-
|p| p.parse_object_name_with_wildcards(false, true),
13955-
)?))
13983+
Some(GrantObjects::ReplicationGroup(
13984+
self.parse_comma_separated(|p| p.parse_object_name(false))?,
13985+
))
1395613986
} else if self.parse_keywords(&[Keyword::EXTERNAL, Keyword::VOLUME]) {
13957-
Some(GrantObjects::ExternalVolumes(self.parse_comma_separated(
13958-
|p| p.parse_object_name_with_wildcards(false, true),
13959-
)?))
13987+
Some(GrantObjects::ExternalVolumes(
13988+
self.parse_comma_separated(|p| p.parse_object_name(false))?,
13989+
))
1396013990
} else {
1396113991
let object_type = self.parse_one_of_keywords(&[
1396213992
Keyword::SEQUENCE,
@@ -13973,7 +14003,7 @@ impl<'a> Parser<'a> {
1397314003
Keyword::CONNECTION,
1397414004
]);
1397514005
let objects =
13976-
self.parse_comma_separated(|p| p.parse_object_name_with_wildcards(false, true));
14006+
self.parse_comma_separated(|p| p.parse_object_name_inner(false, true));
1397714007
match object_type {
1397814008
Some(Keyword::DATABASE) => Some(GrantObjects::Databases(objects?)),
1397914009
Some(Keyword::SCHEMA) => Some(GrantObjects::Schemas(objects?)),

tests/sqlparser_common.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1232,14 +1232,18 @@ fn parse_select_expr_star() {
12321232
"SELECT 2. * 3 FROM T",
12331233
);
12341234
dialects.verified_only_select("SELECT myfunc().* FROM T");
1235-
dialects.verified_only_select("SELECT myfunc().* EXCEPT (foo) FROM T");
12361235

12371236
// Invalid
12381237
let res = dialects.parse_sql_statements("SELECT foo.*.* FROM T");
12391238
assert_eq!(
12401239
ParserError::ParserError("Expected: end of statement, found: .".to_string()),
12411240
res.unwrap_err()
12421241
);
1242+
1243+
let dialects = all_dialects_where(|d| {
1244+
d.supports_select_expr_star() && d.supports_select_wildcard_except()
1245+
});
1246+
dialects.verified_only_select("SELECT myfunc().* EXCEPT (foo) FROM T");
12431247
}
12441248

12451249
#[test]

0 commit comments

Comments
 (0)