Skip to content

Commit c1fe077

Browse files
x
1 parent ed41654 commit c1fe077

File tree

7 files changed

+901
-10
lines changed

7 files changed

+901
-10
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ serde = { version = "1.0", default-features = false, features = ["derive", "allo
5555
# https://github.com/rust-lang/cargo/issues/1596
5656
serde_json = { version = "1.0", optional = true }
5757
sqlparser_derive = { version = "0.3.0", path = "derive", optional = true }
58+
regex = "1"
5859

5960
[dev-dependencies]
6061
simple_logger = "5.0"

examples/parse_select.rs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,17 @@
1717

1818
#![warn(clippy::all)]
1919

20-
use sqlparser::dialect::GenericDialect;
20+
use sqlparser::dialect::GreptimeDbDialect;
2121
use sqlparser::parser::*;
2222

2323
fn main() {
24-
let sql = "SELECT a, b, 123, myfunc(b) \
25-
FROM table_1 \
26-
WHERE a > b AND b < 100 \
27-
ORDER BY a DESC, b";
24+
simple_logger::init().unwrap();
2825

29-
let dialect = GenericDialect {};
26+
let sql = "SELECT sum(metrics) FILL MAX FROM t FILL NULL ALIGN '1h';";
27+
28+
let dialect = GreptimeDbDialect;
3029

3130
let ast = Parser::parse_sql(&dialect, sql).unwrap();
3231

33-
println!("AST: {ast:?}");
32+
println!("AST: {ast:#?}");
3433
}

src/dialect/greptimedb.rs

Lines changed: 298 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,298 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use crate::ast::{
19+
Expr, Function, FunctionArg, FunctionArgExpr, FunctionArgumentList, FunctionArguments, Ident,
20+
Value, ValueWithSpan,
21+
};
22+
use crate::dialect::Dialect;
23+
use crate::keywords::Keyword;
24+
use crate::parser::{Parser, ParserError};
25+
use crate::tokenizer::Token;
26+
use regex::Regex;
27+
use std::sync::LazyLock;
28+
29+
/// A dialect for [GreptimeDB](https://greptime.com/).
30+
#[derive(Debug, Copy, Clone)]
31+
pub struct GreptimeDbDialect;
32+
33+
impl GreptimeDbDialect {
34+
/// Parse "Range" expr, syntax:
35+
/// `RANGE [ duration literal | (INTERVAL [interval expr]) ] FILL [ NULL | PREV | LINEAR | x]`
36+
fn parse_range_expr(&self, parser: &mut Parser, expr: &Expr) -> Result<Expr, ParserError> {
37+
// consume "RANGE" keyword
38+
parser.advance_token();
39+
40+
let range = if parser.consume_token(&Token::LParen) {
41+
let expr = parser.parse_expr()?;
42+
parser.expect_token(&Token::RParen)?;
43+
expr
44+
} else if let Ok(value) = parser.parse_value() {
45+
if !is_valid_duration_literal(&value) {
46+
return Err(ParserError::ParserError(format!(
47+
r#"Expected valid duration literal, found: "{}""#,
48+
value
49+
)));
50+
}
51+
Expr::Value(value)
52+
} else {
53+
return parser.expected_ref(
54+
"duration literal or interval expr",
55+
parser.get_current_token(),
56+
);
57+
};
58+
59+
let fill = if parser.parse_keyword(Keyword::FILL) {
60+
Value::SingleQuotedString(parser.next_token().to_string())
61+
} else {
62+
Value::SingleQuotedString(String::new())
63+
};
64+
65+
// TODO(LFC): rewrite it
66+
67+
// Recursively rewrite function nested in expr to range function when RANGE keyword appear in Expr
68+
// Treat Function Argument as scalar function, not execute rewrite
69+
// follow the pattern of `range_fn(func, range, fill)`
70+
// if `fill` is `None`, the last parameter will be a empty single quoted string for placeholder
71+
// rate(metrics) RANGE '5m' -> range_fn(rate(metrics), '5m', '')
72+
// rate() RANGE '5m' FILL MAX -> range_fn(rate(), '5m', 'MAX')
73+
let mut rewrite_count = 0;
74+
let expr = rewrite_calculation_expr(&expr, false, &mut |e: &Expr| {
75+
if matches!(e, Expr::Function(..)) {
76+
let args = vec![
77+
FunctionArg::Unnamed(FunctionArgExpr::Expr(e.clone())),
78+
FunctionArg::Unnamed(FunctionArgExpr::Expr(range.clone())),
79+
FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(fill.clone().into()))),
80+
];
81+
let range_func = Function {
82+
name: vec![Ident::new("range_fn")].into(),
83+
over: None,
84+
filter: None,
85+
null_treatment: None,
86+
args: FunctionArguments::List(FunctionArgumentList {
87+
duplicate_treatment: None,
88+
args,
89+
clauses: vec![],
90+
}),
91+
parameters: FunctionArguments::None,
92+
within_group: vec![],
93+
uses_odbc_syntax: false,
94+
};
95+
rewrite_count += 1;
96+
Ok(Some(Expr::Function(range_func)))
97+
} else {
98+
Ok(None)
99+
}
100+
})?;
101+
if rewrite_count == 0 {
102+
return Err(ParserError::ParserError(format!(
103+
"Can't use the RANGE keyword in Expr {} without function",
104+
expr
105+
)));
106+
}
107+
Ok(expr)
108+
}
109+
}
110+
111+
/// Recursively rewrite a nested calculation `Expr`
112+
///
113+
/// The function's return type is `Result<Option<Expr>>>`, where:
114+
///
115+
/// * `Ok(Some(replacement_expr))`: A replacement `Expr` is provided, use replacement `Expr`.
116+
/// * `Ok(None)`: A replacement `Expr` is not provided, use old `Expr`.
117+
/// * `Err(err)`: Any error returned.
118+
fn rewrite_calculation_expr<F>(
119+
expr: &Expr,
120+
rewrite_func_expr: bool,
121+
replacement_fn: &mut F,
122+
) -> Result<Expr, ParserError>
123+
where
124+
F: FnMut(&Expr) -> Result<Option<Expr>, ParserError>,
125+
{
126+
match replacement_fn(expr)? {
127+
Some(replacement) => Ok(replacement),
128+
None => match expr {
129+
Expr::BinaryOp { left, op, right } => Ok(Expr::BinaryOp {
130+
left: Box::new(rewrite_calculation_expr(
131+
left,
132+
rewrite_func_expr,
133+
replacement_fn,
134+
)?),
135+
op: op.clone(),
136+
right: Box::new(rewrite_calculation_expr(
137+
right,
138+
rewrite_func_expr,
139+
replacement_fn,
140+
)?),
141+
}),
142+
Expr::Nested(expr) => Ok(Expr::Nested(Box::new(rewrite_calculation_expr(
143+
expr,
144+
rewrite_func_expr,
145+
replacement_fn,
146+
)?))),
147+
Expr::Cast {
148+
kind,
149+
expr,
150+
data_type,
151+
format,
152+
} => Ok(Expr::Cast {
153+
kind: kind.clone(),
154+
expr: Box::new(rewrite_calculation_expr(
155+
expr,
156+
rewrite_func_expr,
157+
replacement_fn,
158+
)?),
159+
data_type: data_type.clone(),
160+
format: format.clone(),
161+
}),
162+
// Scalar function `ceil(val)` will be parse as `Expr::Ceil` instead of `Expr::Function`
163+
Expr::Ceil { expr, field } => Ok(Expr::Ceil {
164+
expr: Box::new(rewrite_calculation_expr(
165+
expr,
166+
rewrite_func_expr,
167+
replacement_fn,
168+
)?),
169+
field: field.clone(),
170+
}),
171+
// Scalar function `floor(val)` will be parse as `Expr::Floor` instead of `Expr::Function`
172+
Expr::Floor { expr, field } => Ok(Expr::Floor {
173+
expr: Box::new(rewrite_calculation_expr(
174+
expr,
175+
rewrite_func_expr,
176+
replacement_fn,
177+
)?),
178+
field: field.clone(),
179+
}),
180+
Expr::Function(func) if rewrite_func_expr => {
181+
let mut func = func.clone();
182+
if let FunctionArguments::List(args) = &mut func.args {
183+
for fn_arg in &mut args.args {
184+
if let FunctionArg::Named {
185+
arg: FunctionArgExpr::Expr(expr),
186+
..
187+
}
188+
| FunctionArg::Unnamed(FunctionArgExpr::Expr(expr)) = fn_arg
189+
{
190+
*expr =
191+
rewrite_calculation_expr(expr, rewrite_func_expr, replacement_fn)?;
192+
}
193+
}
194+
}
195+
Ok(Expr::Function(func))
196+
}
197+
expr => Ok(expr.clone()),
198+
},
199+
}
200+
}
201+
202+
impl Dialect for GreptimeDbDialect {
203+
fn is_identifier_start(&self, ch: char) -> bool {
204+
ch.is_alphabetic() || ch == '_' || ch == '#' || ch == '@'
205+
}
206+
207+
fn is_identifier_part(&self, ch: char) -> bool {
208+
ch.is_alphabetic()
209+
|| ch.is_ascii_digit()
210+
|| ch == '@'
211+
|| ch == '$'
212+
|| ch == '#'
213+
|| ch == '_'
214+
}
215+
216+
fn supports_filter_during_aggregation(&self) -> bool {
217+
true
218+
}
219+
220+
fn parse_infix(
221+
&self,
222+
parser: &mut Parser,
223+
expr: &Expr,
224+
_precedence: u8,
225+
) -> Option<Result<Expr, ParserError>> {
226+
log::debug!("dialect parse_infix: expr {:?}", expr);
227+
if parser.peek_keyword(Keyword::RANGE) {
228+
Some(self.parse_range_expr(parser, expr))
229+
} else {
230+
None
231+
}
232+
}
233+
234+
fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
235+
let token = parser.peek_token();
236+
match token.token {
237+
Token::Word(w) if w.keyword == Keyword::RANGE => Some(Ok(u8::MAX)),
238+
_ => None,
239+
}
240+
}
241+
}
242+
243+
static DURATION_LITERAL: LazyLock<Regex> = LazyLock::new(|| {
244+
Regex::new(
245+
r"(?x)
246+
^
247+
((?P<y>[0-9]+)y)?
248+
((?P<w>[0-9]+)w)?
249+
((?P<d>[0-9]+)d)?
250+
((?P<h>[0-9]+)h)?
251+
((?P<m>[0-9]+)m)?
252+
((?P<s>[0-9]+)s)?
253+
((?P<ms>[0-9]+)ms)?
254+
$",
255+
)
256+
.unwrap_or_else(|e| panic!("{e}"))
257+
});
258+
259+
// Checks if Value is a valid Duration literal.
260+
// Regular Expression Reference: https://github.com/GreptimeTeam/promql-parser/blob/main/src/util/duration.rs
261+
fn is_valid_duration_literal(v: &ValueWithSpan) -> bool {
262+
match &v.value {
263+
Value::SingleQuotedString(s) | Value::DoubleQuotedString(s) => {
264+
!s.is_empty() && DURATION_LITERAL.is_match(s)
265+
}
266+
_ => false,
267+
}
268+
}
269+
270+
#[cfg(test)]
mod tests {
    use super::*;

    /// Wraps `literal` in a single quoted string value and runs the validity check.
    fn is_valid(literal: &str) -> bool {
        is_valid_duration_literal(&Value::SingleQuotedString(literal.to_string()).into())
    }

    #[test]
    fn test_is_valid_duration_literal() {
        // valid
        let valid = [
            "1y", "2w", "3d", "4h", "5m", "6s", "7ms", "1y2w3d", "4h30m", "3600ms",
        ];
        for x in valid {
            assert!(is_valid(x), "{x}");
        }

        // invalid
        let invalid = ["1", "1y1m1d", "-1w", "1.5d", "d", "", "0"];
        for x in invalid {
            assert!(!is_valid(x), "{x}");
        }
    }
}

src/dialect/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ mod clickhouse;
2121
mod databricks;
2222
mod duckdb;
2323
mod generic;
24+
mod greptimedb;
2425
mod hive;
2526
mod mssql;
2627
mod mysql;
@@ -42,6 +43,7 @@ pub use self::clickhouse::ClickHouseDialect;
4243
pub use self::databricks::DatabricksDialect;
4344
pub use self::duckdb::DuckDbDialect;
4445
pub use self::generic::GenericDialect;
46+
pub use self::greptimedb::GreptimeDbDialect;
4547
pub use self::hive::HiveDialect;
4648
pub use self::mssql::MsSqlDialect;
4749
pub use self::mysql::MySqlDialect;

src/keywords.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ define_keywords!(
8787
ALERT,
8888
ALGORITHM,
8989
ALIAS,
90+
ALIGN,
9091
ALL,
9192
ALLOCATE,
9293
ALTER,
@@ -1038,6 +1039,9 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
10381039
Keyword::SAMPLE,
10391040
Keyword::TABLESAMPLE,
10401041
Keyword::FROM,
1042+
// for GreptimeDB Range select
1043+
Keyword::ALIGN,
1044+
Keyword::FILL,
10411045
];
10421046

10431047
/// Can't be used as a column alias, so that `SELECT <expr> alias`
@@ -1070,6 +1074,9 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[
10701074
Keyword::FROM,
10711075
Keyword::INTO,
10721076
Keyword::END,
1077+
// for GreptimeDB Range select
1078+
Keyword::RANGE,
1079+
Keyword::FILL,
10731080
];
10741081

10751082
// Global list of reserved keywords allowed after FROM.

0 commit comments

Comments
 (0)