// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
| 17 | + |
| 18 | +use crate::ast::{ |
| 19 | + Expr, Function, FunctionArg, FunctionArgExpr, FunctionArgumentList, FunctionArguments, Ident, |
| 20 | + Value, ValueWithSpan, |
| 21 | +}; |
| 22 | +use crate::dialect::Dialect; |
| 23 | +use crate::keywords::Keyword; |
| 24 | +use crate::parser::{Parser, ParserError}; |
| 25 | +use crate::tokenizer::Token; |
| 26 | +use regex::Regex; |
| 27 | +use std::sync::LazyLock; |
| 28 | + |
| 29 | +/// A dialect for [GreptimeDB](https://greptime.com/). |
| 30 | +#[derive(Debug, Copy, Clone)] |
| 31 | +pub struct GreptimeDbDialect; |
| 32 | + |
| 33 | +impl GreptimeDbDialect { |
| 34 | + /// Parse "Range" expr, syntax: |
| 35 | + /// `RANGE [ duration literal | (INTERVAL [interval expr]) ] FILL [ NULL | PREV | LINEAR | x]` |
| 36 | + fn parse_range_expr(&self, parser: &mut Parser, expr: &Expr) -> Result<Expr, ParserError> { |
| 37 | + // consume "RANGE" keyword |
| 38 | + parser.advance_token(); |
| 39 | + |
| 40 | + let range = if parser.consume_token(&Token::LParen) { |
| 41 | + let expr = parser.parse_expr()?; |
| 42 | + parser.expect_token(&Token::RParen)?; |
| 43 | + expr |
| 44 | + } else if let Ok(value) = parser.parse_value() { |
| 45 | + if !is_valid_duration_literal(&value) { |
| 46 | + return Err(ParserError::ParserError(format!( |
| 47 | + r#"Expected valid duration literal, found: "{}""#, |
| 48 | + value |
| 49 | + ))); |
| 50 | + } |
| 51 | + Expr::Value(value) |
| 52 | + } else { |
| 53 | + return parser.expected_ref( |
| 54 | + "duration literal or interval expr", |
| 55 | + parser.get_current_token(), |
| 56 | + ); |
| 57 | + }; |
| 58 | + |
| 59 | + let fill = if parser.parse_keyword(Keyword::FILL) { |
| 60 | + Value::SingleQuotedString(parser.next_token().to_string()) |
| 61 | + } else { |
| 62 | + Value::SingleQuotedString(String::new()) |
| 63 | + }; |
| 64 | + |
| 65 | + // TODO(LFC): rewrite it |
| 66 | + |
| 67 | + // Recursively rewrite function nested in expr to range function when RANGE keyword appear in Expr |
| 68 | + // Treat Function Argument as scalar function, not execute rewrite |
| 69 | + // follow the pattern of `range_fn(func, range, fill)` |
| 70 | + // if `fill` is `None`, the last parameter will be a empty single quoted string for placeholder |
| 71 | + // rate(metrics) RANGE '5m' -> range_fn(rate(metrics), '5m', '') |
| 72 | + // rate() RANGE '5m' FILL MAX -> range_fn(rate(), '5m', 'MAX') |
| 73 | + let mut rewrite_count = 0; |
| 74 | + let expr = rewrite_calculation_expr(&expr, false, &mut |e: &Expr| { |
| 75 | + if matches!(e, Expr::Function(..)) { |
| 76 | + let args = vec![ |
| 77 | + FunctionArg::Unnamed(FunctionArgExpr::Expr(e.clone())), |
| 78 | + FunctionArg::Unnamed(FunctionArgExpr::Expr(range.clone())), |
| 79 | + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(fill.clone().into()))), |
| 80 | + ]; |
| 81 | + let range_func = Function { |
| 82 | + name: vec![Ident::new("range_fn")].into(), |
| 83 | + over: None, |
| 84 | + filter: None, |
| 85 | + null_treatment: None, |
| 86 | + args: FunctionArguments::List(FunctionArgumentList { |
| 87 | + duplicate_treatment: None, |
| 88 | + args, |
| 89 | + clauses: vec![], |
| 90 | + }), |
| 91 | + parameters: FunctionArguments::None, |
| 92 | + within_group: vec![], |
| 93 | + uses_odbc_syntax: false, |
| 94 | + }; |
| 95 | + rewrite_count += 1; |
| 96 | + Ok(Some(Expr::Function(range_func))) |
| 97 | + } else { |
| 98 | + Ok(None) |
| 99 | + } |
| 100 | + })?; |
| 101 | + if rewrite_count == 0 { |
| 102 | + return Err(ParserError::ParserError(format!( |
| 103 | + "Can't use the RANGE keyword in Expr {} without function", |
| 104 | + expr |
| 105 | + ))); |
| 106 | + } |
| 107 | + Ok(expr) |
| 108 | + } |
| 109 | +} |
| 110 | + |
| 111 | +/// Recursively rewrite a nested calculation `Expr` |
| 112 | +/// |
| 113 | +/// The function's return type is `Result<Option<Expr>>>`, where: |
| 114 | +/// |
| 115 | +/// * `Ok(Some(replacement_expr))`: A replacement `Expr` is provided, use replacement `Expr`. |
| 116 | +/// * `Ok(None)`: A replacement `Expr` is not provided, use old `Expr`. |
| 117 | +/// * `Err(err)`: Any error returned. |
| 118 | +fn rewrite_calculation_expr<F>( |
| 119 | + expr: &Expr, |
| 120 | + rewrite_func_expr: bool, |
| 121 | + replacement_fn: &mut F, |
| 122 | +) -> Result<Expr, ParserError> |
| 123 | +where |
| 124 | + F: FnMut(&Expr) -> Result<Option<Expr>, ParserError>, |
| 125 | +{ |
| 126 | + match replacement_fn(expr)? { |
| 127 | + Some(replacement) => Ok(replacement), |
| 128 | + None => match expr { |
| 129 | + Expr::BinaryOp { left, op, right } => Ok(Expr::BinaryOp { |
| 130 | + left: Box::new(rewrite_calculation_expr( |
| 131 | + left, |
| 132 | + rewrite_func_expr, |
| 133 | + replacement_fn, |
| 134 | + )?), |
| 135 | + op: op.clone(), |
| 136 | + right: Box::new(rewrite_calculation_expr( |
| 137 | + right, |
| 138 | + rewrite_func_expr, |
| 139 | + replacement_fn, |
| 140 | + )?), |
| 141 | + }), |
| 142 | + Expr::Nested(expr) => Ok(Expr::Nested(Box::new(rewrite_calculation_expr( |
| 143 | + expr, |
| 144 | + rewrite_func_expr, |
| 145 | + replacement_fn, |
| 146 | + )?))), |
| 147 | + Expr::Cast { |
| 148 | + kind, |
| 149 | + expr, |
| 150 | + data_type, |
| 151 | + format, |
| 152 | + } => Ok(Expr::Cast { |
| 153 | + kind: kind.clone(), |
| 154 | + expr: Box::new(rewrite_calculation_expr( |
| 155 | + expr, |
| 156 | + rewrite_func_expr, |
| 157 | + replacement_fn, |
| 158 | + )?), |
| 159 | + data_type: data_type.clone(), |
| 160 | + format: format.clone(), |
| 161 | + }), |
| 162 | + // Scalar function `ceil(val)` will be parse as `Expr::Ceil` instead of `Expr::Function` |
| 163 | + Expr::Ceil { expr, field } => Ok(Expr::Ceil { |
| 164 | + expr: Box::new(rewrite_calculation_expr( |
| 165 | + expr, |
| 166 | + rewrite_func_expr, |
| 167 | + replacement_fn, |
| 168 | + )?), |
| 169 | + field: field.clone(), |
| 170 | + }), |
| 171 | + // Scalar function `floor(val)` will be parse as `Expr::Floor` instead of `Expr::Function` |
| 172 | + Expr::Floor { expr, field } => Ok(Expr::Floor { |
| 173 | + expr: Box::new(rewrite_calculation_expr( |
| 174 | + expr, |
| 175 | + rewrite_func_expr, |
| 176 | + replacement_fn, |
| 177 | + )?), |
| 178 | + field: field.clone(), |
| 179 | + }), |
| 180 | + Expr::Function(func) if rewrite_func_expr => { |
| 181 | + let mut func = func.clone(); |
| 182 | + if let FunctionArguments::List(args) = &mut func.args { |
| 183 | + for fn_arg in &mut args.args { |
| 184 | + if let FunctionArg::Named { |
| 185 | + arg: FunctionArgExpr::Expr(expr), |
| 186 | + .. |
| 187 | + } |
| 188 | + | FunctionArg::Unnamed(FunctionArgExpr::Expr(expr)) = fn_arg |
| 189 | + { |
| 190 | + *expr = |
| 191 | + rewrite_calculation_expr(expr, rewrite_func_expr, replacement_fn)?; |
| 192 | + } |
| 193 | + } |
| 194 | + } |
| 195 | + Ok(Expr::Function(func)) |
| 196 | + } |
| 197 | + expr => Ok(expr.clone()), |
| 198 | + }, |
| 199 | + } |
| 200 | +} |
| 201 | + |
| 202 | +impl Dialect for GreptimeDbDialect { |
| 203 | + fn is_identifier_start(&self, ch: char) -> bool { |
| 204 | + ch.is_alphabetic() || ch == '_' || ch == '#' || ch == '@' |
| 205 | + } |
| 206 | + |
| 207 | + fn is_identifier_part(&self, ch: char) -> bool { |
| 208 | + ch.is_alphabetic() |
| 209 | + || ch.is_ascii_digit() |
| 210 | + || ch == '@' |
| 211 | + || ch == '$' |
| 212 | + || ch == '#' |
| 213 | + || ch == '_' |
| 214 | + } |
| 215 | + |
| 216 | + fn supports_filter_during_aggregation(&self) -> bool { |
| 217 | + true |
| 218 | + } |
| 219 | + |
| 220 | + fn parse_infix( |
| 221 | + &self, |
| 222 | + parser: &mut Parser, |
| 223 | + expr: &Expr, |
| 224 | + _precedence: u8, |
| 225 | + ) -> Option<Result<Expr, ParserError>> { |
| 226 | + log::debug!("dialect parse_infix: expr {:?}", expr); |
| 227 | + if parser.peek_keyword(Keyword::RANGE) { |
| 228 | + Some(self.parse_range_expr(parser, expr)) |
| 229 | + } else { |
| 230 | + None |
| 231 | + } |
| 232 | + } |
| 233 | + |
| 234 | + fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> { |
| 235 | + let token = parser.peek_token(); |
| 236 | + match token.token { |
| 237 | + Token::Word(w) if w.keyword == Keyword::RANGE => Some(Ok(u8::MAX)), |
| 238 | + _ => None, |
| 239 | + } |
| 240 | + } |
| 241 | +} |
| 242 | + |
| 243 | +static DURATION_LITERAL: LazyLock<Regex> = LazyLock::new(|| { |
| 244 | + Regex::new( |
| 245 | + r"(?x) |
| 246 | +^ |
| 247 | +((?P<y>[0-9]+)y)? |
| 248 | +((?P<w>[0-9]+)w)? |
| 249 | +((?P<d>[0-9]+)d)? |
| 250 | +((?P<h>[0-9]+)h)? |
| 251 | +((?P<m>[0-9]+)m)? |
| 252 | +((?P<s>[0-9]+)s)? |
| 253 | +((?P<ms>[0-9]+)ms)? |
| 254 | +$", |
| 255 | + ) |
| 256 | + .unwrap_or_else(|e| panic!("{e}")) |
| 257 | +}); |
| 258 | + |
| 259 | +// Checks if Value is a valid Duration literal. |
| 260 | +// Regular Expression Reference: https://github.com/GreptimeTeam/promql-parser/blob/main/src/util/duration.rs |
| 261 | +fn is_valid_duration_literal(v: &ValueWithSpan) -> bool { |
| 262 | + match &v.value { |
| 263 | + Value::SingleQuotedString(s) | Value::DoubleQuotedString(s) => { |
| 264 | + !s.is_empty() && DURATION_LITERAL.is_match(s) |
| 265 | + } |
| 266 | + _ => false, |
| 267 | + } |
| 268 | +} |
| 269 | + |
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a single-quoted string value from `text` and runs the validator on it.
    fn accepts(text: &str) -> bool {
        is_valid_duration_literal(&Value::SingleQuotedString(text.to_string()).into())
    }

    #[test]
    fn test_is_valid_duration_literal() {
        // valid: single units and combinations listed from the largest unit down
        for x in [
            "1y", "2w", "3d", "4h", "5m", "6s", "7ms", "1y2w3d", "4h30m", "3600ms",
        ] {
            assert!(accepts(x), "{x}");
        }

        // invalid: missing unit, units out of order, sign, fraction, missing number, empty
        for x in ["1", "1y1m1d", "-1w", "1.5d", "d", "", "0"] {
            assert!(!accepts(x), "{x}");
        }
    }
}
0 commit comments