Skip to content

Commit

Permalink
feat: issue #8969 adding position function
Browse files Browse the repository at this point in the history
adding proto
  • Loading branch information
Lordworms committed Jan 25, 2024
1 parent 5e9c9a1 commit 2ef9087
Show file tree
Hide file tree
Showing 12 changed files with 173 additions and 10 deletions.
10 changes: 9 additions & 1 deletion datafusion/expr/src/built_in_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,8 @@ pub enum BuiltinScalarFunction {
NullIf,
/// octet_length
OctetLength,
/// position
Position,
/// random
Random,
/// regexp_replace
Expand Down Expand Up @@ -460,6 +462,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::MD5 => Volatility::Immutable,
BuiltinScalarFunction::NullIf => Volatility::Immutable,
BuiltinScalarFunction::OctetLength => Volatility::Immutable,
BuiltinScalarFunction::Position => Volatility::Immutable,
BuiltinScalarFunction::Radians => Volatility::Immutable,
BuiltinScalarFunction::RegexpReplace => Volatility::Immutable,
BuiltinScalarFunction::Repeat => Volatility::Immutable,
Expand Down Expand Up @@ -735,6 +738,9 @@ impl BuiltinScalarFunction {
utf8_to_int_type(&input_expr_types[0], "octet_length")
}
BuiltinScalarFunction::Pi => Ok(Float64),
BuiltinScalarFunction::Position => {
utf8_to_int_type(&input_expr_types[0], "position")
}
BuiltinScalarFunction::Random => Ok(Float64),
BuiltinScalarFunction::Uuid => Ok(Utf8),
BuiltinScalarFunction::RegexpReplace => {
Expand Down Expand Up @@ -1225,7 +1231,8 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::EndsWith
| BuiltinScalarFunction::InStr
| BuiltinScalarFunction::Strpos
| BuiltinScalarFunction::StartsWith => Signature::one_of(
| BuiltinScalarFunction::StartsWith
| BuiltinScalarFunction::Position => Signature::one_of(
vec![
Exact(vec![Utf8, Utf8]),
Exact(vec![Utf8, LargeUtf8]),
Expand Down Expand Up @@ -1498,6 +1505,7 @@ impl BuiltinScalarFunction {
BuiltinScalarFunction::Reverse => &["reverse"],
BuiltinScalarFunction::Right => &["right"],
BuiltinScalarFunction::Rpad => &["rpad"],
BuiltinScalarFunction::Position => &["position"],
BuiltinScalarFunction::Rtrim => &["rtrim"],
BuiltinScalarFunction::SplitPart => &["split_part"],
BuiltinScalarFunction::StringToArray => {
Expand Down
6 changes: 6 additions & 0 deletions datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,12 @@ scalar_expr!(
string,
"returns the number of bytes of a string"
);
scalar_expr!(
    Position,
    position,
    substring string,
    "returns the 1-based position of the first occurrence of `substring` in `string`, or 0 if `substring` is not found"
);
scalar_expr!(Replace, replace, string from to, "replaces all occurrences of `from` with `to` in the `string`");
scalar_expr!(Repeat, repeat, string n, "repeats the `string` to `n` times");
scalar_expr!(Reverse, reverse, string, "reverses the `string`");
Expand Down
11 changes: 11 additions & 0 deletions datafusion/physical-expr/src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -938,6 +938,17 @@ pub fn create_physical_fun(
"Unsupported data type {other:?} for function overlay",
))),
}),
BuiltinScalarFunction::Position => Arc::new(|args| match args[0].data_type() {
DataType::Utf8 => {
make_scalar_function_inner(string_expressions::position::<i32>)(args)
}
DataType::LargeUtf8 => {
make_scalar_function_inner(string_expressions::position::<i64>)(args)
}
other => Err(DataFusionError::Internal(format!(
"Unsupported data type {other:?} for function position"
))),
}),
BuiltinScalarFunction::Levenshtein => {
Arc::new(|args| match args[0].data_type() {
DataType::Utf8 => make_scalar_function_inner(
Expand Down
53 changes: 53 additions & 0 deletions datafusion/physical-expr/src/string_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,48 @@ pub fn uuid(args: &[ColumnarValue]) -> Result<ColumnarValue> {
let array = GenericStringArray::<i32>::from_iter_values(values);
Ok(ColumnarValue::Array(Arc::new(array)))
}
/// Returns the 1-based character position of the first occurrence of a
/// substring within a string, or 0 when the substring does not occur
/// (SQL `POSITION(substr IN str)`).
///
/// `args[0]` is the substring to search for and `args[1]` is the string to
/// search in; both are read as string arrays with offset type `T`. The result
/// is an `Int32Array` when `args[0]` is `Utf8` and an `Int64Array` when it is
/// `LargeUtf8`. A null in either argument yields a null for that row.
///
/// position('world' in 'Helloworld') = 6
///
/// # Errors
///
/// Returns an internal error when the number of arguments is not 2, when an
/// argument cannot be downcast to a string array with offset type `T`, or when
/// `args[0]` is neither `Utf8` nor `LargeUtf8`.
pub fn position<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
    /// 1-based character position of `substr` in `string`, 0 if absent.
    fn char_position(string: &str, substr: &str) -> usize {
        // `str::find` yields a *byte* offset. SQL positions count characters,
        // so count the characters that precede the match instead of using the
        // byte offset directly (they differ for multi-byte UTF-8 text).
        // Slicing at `byte_idx` is safe: `find` always returns a char boundary.
        string
            .find(substr)
            .map_or(0, |byte_idx| string[..byte_idx].chars().count() + 1)
    }

    // Guard the indexing below so a wrong argument count is reported as an
    // error instead of panicking.
    if args.len() != 2 {
        return internal_err!(
            "position was called with {} arguments. It requires exactly 2.",
            args.len()
        );
    }

    let substr_arr = as_generic_string_array::<T>(&args[0])?;
    let str_arr = as_generic_string_array::<T>(&args[1])?;

    match args[0].data_type() {
        DataType::Utf8 => {
            let result = str_arr
                .iter()
                .zip(substr_arr.iter())
                .map(|(string, substr)| match (string, substr) {
                    // The character position never exceeds the byte length,
                    // which is bounded by the i32 offsets of a Utf8 array,
                    // so this cast cannot truncate.
                    (Some(string), Some(substr)) => {
                        Some(char_position(string, substr) as i32)
                    }
                    _ => None,
                })
                .collect::<Int32Array>();

            Ok(Arc::new(result) as ArrayRef)
        }
        DataType::LargeUtf8 => {
            let result = str_arr
                .iter()
                .zip(substr_arr.iter())
                .map(|(string, substr)| match (string, substr) {
                    (Some(string), Some(substr)) => {
                        Some(char_position(string, substr) as i64)
                    }
                    _ => None,
                })
                .collect::<Int64Array>();

            Ok(Arc::new(result) as ArrayRef)
        }
        other => {
            internal_err!(
                "position was called with {other} datatype arguments. It requires Utf8 or LargeUtf8."
            )
        }
    }
}

/// OVERLAY(string1 PLACING string2 FROM integer FOR integer2)
/// Replaces a substring of string1 with string2 starting at the integer bit
Expand Down Expand Up @@ -787,4 +829,15 @@ mod tests {

Ok(())
}
#[test]
fn to_position() -> Result<()> {
    // Row 0: substring present   -> 1-based position of the match.
    // Row 1: substring absent    -> 0.
    // Row 2: null substring      -> null.
    let substr_arr =
        Arc::new(StringArray::from(vec![Some("world"), Some("xyz"), None]));
    let str_arr = Arc::new(StringArray::from(vec![
        Some("Hello, world"),
        Some("Hello, world"),
        Some("Hello, world"),
    ]));

    // The test already returns `Result`, so propagate failures with `?`
    // instead of panicking through `unwrap`/`expect`.
    let res = position::<i32>(&[substr_arr, str_arr])?;
    let result = as_int32_array(&res)?;

    let expected = Int32Array::from(vec![Some(8), Some(0), None]);
    assert_eq!(&expected, result);
    Ok(())
}
}
1 change: 1 addition & 0 deletions datafusion/proto/proto/datafusion.proto
Original file line number Diff line number Diff line change
Expand Up @@ -671,6 +671,7 @@ enum ScalarFunction {
ArrayResize = 130;
EndsWith = 131;
InStr = 132;
Position = 133;
}

message ScalarFunctionNode {
Expand Down
3 changes: 3 additions & 0 deletions datafusion/proto/src/generated/pbjson.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions datafusion/proto/src/generated/prost.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 12 additions & 7 deletions datafusion/proto/src/logical_plan/from_proto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,13 @@ use datafusion_expr::{
factorial, find_in_set, flatten, floor, from_unixtime, gcd, gen_range, initcap,
instr, isnan, iszero, lcm, left, levenshtein, ln, log, log10, log2,
logical_plan::{PlanType, StringifiedPlan},
lower, lpad, ltrim, md5, nanvl, now, nullif, octet_length, overlay, pi, power,
radians, random, regexp_match, regexp_replace, repeat, replace, reverse, right,
round, rpad, rtrim, sha224, sha256, sha384, sha512, signum, sin, sinh, split_part,
sqrt, starts_with, string_to_array, strpos, struct_fun, substr, substr_index,
substring, tan, tanh, to_hex, translate, trim, trunc, upper, uuid, AggregateFunction,
Between, BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction, Case, Cast, Expr,
GetFieldAccess, GetIndexedField, GroupingSet,
lower, lpad, ltrim, md5, nanvl, now, nullif, octet_length, overlay, pi, position,
power, radians, random, regexp_match, regexp_replace, repeat, replace, reverse,
right, round, rpad, rtrim, sha224, sha256, sha384, sha512, signum, sin, sinh,
split_part, sqrt, starts_with, string_to_array, strpos, struct_fun, substr,
substr_index, substring, tan, tanh, to_hex, translate, trim, trunc, upper, uuid,
AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction,
Case, Cast, Expr, GetFieldAccess, GetIndexedField, GroupingSet,
GroupingSet::GroupingSets,
JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound,
WindowFrameUnits,
Expand Down Expand Up @@ -534,6 +534,7 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
ScalarFunction::InStr => Self::InStr,
ScalarFunction::Left => Self::Left,
ScalarFunction::Lpad => Self::Lpad,
ScalarFunction::Position => Self::Position,
ScalarFunction::Random => Self::Random,
ScalarFunction::RegexpReplace => Self::RegexpReplace,
ScalarFunction::Repeat => Self::Repeat,
Expand Down Expand Up @@ -1592,6 +1593,10 @@ pub fn parse_expr(
parse_expr(&args[0], registry)?,
parse_expr(&args[1], registry)?,
)),
ScalarFunction::Position => Ok(position(
parse_expr(&args[0], registry)?,
parse_expr(&args[1], registry)?,
)),
ScalarFunction::Gcd => Ok(gcd(
parse_expr(&args[0], registry)?,
parse_expr(&args[1], registry)?,
Expand Down
1 change: 1 addition & 0 deletions datafusion/proto/src/logical_plan/to_proto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1530,6 +1530,7 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
BuiltinScalarFunction::InStr => Self::InStr,
BuiltinScalarFunction::Left => Self::Left,
BuiltinScalarFunction::Lpad => Self::Lpad,
BuiltinScalarFunction::Position => Self::Position,
BuiltinScalarFunction::Random => Self::Random,
BuiltinScalarFunction::Uuid => Self::Uuid,
BuiltinScalarFunction::RegexpReplace => Self::RegexpReplace,
Expand Down
19 changes: 17 additions & 2 deletions datafusion/sql/src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
SQLExpr::Struct { values, fields } => {
self.parse_struct(values, fields, schema, planner_context)
}

SQLExpr::Position { expr, r#in } => {
self.sql_position_to_expr(*expr, *r#in, schema, planner_context)
}
_ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"),
}
}
Expand Down Expand Up @@ -704,7 +706,20 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
};
Ok(Expr::ScalarFunction(ScalarFunction::new(fun, args)))
}

/// Plans the SQL `POSITION(<substr_expr> IN <str_expr>)` syntax as a call to
/// the built-in `Position` scalar function.
///
/// Both operands are converted to logical expressions against `schema`; the
/// substring is passed as the first argument and the searched string as the
/// second.
fn sql_position_to_expr(
    &self,
    substr_expr: SQLExpr,
    str_expr: SQLExpr,
    schema: &DFSchema,
    planner_context: &mut PlannerContext,
) -> Result<Expr> {
    // Convert the operands in source order: the substring first, then the
    // string being searched.
    let needle = self.sql_expr_to_logical_expr(substr_expr, schema, planner_context)?;
    let haystack = self.sql_expr_to_logical_expr(str_expr, schema, planner_context)?;

    Ok(Expr::ScalarFunction(ScalarFunction::new(
        BuiltinScalarFunction::Position,
        vec![needle, haystack],
    )))
}
fn sql_agg_with_filter_to_expr(
&self,
expr: SQLExpr,
Expand Down
43 changes: 43 additions & 0 deletions datafusion/sqllogictest/test_files/position.slt
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# test position in select
query I
select position('world' in 'hello world');
----
7



# test in expression
query I
select 1000 where position('world' in 'hello world') != 100;
----
1000


# test in WHERE clause: the predicate is true, so the row is returned
query I
select 100000 where position('legend' in 'league of legend') = 11;
----
100000


# test in WHERE clause: the predicate is false, so no rows are returned
query I
select 100000 where position('legend' in 'league of legend') != 11;
----
14 changes: 14 additions & 0 deletions docs/source/user-guide/sql/scalar_functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,7 @@ nullif(expression1, expression2)
- [levenshtein](#levenshtein)
- [substr_index](#substr_index)
- [find_in_set](#find_in_set)
- [position](#position)

### `ascii`

Expand Down Expand Up @@ -1300,6 +1301,19 @@ regexp_replace(str, regexp, replacement, flags)
- **g**: (global) Search globally and don't return after the first match.
- **i**: (insensitive) Ignore case when matching.

### `position`

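Returns the position of the first occurrence of `substr` in `origstr`. Positions begin at 1. If `substr` does not occur in `origstr`, the function returns 0.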

```
position(substr in origstr)
```

#### Arguments

- **substr**: The substring to search for.
- **origstr**: The string to search in.

## Time and Date Functions

- [now](#now)
Expand Down

0 comments on commit 2ef9087

Please sign in to comment.