Skip to content

Commit

Permalink
Add HEX, UNHEX, QUOTE, LTRIM, RTRIM scalar functions.
Browse files Browse the repository at this point in the history
  • Loading branch information
sunli829 committed Nov 26, 2021
1 parent 9359d21 commit 5660c8d
Show file tree
Hide file tree
Showing 19 changed files with 549 additions and 2 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions common/functions/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ sha1 = "0.6.0"
sha2 = "0.9.8"
md5 = "0.7.0"
rand = "0.8.4"
hex = "0.4.3"

[dev-dependencies]
bumpalo = "3.8.0"
Expand Down
135 changes: 135 additions & 0 deletions common/functions/src/scalars/strings/hex.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
// Copyright 2021 Datafuse Labs.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::cmp::Ordering;
use std::fmt;

use common_datavalues::prelude::*;
use common_exception::ErrorCode;
use common_exception::Result;

use crate::scalars::function_factory::FunctionDescription;
use crate::scalars::function_factory::FunctionFeatures;
use crate::scalars::Function;

/// Scalar function `HEX`: renders an integer or string argument as hexadecimal text.
#[derive(Clone)]
pub struct HexFunction {
    /// Name the function was created under; stored by `try_create` and not read
    /// anywhere else in this file.
    _display_name: String,
}

impl HexFunction {
    /// Creates a boxed `HexFunction` that remembers `display_name`.
    ///
    /// Always returns `Ok`; the `Result` is part of the creator signature handed to
    /// `FunctionDescription::creator` in `desc`.
    pub fn try_create(display_name: &str) -> Result<Box<dyn Function>> {
        let function = HexFunction {
            _display_name: display_name.to_owned(),
        };
        Ok(Box::new(function))
    }

    /// Builds the factory description for `HEX`: `try_create` as the creator, with the
    /// `deterministic` feature set on top of the default features.
    pub fn desc() -> FunctionDescription {
        let features = FunctionFeatures::default().deterministic();
        FunctionDescription::creator(Box::new(Self::try_create)).features(features)
    }
}

impl Function for HexFunction {
    /// Name of the function as reported to callers.
    fn name(&self) -> &str {
        "hex"
    }

    /// `HEX` takes exactly one argument.
    fn num_arguments(&self) -> usize {
        1
    }

    /// Checks the argument type and reports `String` as the result type.
    ///
    /// # Errors
    /// Returns `ErrorCode::IllegalDataType` unless the argument is an integer type,
    /// `String` or `Null`.
    fn return_type(&self, args: &[DataType]) -> Result<DataType> {
        let arg = &args[0];
        let accepted = arg.is_integer() || *arg == DataType::String || *arg == DataType::Null;
        if accepted {
            Ok(DataType::String)
        } else {
            Err(ErrorCode::IllegalDataType(format!(
                "Expected integer or string or null, but got {}",
                arg
            )))
        }
    }

    /// The result is always declared nullable.
    fn nullable(&self, _input_schema: &DataSchema) -> Result<bool> {
        Ok(true)
    }

    /// Evaluates `HEX` over the first column.
    ///
    /// * Unsigned integers are widened to `UInt64` and printed in lower-case hex.
    /// * Signed integers are widened to `Int64`; negative values are printed as `-`
    ///   followed by the hex of their absolute value.
    /// * Every other type is cast to `String` and each byte is encoded as two hex digits.
    ///
    /// Null inputs produce null outputs.
    fn eval(&self, columns: &DataColumnsWithField, _input_rows: usize) -> Result<DataColumn> {
        let input = columns[0].column();
        let rows = input.len();
        let mut builder = StringArrayBuilder::with_capacity(rows);

        match columns[0].data_type() {
            DataType::UInt8 | DataType::UInt16 | DataType::UInt32 | DataType::UInt64 => {
                for value in input
                    .cast_with_type(&DataType::UInt64)?
                    .to_minimal_array()?
                    .u64()?
                {
                    builder.append_option(value.map(|n| format!("{:x}", n)));
                }
            }
            DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64 => {
                for value in input
                    .cast_with_type(&DataType::Int64)?
                    .to_minimal_array()?
                    .i64()?
                {
                    builder.append_option(value.map(|n| {
                        if n.cmp(&0) == Ordering::Less {
                            // `unsigned_abs` also covers `i64::MIN`, whose magnitude
                            // does not fit in an `i64`.
                            format!("-{:x}", n.unsigned_abs())
                        } else {
                            format!("{:x}", n)
                        }
                    }));
                }
            }
            _ => {
                // Inputs of up to BUFFER_SIZE bytes are encoded into this stack buffer;
                // longer inputs go through `hex::encode`, which returns a `String`.
                const BUFFER_SIZE: usize = 32;
                let mut buffer = [0; BUFFER_SIZE * 2];
                for value in input
                    .cast_with_type(&DataType::String)?
                    .to_minimal_array()?
                    .string()?
                {
                    match value {
                        None => builder.append_null(),
                        Some(bytes) if bytes.len() > BUFFER_SIZE => {
                            builder.append_value(hex::encode(bytes))
                        }
                        Some(bytes) => {
                            let size = bytes.len() * 2;
                            // The output slice is exactly twice the input length, so the
                            // length check inside `encode_to_slice` cannot fail here.
                            let _ = hex::encode_to_slice(bytes, &mut buffer[..size]);
                            builder.append_value(&buffer[..size])
                        }
                    }
                }
            }
        }

        let column: DataColumn = builder.finish().into();
        Ok(column.resize_constant(rows))
    }
}

impl fmt::Display for HexFunction {
    /// Writes the upper-case function name `HEX`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("HEX")
    }
}
11 changes: 11 additions & 0 deletions common/functions/src/scalars/strings/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,22 @@
// See the License for the specific language governing permissions and
// limitations under the License.

mod hex;
mod oct;
mod quote;
mod repeat;
mod string;
mod substring;
mod trim;
mod unhex;

pub use oct::OctFunction;
pub use quote::QuoteFunction;
pub use repeat::RepeatFunction;
pub use string::StringFunction;
pub use substring::SubstringFunction;
pub use trim::LTrimFunction;
pub use trim::RTrimFunction;
pub use unhex::UnhexFunction;

pub use self::hex::HexFunction;
2 changes: 1 addition & 1 deletion common/functions/src/scalars/strings/oct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::cmp::Ordering;
use std::fmt;

use common_arrow::arrow_format::ipc::flatbuffers::bitflags::_core::cmp::Ordering;
use common_datavalues::prelude::*;
use common_exception::ErrorCode;
use common_exception::Result;
Expand Down
111 changes: 111 additions & 0 deletions common/functions/src/scalars/strings/quote.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// Copyright 2021 Datafuse Labs.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt;

use common_datavalues::prelude::*;
use common_exception::ErrorCode;
use common_exception::Result;

use crate::scalars::function_factory::FunctionDescription;
use crate::scalars::function_factory::FunctionFeatures;
use crate::scalars::Function;

/// Scalar function `QUOTE`: backslash-escapes special bytes of a string argument.
#[derive(Clone)]
pub struct QuoteFunction {
    /// Name the function was created under; stored by `try_create` and not read
    /// anywhere else in this file.
    _display_name: String,
}

impl QuoteFunction {
    /// Creates a boxed `QuoteFunction` that remembers `display_name`.
    ///
    /// Always returns `Ok`; the `Result` is part of the creator signature handed to
    /// `FunctionDescription::creator` in `desc`.
    pub fn try_create(display_name: &str) -> Result<Box<dyn Function>> {
        let function = QuoteFunction {
            _display_name: display_name.to_owned(),
        };
        Ok(Box::new(function))
    }

    /// Builds the factory description for `QUOTE`: `try_create` as the creator, with the
    /// `deterministic` feature set on top of the default features.
    pub fn desc() -> FunctionDescription {
        let features = FunctionFeatures::default().deterministic();
        FunctionDescription::creator(Box::new(Self::try_create)).features(features)
    }
}

impl Function for QuoteFunction {
    /// Name of the function as reported to callers.
    fn name(&self) -> &str {
        "quote"
    }

    /// `QUOTE` takes exactly one argument.
    fn num_arguments(&self) -> usize {
        1
    }

    /// Checks the argument type and reports `String` as the result type.
    ///
    /// # Errors
    /// Returns `ErrorCode::IllegalDataType` unless the argument is `String` or `Null`.
    fn return_type(&self, args: &[DataType]) -> Result<DataType> {
        if args[0] != DataType::String && args[0] != DataType::Null {
            return Err(ErrorCode::IllegalDataType(format!(
                "Expected string or null, but got {}",
                args[0]
            )));
        }

        Ok(DataType::String)
    }

    /// The result is always declared nullable.
    fn nullable(&self, _input_schema: &DataSchema) -> Result<bool> {
        Ok(true)
    }

    /// Evaluates `QUOTE` over the first column: the input is cast to `String`, every
    /// non-null value is escaped with `quote_string`, and nulls stay null.
    fn eval(&self, columns: &DataColumnsWithField, _input_rows: usize) -> Result<DataColumn> {
        let input = columns[0].column();
        let rows = input.len();
        let mut string_array = StringArrayBuilder::with_capacity(rows);
        // Scratch space reused for every row so escaping does not allocate per value.
        let mut buffer = Vec::new();

        for value in input
            .cast_with_type(&DataType::String)?
            .to_minimal_array()?
            .string()?
        {
            match value {
                Some(value) => string_array.append_value(quote_string(value, &mut buffer)),
                None => string_array.append_null(),
            }
        }

        // Finish the same way `HexFunction::eval` (strings/hex.rs) does: resize the
        // result against the input column length instead of returning the built array
        // as-is. NOTE(review): presumably `to_minimal_array` yields a single row for a
        // constant column, which would otherwise leave the result shorter than the
        // input — confirm against `DataColumn`.
        let column: DataColumn = string_array.finish().into();
        Ok(column.resize_constant(rows))
    }
}

impl fmt::Display for QuoteFunction {
    /// Writes the upper-case function name `QUOTE`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("QUOTE")
    }
}

/// Escapes `value` into `buffer` and returns the escaped bytes.
///
/// `buffer` is cleared first, so any previous contents are discarded and the returned
/// slice covers the whole buffer. NUL, backspace (byte 8), newline, carriage return and
/// tab become `\0`, `\b`, `\n`, `\r` and `\t`; single quote, double quote and backslash
/// are prefixed with a backslash; every other byte is copied unchanged.
fn quote_string<'a>(value: &[u8], buffer: &'a mut Vec<u8>) -> &'a [u8] {
    buffer.clear();

    for &byte in value {
        // Byte that follows the backslash when `byte` needs escaping.
        let escape_code = match byte {
            0 => Some(b'0'),
            8 => Some(b'b'),
            b'\n' => Some(b'n'),
            b'\r' => Some(b'r'),
            b'\t' => Some(b't'),
            b'\'' | b'"' | b'\\' => Some(byte),
            _ => None,
        };

        if escape_code.is_some() {
            buffer.push(b'\\');
        }
        buffer.push(escape_code.unwrap_or(byte));
    }

    buffer.as_slice()
}
12 changes: 11 additions & 1 deletion common/functions/src/scalars/strings/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,14 @@
// limitations under the License.

use crate::scalars::function_factory::FunctionFactory;
use crate::scalars::strings::repeat::RepeatFunction;
use crate::scalars::HexFunction;
use crate::scalars::LTrimFunction;
use crate::scalars::OctFunction;
use crate::scalars::QuoteFunction;
use crate::scalars::RTrimFunction;
use crate::scalars::RepeatFunction;
use crate::scalars::SubstringFunction;
use crate::scalars::UnhexFunction;

/// Unit type grouping the string scalar functions; the visible part of its `impl`
/// registers them (`substring`, `oct`, `repeat`, `ltrim`, `rtrim`, `hex`, `unhex`,
/// `quote`) on a function factory.
#[derive(Clone)]
pub struct StringFunction;
Expand All @@ -25,5 +30,10 @@ impl StringFunction {
factory.register("substring", SubstringFunction::desc());
factory.register("oct", OctFunction::desc());
factory.register("repeat", RepeatFunction::desc());
factory.register("ltrim", LTrimFunction::desc());
factory.register("rtrim", RTrimFunction::desc());
factory.register("hex", HexFunction::desc());
factory.register("unhex", UnhexFunction::desc());
factory.register("quote", QuoteFunction::desc());
}
}
Loading

0 comments on commit 5660c8d

Please sign in to comment.