Skip to content

Commit

Permalink
Match array min/max behavior with presto. (facebookincubator#7128)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: facebookincubator#7128

Based on  prestodb/presto#21065

Differential Revision: D50386287

fbshipit-source-id: 404c7c885c54d3f70215811a1e2e7bc5874c50c9
  • Loading branch information
amitkdutta authored and facebook-github-bot committed Oct 19, 2023
1 parent 9d9cf5c commit ae9418c
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 10 deletions.
8 changes: 6 additions & 2 deletions velox/docs/functions/presto/array.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,21 +93,25 @@ Array Functions

.. function:: array_max(array(E)) -> E

Returns the maximum value of input array. ::
Returns the maximum value of input array. If the array contains a NAN element, NAN is returned regardless of array contains any NULL or Not.
If the array contains a NULL element, but no element is NAN, NULL is returned. ::

SELECT array_max(ARRAY [1, 2, 3]); -- 3
SELECT array_max(ARRAY [-1, -2, -2]); -- -1
SELECT array_max(ARRAY [-1, -2, NULL]); -- NULL
SELECT array_max(ARRAY []); -- NULL
SELECT array_max(ARRAY[3, -1, NULL, NULL, nan()]); -- NaN

.. function:: array_min(array(E)) -> E

Returns the minimum value of input array. ::
Returns the minimum value of input array. If the array contains a NAN element, NAN is returned regardless of array contains any NULL or Not.
If the array contains a NULL element, but no element is NAN, NULL is returned. ::

SELECT array_min(ARRAY [1, 2, 3]); -- 1
SELECT array_min(ARRAY [-1, -2, -2]); -- -2
SELECT array_min(ARRAY [-1, -2, NULL]); -- NULL
SELECT array_min(ARRAY []); -- NULL
SELECT array_min(ARRAY[-0.0001, -0.0002, -0.0003, nan()]); -- NaN

.. function:: array_normalize(array(E), E) -> array(E)

Expand Down
78 changes: 78 additions & 0 deletions velox/functions/prestosql/ArrayFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,90 @@ struct ArrayMinMaxFunction {
}
};

template <typename TExecCtx, bool isMax>
struct ArrayMinMaxFloatFunction {
VELOX_DEFINE_FUNCTION_TYPES(TExecCtx);

template <typename T>
void update(T& currentValue, const T& candidateValue) {
if constexpr (isMax) {
if (candidateValue > currentValue) {
currentValue = candidateValue;
}
} else {
if (candidateValue < currentValue) {
currentValue = candidateValue;
}
}
}

template <typename TReturn, typename TInput>
FOLLY_ALWAYS_INLINE bool call(TReturn& out, const TInput& array) {
if (array.size() == 0) {
return false;
}

bool hasNull = false;
auto it = array.begin();

// Find the first non-null item (if any)
while (it != array.end()) {
if (it->has_value()) {
break;
} else {
hasNull = true;
}
++it;
}

// If first non-null item is NAN, return immediately.
auto currentValue = it->value();
if (std::isnan(currentValue)) {
out = currentValue;
return true;
}

++it;
while (it != array.end()) {
if (it->has_value()) {
auto newValue = it->value();
if (std::isnan(newValue)) {
out = newValue;
return true;
}
update(currentValue, newValue);
} else {
hasNull = true;
}
++it;
}

// If we found a null, return false. Note that, if we found
// a NAN, the function will return at earlier stage as soon as
// a NAN is observed.
if (hasNull) {
return false;
}

out = currentValue;
return true;
}
};

template <typename TExecCtx>
struct ArrayMinFunction : public ArrayMinMaxFunction<TExecCtx, false> {};

template <typename TExecCtx>
struct ArrayMaxFunction : public ArrayMinMaxFunction<TExecCtx, true> {};

template <typename TExecCtx>
struct ArrayMinFunctionFloat
: public ArrayMinMaxFloatFunction<TExecCtx, false> {};

template <typename TExecCtx>
struct ArrayMaxFunctionFloat : public ArrayMinMaxFloatFunction<TExecCtx, true> {
};

template <typename TExecCtx, typename T>
struct ArrayJoinFunction {
VELOX_DEFINE_FUNCTION_TYPES(TExecCtx);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ inline void registerArrayMinMaxFunctions(const std::string& prefix) {
registerFunction<ArrayMaxFunction, T, Array<T>>({prefix + "array_max"});
}

template <typename T>
inline void registerArrayMinMaxFloatFunctions(const std::string& prefix) {
registerFunction<ArrayMinFunctionFloat, T, Array<T>>({prefix + "array_min"});
registerFunction<ArrayMaxFunctionFloat, T, Array<T>>({prefix + "array_max"});
}

template <typename T>
inline void registerArrayJoinFunctions(const std::string& prefix) {
registerFunction<
Expand Down Expand Up @@ -152,12 +158,12 @@ void registerArrayFunctions(const std::string& prefix) {
registerArrayMinMaxFunctions<int32_t>(prefix);
registerArrayMinMaxFunctions<int64_t>(prefix);
registerArrayMinMaxFunctions<int128_t>(prefix);
registerArrayMinMaxFunctions<float>(prefix);
registerArrayMinMaxFunctions<double>(prefix);
registerArrayMinMaxFunctions<bool>(prefix);
registerArrayMinMaxFunctions<Varchar>(prefix);
registerArrayMinMaxFunctions<Timestamp>(prefix);
registerArrayMinMaxFunctions<Date>(prefix);
registerArrayMinMaxFloatFunctions<float>(prefix);
registerArrayMinMaxFloatFunctions<double>(prefix);

registerArrayJoinFunctions<int8_t>(prefix);
registerArrayJoinFunctions<int16_t>(prefix);
Expand Down
31 changes: 28 additions & 3 deletions velox/functions/prestosql/tests/ArrayMaxTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,17 @@ TEST_F(ArrayMaxTest, longVarcharNoNulls) {
// Test documented example.
TEST_F(ArrayMaxTest, docs) {
auto input = makeNullableArrayVector<int32_t>(
{{1, 2, 3}, {-1, -2, -2}, {-1, -2, std::nullopt}, {}});
auto expected =
makeNullableFlatVector<int32_t>({3, -1, std::nullopt, std::nullopt});
{{1, 2, 3},
{-1, -2, -2},
{-1, -2, std::nullopt},
{},
{-0.0001, -0.0002, -0.0003, std::numeric_limits<double>::quiet_NaN()}});
auto expected = makeNullableFlatVector<int32_t>(
{3,
-1,
std::nullopt,
std::nullopt,
std::numeric_limits<double>::quiet_NaN()});
testArrayMax(input, expected);
}

Expand Down Expand Up @@ -278,6 +286,19 @@ class ArrayMaxFloatingPointTest : public FunctionBaseTest {
0.0001});
testArrayMax(input, expected);
}

void testNan() {
auto input = makeArrayVector<T>(
{{std::numeric_limits<T>::lowest(),
-0.0001,
-0.0002,
-0.0003,
std::numeric_limits<T>::max(),
std::numeric_limits<T>::quiet_NaN()}});
auto expected =
makeNullableFlatVector<T>({std::numeric_limits<T>::quiet_NaN()});
testArrayMax(input, expected);
}
};

} // namespace
Expand All @@ -303,3 +324,7 @@ TYPED_TEST(ArrayMaxFloatingPointTest, arrayMaxNullable) {
TYPED_TEST(ArrayMaxFloatingPointTest, arrayMax) {
this->testNoNulls();
}

TYPED_TEST(ArrayMaxFloatingPointTest, arrayMaxNan) {
this->testNan();
}
17 changes: 14 additions & 3 deletions velox/functions/prestosql/tests/ArrayMinTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,20 @@ class ArrayMinTest : public FunctionBaseTest {

void testDocExample() {
auto arrayVector = makeNullableArrayVector<int64_t>(
{{1, 2, 3}, {-1, -2, -2}, {-1, -2, std::nullopt}, {}});
auto expected =
makeNullableFlatVector<int64_t>({1, -2, std::nullopt, std::nullopt});
{{1, 2, 3},
{-1, -2, -2},
{-1, -2, std::nullopt},
{},
{-0.0001,
-0.0002,
-0.0003,
std::numeric_limits<double>::quiet_NaN()}});
auto expected = makeNullableFlatVector<int64_t>(
{1,
-2,
std::nullopt,
std::nullopt,
std::numeric_limits<double>::quiet_NaN()});
testExpr<int64_t>(expected, "array_min(C0)", {arrayVector});
}

Expand Down

0 comments on commit ae9418c

Please sign in to comment.