Match array min/max behavior with presto. (facebookincubator#7128)

Summary: Pull Request resolved: facebookincubator#7128 Based on prestodb/presto#21065 Differential Revision: D50386287 fbshipit-source-id: 404c7c885c54d3f70215811a1e2e7bc5874c50c9
amitkdutta · Oct 19, 2023 · ae9418c · ae9418c
1 parent 9d9cf5c
commit ae9418c
Show file tree

Hide file tree

Showing 5 changed files with 134 additions and 10 deletions.
diff --git a/velox/docs/functions/presto/array.rst b/velox/docs/functions/presto/array.rst
@@ -93,21 +93,25 @@ Array Functions
 
 .. function:: array_max(array(E)) -> E
 
-    Returns the maximum value of input array. ::
+    Returns the maximum value of input array. If the array contains a NAN element, NAN is returned regardless of array contains any NULL or Not.
+    If the array contains a NULL element, but no element is NAN, NULL is returned. ::
 
         SELECT array_max(ARRAY [1, 2, 3]); -- 3
         SELECT array_max(ARRAY [-1, -2, -2]); -- -1
         SELECT array_max(ARRAY [-1, -2, NULL]); -- NULL
         SELECT array_max(ARRAY []); -- NULL
+        SELECT array_max(ARRAY[3, -1, NULL, NULL, nan()]); -- NaN
 
 .. function:: array_min(array(E)) -> E
 
-    Returns the minimum value of input array. ::
+    Returns the minimum value of input array. If the array contains a NAN element, NAN is returned regardless of array contains any NULL or Not.
+    If the array contains a NULL element, but no element is NAN, NULL is returned. ::
 
         SELECT array_min(ARRAY [1, 2, 3]); -- 1
         SELECT array_min(ARRAY [-1, -2, -2]); -- -2
         SELECT array_min(ARRAY [-1, -2, NULL]); -- NULL
         SELECT array_min(ARRAY []); -- NULL
+        SELECT array_min(ARRAY[-0.0001, -0.0002, -0.0003, nan()]); -- NaN
 
 .. function:: array_normalize(array(E), E) -> array(E)
 

diff --git a/velox/functions/prestosql/ArrayFunctions.h b/velox/functions/prestosql/ArrayFunctions.h
@@ -90,12 +90,90 @@ struct ArrayMinMaxFunction {
   }
 };
 
+template <typename TExecCtx, bool isMax>
+struct ArrayMinMaxFloatFunction {
+  VELOX_DEFINE_FUNCTION_TYPES(TExecCtx);
+
+  template <typename T>
+  void update(T& currentValue, const T& candidateValue) {
+    if constexpr (isMax) {
+      if (candidateValue > currentValue) {
+        currentValue = candidateValue;
+      }
+    } else {
+      if (candidateValue < currentValue) {
+        currentValue = candidateValue;
+      }
+    }
+  }
+
+  template <typename TReturn, typename TInput>
+  FOLLY_ALWAYS_INLINE bool call(TReturn& out, const TInput& array) {
+    if (array.size() == 0) {
+      return false;
+    }
+
+    bool hasNull = false;
+    auto it = array.begin();
+
+    // Find the first non-null item (if any)
+    while (it != array.end()) {
+      if (it->has_value()) {
+        break;
+      } else {
+        hasNull = true;
+      }
+      ++it;
+    }
+
+    // If first non-null item is NAN, return immediately.
+    auto currentValue = it->value();
+    if (std::isnan(currentValue)) {
+      out = currentValue;
+      return true;
+    }
+
+    ++it;
+    while (it != array.end()) {
+      if (it->has_value()) {
+        auto newValue = it->value();
+        if (std::isnan(newValue)) {
+          out = newValue;
+          return true;
+        }
+        update(currentValue, newValue);
+      } else {
+        hasNull = true;
+      }
+      ++it;
+    }
+
+    // If we found a null, return false. Note that, if we found
+    // a NAN, the function will return at earlier stage as soon as
+    // a NAN is observed.
+    if (hasNull) {
+      return false;
+    }
+
+    out = currentValue;
+    return true;
+  }
+};
+
 template <typename TExecCtx>
 struct ArrayMinFunction : public ArrayMinMaxFunction<TExecCtx, false> {};
 
 template <typename TExecCtx>
 struct ArrayMaxFunction : public ArrayMinMaxFunction<TExecCtx, true> {};
 
+template <typename TExecCtx>
+struct ArrayMinFunctionFloat
+    : public ArrayMinMaxFloatFunction<TExecCtx, false> {};
+
+template <typename TExecCtx>
+struct ArrayMaxFunctionFloat : public ArrayMinMaxFloatFunction<TExecCtx, true> {
+};
+
 template <typename TExecCtx, typename T>
 struct ArrayJoinFunction {
   VELOX_DEFINE_FUNCTION_TYPES(TExecCtx);

diff --git a/velox/functions/prestosql/registration/ArrayFunctionsRegistration.cpp b/velox/functions/prestosql/registration/ArrayFunctionsRegistration.cpp
@@ -27,6 +27,12 @@ inline void registerArrayMinMaxFunctions(const std::string& prefix) {
   registerFunction<ArrayMaxFunction, T, Array<T>>({prefix + "array_max"});
 }
 
+template <typename T>
+inline void registerArrayMinMaxFloatFunctions(const std::string& prefix) {
+  registerFunction<ArrayMinFunctionFloat, T, Array<T>>({prefix + "array_min"});
+  registerFunction<ArrayMaxFunctionFloat, T, Array<T>>({prefix + "array_max"});
+}
+
 template <typename T>
 inline void registerArrayJoinFunctions(const std::string& prefix) {
   registerFunction<
@@ -152,12 +158,12 @@ void registerArrayFunctions(const std::string& prefix) {
   registerArrayMinMaxFunctions<int32_t>(prefix);
   registerArrayMinMaxFunctions<int64_t>(prefix);
   registerArrayMinMaxFunctions<int128_t>(prefix);
-  registerArrayMinMaxFunctions<float>(prefix);
-  registerArrayMinMaxFunctions<double>(prefix);
   registerArrayMinMaxFunctions<bool>(prefix);
   registerArrayMinMaxFunctions<Varchar>(prefix);
   registerArrayMinMaxFunctions<Timestamp>(prefix);
   registerArrayMinMaxFunctions<Date>(prefix);
+  registerArrayMinMaxFloatFunctions<float>(prefix);
+  registerArrayMinMaxFloatFunctions<double>(prefix);
 
   registerArrayJoinFunctions<int8_t>(prefix);
   registerArrayJoinFunctions<int16_t>(prefix);

diff --git a/velox/functions/prestosql/tests/ArrayMaxTest.cpp b/velox/functions/prestosql/tests/ArrayMaxTest.cpp
@@ -146,9 +146,17 @@ TEST_F(ArrayMaxTest, longVarcharNoNulls) {
 // Test documented example.
 TEST_F(ArrayMaxTest, docs) {
   auto input = makeNullableArrayVector<int32_t>(
-      {{1, 2, 3}, {-1, -2, -2}, {-1, -2, std::nullopt}, {}});
-  auto expected =
-      makeNullableFlatVector<int32_t>({3, -1, std::nullopt, std::nullopt});
+      {{1, 2, 3},
+       {-1, -2, -2},
+       {-1, -2, std::nullopt},
+       {},
+       {-0.0001, -0.0002, -0.0003, std::numeric_limits<double>::quiet_NaN()}});
+  auto expected = makeNullableFlatVector<int32_t>(
+      {3,
+       -1,
+       std::nullopt,
+       std::nullopt,
+       std::numeric_limits<double>::quiet_NaN()});
   testArrayMax(input, expected);
 }
 
@@ -278,6 +286,19 @@ class ArrayMaxFloatingPointTest : public FunctionBaseTest {
          0.0001});
     testArrayMax(input, expected);
   }
+
+  void testNan() {
+    auto input = makeArrayVector<T>(
+        {{std::numeric_limits<T>::lowest(),
+          -0.0001,
+          -0.0002,
+          -0.0003,
+          std::numeric_limits<T>::max(),
+          std::numeric_limits<T>::quiet_NaN()}});
+    auto expected =
+        makeNullableFlatVector<T>({std::numeric_limits<T>::quiet_NaN()});
+    testArrayMax(input, expected);
+  }
 };
 
 } // namespace
@@ -303,3 +324,7 @@ TYPED_TEST(ArrayMaxFloatingPointTest, arrayMaxNullable) {
 TYPED_TEST(ArrayMaxFloatingPointTest, arrayMax) {
   this->testNoNulls();
 }
+
+TYPED_TEST(ArrayMaxFloatingPointTest, arrayMaxNan) {
+  this->testNan();
+}
diff --git a/velox/functions/prestosql/tests/ArrayMinTest.cpp b/velox/functions/prestosql/tests/ArrayMinTest.cpp
@@ -37,9 +37,20 @@ class ArrayMinTest : public FunctionBaseTest {
 
   void testDocExample() {
     auto arrayVector = makeNullableArrayVector<int64_t>(
-        {{1, 2, 3}, {-1, -2, -2}, {-1, -2, std::nullopt}, {}});
-    auto expected =
-        makeNullableFlatVector<int64_t>({1, -2, std::nullopt, std::nullopt});
+        {{1, 2, 3},
+         {-1, -2, -2},
+         {-1, -2, std::nullopt},
+         {},
+         {-0.0001,
+          -0.0002,
+          -0.0003,
+          std::numeric_limits<double>::quiet_NaN()}});
+    auto expected = makeNullableFlatVector<int64_t>(
+        {1,
+         -2,
+         std::nullopt,
+         std::nullopt,
+         std::numeric_limits<double>::quiet_NaN()});
     testExpr<int64_t>(expected, "array_min(C0)", {arrayVector});
   }