From 5877e8b713742b6d80bd1aa9819094be029e3e1f Mon Sep 17 00:00:00 2001
From: Sychev Vadim <svddroid@gmail.com>
Date: Thu, 23 Jun 2022 19:36:47 +0300
Subject: [PATCH] Optimization. Use Min/Max intrinsics if one of arguments is
 constant. (#69434)

* Add xarch optimization for min/max (#65625)

* Changes according to the requirements (#65625)

* Draft for Math.Max/Math.Min optimization (#65625)

* Draft for optimizing Math.Max/Math.Min with a const (#65625)

* Fix tests (#65625)

* Refactoring of the conditions (#65625)

* Fix of the summary (#65625)

* Refactoring due to the PR comments (#65625)

* Add spilling side effect + Fix of formats (#65625)

* Update src/coreclr/jit/importer.cpp

Co-authored-by: Jakob Botsch Nielsen <Jakob.botsch.nielsen@gmail.com>

* Update src/coreclr/jit/importer.cpp

Co-authored-by: Jakob Botsch Nielsen <Jakob.botsch.nielsen@gmail.com>

Co-authored-by: Jakob Botsch Nielsen <Jakob.botsch.nielsen@gmail.com>
---
 src/coreclr/jit/gentree.h    |  38 +++++++++
 src/coreclr/jit/importer.cpp | 153 ++++++++++++++++++++++++++++++++++-
 src/coreclr/jit/utils.cpp    |  16 ++++
 src/coreclr/jit/utils.h      |   2 +
 4 files changed, 208 insertions(+), 1 deletion(-)

diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index de7cf8bdf4950..52e18a84d8513 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -1770,7 +1770,9 @@ struct GenTree
 #endif // DEBUG
 
     inline bool IsIntegralConst(ssize_t constVal) const;
+    inline bool IsFloatNaN() const;
     inline bool IsFloatPositiveZero() const;
+    inline bool IsFloatNegativeZero() const;
     inline bool IsVectorZero() const;
     inline bool IsVectorAllBitsSet() const;
     inline bool IsVectorConst();
@@ -8326,6 +8328,42 @@ inline bool GenTree::IsIntegralConst(ssize_t constVal) const
     return false;
 }
 
+//-------------------------------------------------------------------
+// IsFloatNaN: returns true if this is exactly a const float value of NaN
+//
+// Returns:
+//     True if this represents a const floating-point value of NaN.
+//     Will return false otherwise.
+//
+inline bool GenTree::IsFloatNaN() const
+{
+    if (IsCnsFltOrDbl())
+    {
+        double constValue = AsDblCon()->gtDconVal;
+        return FloatingPointUtils::isNaN(constValue);
+    }
+
+    return false;
+}
+
+//-------------------------------------------------------------------
+// IsFloatNegativeZero: returns true if this is exactly a const float value of negative zero (-0.0)
+//
+// Returns:
+//     True if this represents a const floating-point value of exactly negative zero (-0.0).
+//     Will return false if the value is negative zero (+0.0).
+//
+inline bool GenTree::IsFloatNegativeZero() const
+{
+    if (IsCnsFltOrDbl())
+    {
+        double constValue = AsDblCon()->gtDconVal;
+        return FloatingPointUtils::isNegativeZero(constValue);
+    }
+
+    return false;
+}
+
 //-------------------------------------------------------------------
 // IsFloatPositiveZero: returns true if this is exactly a const float value of postive zero (+0.0)
 //
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index 0bbd79223045d..ddb9184a7dfee 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -4461,13 +4461,164 @@ GenTree* Compiler::impIntrinsic(GenTree*                newobjThis,
             case NI_System_Math_Log:
             case NI_System_Math_Log2:
             case NI_System_Math_Log10:
-#ifdef TARGET_ARM64
+            {
+                retNode = impMathIntrinsic(method, sig, callType, ni, tailCall);
+                break;
+            }
+#if defined(TARGET_ARM64)
             // ARM64 has fmax/fmin which are IEEE754:2019 minimum/maximum compatible
             // TODO-XARCH-CQ: Enable this for XARCH when one of the arguments is a constant
             // so we can then emit maxss/minss and avoid NaN/-0.0 handling
             case NI_System_Math_Max:
             case NI_System_Math_Min:
 #endif
+
+#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)
+            case NI_System_Math_Max:
+            case NI_System_Math_Min:
+            {
+                assert(varTypeIsFloating(callType));
+                assert(sig->numArgs == 2);
+
+                GenTreeDblCon* cnsNode   = nullptr;
+                GenTree*       otherNode = nullptr;
+
+                GenTree* op2 = impStackTop().val;
+                GenTree* op1 = impStackTop(1).val;
+
+                if (op2->IsCnsFltOrDbl())
+                {
+                    cnsNode   = op2->AsDblCon();
+                    otherNode = op1;
+                }
+                else if (op1->IsCnsFltOrDbl())
+                {
+                    cnsNode   = op1->AsDblCon();
+                    otherNode = op2;
+                }
+
+                if (cnsNode == nullptr)
+                {
+                    // no constant node, nothing to do
+                    break;
+                }
+
+                if (otherNode->IsCnsFltOrDbl())
+                {
+                    // both are constant, we can fold this operation completely. Pop both peeked values
+
+                    if (ni == NI_System_Math_Max)
+                    {
+                        cnsNode->gtDconVal =
+                            FloatingPointUtils::maximum(cnsNode->gtDconVal, otherNode->AsDblCon()->gtDconVal);
+                    }
+                    else
+                    {
+                        assert(ni == NI_System_Math_Min);
+                        cnsNode->gtDconVal =
+                            FloatingPointUtils::minimum(cnsNode->gtDconVal, otherNode->AsDblCon()->gtDconVal);
+                    }
+
+                    retNode = cnsNode;
+
+                    impPopStack();
+                    impPopStack();
+                    DEBUG_DESTROY_NODE(otherNode);
+
+                    break;
+                }
+
+                // only one is constant, we can fold in specialized scenarios
+
+                if (cnsNode->IsFloatNaN())
+                {
+                    impSpillSideEffects(false, (unsigned)CHECK_SPILL_ALL DEBUGARG(
+                                                   "spill side effects before propagating NaN"));
+
+                    // maxsd, maxss, minsd, and minss all return op2 if either is NaN
+                    // we require NaN to be propagated so ensure the known NaN is op2
+
+                    impPopStack();
+                    impPopStack();
+                    DEBUG_DESTROY_NODE(otherNode);
+
+                    retNode = cnsNode;
+                    break;
+                }
+
+                if (ni == NI_System_Math_Max)
+                {
+                    // maxsd, maxss return op2 if both inputs are 0 of either sign
+                    // we require +0 to be greater than -0, so we can't handle if
+                    // the known constant is +0. This is because if the unknown value
+                    // is -0, we'd need the cns to be op2. But if the unknown value
+                    // is NaN, we'd need the cns to be op1 instead.
+
+                    if (cnsNode->IsFloatPositiveZero())
+                    {
+                        break;
+                    }
+
+                    // Given the checks, op1 can safely be the cns and op2 the other node
+
+                    ni = (callType == TYP_DOUBLE) ? NI_SSE2_Max : NI_SSE_Max;
+
+                    // one is constant and we know its something we can handle, so pop both peeked values
+
+                    op1 = cnsNode;
+                    op2 = otherNode;
+                }
+                else
+                {
+                    assert(ni == NI_System_Math_Min);
+
+                    // minsd, minss return op2 if both inputs are 0 of either sign
+                    // we require -0 to be lesser than +0, so we can't handle if
+                    // the known constant is -0. This is because if the unknown value
+                    // is +0, we'd need the cns to be op2. But if the unknown value
+                    // is NaN, we'd need the cns to be op1 instead.
+
+                    if (cnsNode->IsFloatNegativeZero())
+                    {
+                        break;
+                    }
+
+                    // Given the checks, op1 can safely be the cns and op2 the other node
+
+                    ni = (callType == TYP_DOUBLE) ? NI_SSE2_Min : NI_SSE_Min;
+
+                    // one is constant and we know its something we can handle, so pop both peeked values
+
+                    op1 = cnsNode;
+                    op2 = otherNode;
+                }
+
+                assert(op1->IsCnsFltOrDbl() && !op2->IsCnsFltOrDbl());
+
+                impPopStack();
+                impPopStack();
+
+                GenTreeVecCon* vecCon = gtNewVconNode(TYP_SIMD16, callJitType);
+
+                if (callJitType == CORINFO_TYPE_FLOAT)
+                {
+                    vecCon->gtSimd16Val.f32[0] = (float)op1->AsDblCon()->gtDconVal;
+                }
+                else
+                {
+                    vecCon->gtSimd16Val.f64[0] = op1->AsDblCon()->gtDconVal;
+                }
+
+                op1 = vecCon;
+                op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, NI_Vector128_CreateScalarUnsafe, callJitType, 16);
+
+                retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, ni, callJitType, 16);
+                retNode = gtNewSimdHWIntrinsicNode(callType, retNode, NI_Vector128_ToScalar, callJitType, 16);
+
+                break;
+            }
+#endif
+
             case NI_System_Math_Pow:
             case NI_System_Math_Round:
             case NI_System_Math_Sin:
diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp
index 3364c84a1d859..5666e618325c7 100644
--- a/src/coreclr/jit/utils.cpp
+++ b/src/coreclr/jit/utils.cpp
@@ -2294,6 +2294,22 @@ bool FloatingPointUtils::isNaN(double val)
     return (bits & 0x7FFFFFFFFFFFFFFFULL) > 0x7FF0000000000000ULL;
 }
 
+//------------------------------------------------------------------------
+// isNegativeZero: Determines whether the specified value is negative zero (-0.0)
+//
+// Arguments:
+//    val - value to check for (-0.0)
+//
+// Return Value:
+//    True if val is (-0.0)
+//
+
+bool FloatingPointUtils::isNegativeZero(double val)
+{
+    UINT64 bits = *reinterpret_cast<UINT64*>(&val);
+    return bits == 0x8000000000000000ULL;
+}
+
 //------------------------------------------------------------------------
 // maximum: This matches the IEEE 754:2019 `maximum` function
 //    It propagates NaN inputs back to the caller and
diff --git a/src/coreclr/jit/utils.h b/src/coreclr/jit/utils.h
index 9124a88c6558d..a5c40f44e5d0f 100644
--- a/src/coreclr/jit/utils.h
+++ b/src/coreclr/jit/utils.h
@@ -711,6 +711,8 @@ class FloatingPointUtils
 
     static bool isNaN(double val);
 
+    static bool isNegativeZero(double val);
+
     static double maximum(double val1, double val2);
 
     static float maximum(float val1, float val2);