From 5877e8b713742b6d80bd1aa9819094be029e3e1f Mon Sep 17 00:00:00 2001 From: Sychev Vadim Date: Thu, 23 Jun 2022 19:36:47 +0300 Subject: [PATCH] Optimization. Use Min/Max intrinsics if one of arguments is constant. (#69434) * Add xarch optimization for min/max (#65625) * Changes according to the requirements (#65625) * Draft for Math.Max/Math.Min optimization (#65625) * Draft for optimizing Math.Max/Math.Min with a const (#65625) * Fix tests (#65625) * Refactoring of the conditions (#65625) * Fix of the summary (#65625) * Refactoring due to the PR comments (#65625) * Add spilling side effect + Fix of formats (#65625) * Update src/coreclr/jit/importer.cpp Co-authored-by: Jakob Botsch Nielsen * Update src/coreclr/jit/importer.cpp Co-authored-by: Jakob Botsch Nielsen Co-authored-by: Jakob Botsch Nielsen --- src/coreclr/jit/gentree.h | 38 +++++++++ src/coreclr/jit/importer.cpp | 153 ++++++++++++++++++++++++++++++++++- src/coreclr/jit/utils.cpp | 16 ++++ src/coreclr/jit/utils.h | 2 + 4 files changed, 208 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index de7cf8bdf4950..52e18a84d8513 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1770,7 +1770,9 @@ struct GenTree #endif // DEBUG inline bool IsIntegralConst(ssize_t constVal) const; + inline bool IsFloatNaN() const; inline bool IsFloatPositiveZero() const; + inline bool IsFloatNegativeZero() const; inline bool IsVectorZero() const; inline bool IsVectorAllBitsSet() const; inline bool IsVectorConst(); @@ -8326,6 +8328,42 @@ inline bool GenTree::IsIntegralConst(ssize_t constVal) const return false; } +//------------------------------------------------------------------- +// IsFloatNaN: returns true if this is exactly a const float value of NaN +// +// Returns: +// True if this represents a const floating-point value of NaN. +// Will return false otherwise. +// +inline bool GenTree::IsFloatNaN() const +{ + if (IsCnsFltOrDbl()) + { + double constValue = AsDblCon()->gtDconVal; + return FloatingPointUtils::isNaN(constValue); + } + + return false; +} + +//------------------------------------------------------------------- +// IsFloatNegativeZero: returns true if this is exactly a const float value of negative zero (-0.0) +// +// Returns: +// True if this represents a const floating-point value of exactly negative zero (-0.0). +// Will return false if the value is negative zero (+0.0). +// +inline bool GenTree::IsFloatNegativeZero() const +{ + if (IsCnsFltOrDbl()) + { + double constValue = AsDblCon()->gtDconVal; + return FloatingPointUtils::isNegativeZero(constValue); + } + + return false; +} + //------------------------------------------------------------------- // IsFloatPositiveZero: returns true if this is exactly a const float value of postive zero (+0.0) // diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 0bbd79223045d..ddb9184a7dfee 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -4461,13 +4461,164 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, case NI_System_Math_Log: case NI_System_Math_Log2: case NI_System_Math_Log10: -#ifdef TARGET_ARM64 + { + retNode = impMathIntrinsic(method, sig, callType, ni, tailCall); + break; + } +#if defined(TARGET_ARM64) // ARM64 has fmax/fmin which are IEEE754:2019 minimum/maximum compatible // TODO-XARCH-CQ: Enable this for XARCH when one of the arguments is a constant // so we can then emit maxss/minss and avoid NaN/-0.0 handling case NI_System_Math_Max: case NI_System_Math_Min: #endif + +#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) + case NI_System_Math_Max: + case NI_System_Math_Min: + { + assert(varTypeIsFloating(callType)); + assert(sig->numArgs == 2); + + GenTreeDblCon* cnsNode = nullptr; + GenTree* otherNode = nullptr; + + GenTree* op2 = impStackTop().val; + GenTree* op1 = impStackTop(1).val; + + if (op2->IsCnsFltOrDbl()) + { + cnsNode = op2->AsDblCon(); + otherNode = op1; + } + else if (op1->IsCnsFltOrDbl()) + { + cnsNode = op1->AsDblCon(); + otherNode = op2; + } + + if (cnsNode == nullptr) + { + // no constant node, nothing to do + break; + } + + if (otherNode->IsCnsFltOrDbl()) + { + // both are constant, we can fold this operation completely. Pop both peeked values + + if (ni == NI_System_Math_Max) + { + cnsNode->gtDconVal = + FloatingPointUtils::maximum(cnsNode->gtDconVal, otherNode->AsDblCon()->gtDconVal); + } + else + { + assert(ni == NI_System_Math_Min); + cnsNode->gtDconVal = + FloatingPointUtils::minimum(cnsNode->gtDconVal, otherNode->AsDblCon()->gtDconVal); + } + + retNode = cnsNode; + + impPopStack(); + impPopStack(); + DEBUG_DESTROY_NODE(otherNode); + + break; + } + + // only one is constant, we can fold in specialized scenarios + + if (cnsNode->IsFloatNaN()) + { + impSpillSideEffects(false, (unsigned)CHECK_SPILL_ALL DEBUGARG( + "spill side effects before propagating NaN")); + + // maxsd, maxss, minsd, and minss all return op2 if either is NaN + // we require NaN to be propagated so ensure the known NaN is op2 + + impPopStack(); + impPopStack(); + DEBUG_DESTROY_NODE(otherNode); + + retNode = cnsNode; + break; + } + + if (ni == NI_System_Math_Max) + { + // maxsd, maxss return op2 if both inputs are 0 of either sign + // we require +0 to be greater than -0, so we can't handle if + // the known constant is +0. This is because if the unknown value + // is -0, we'd need the cns to be op2. But if the unknown value + // is NaN, we'd need the cns to be op1 instead. + + if (cnsNode->IsFloatPositiveZero()) + { + break; + } + + // Given the checks, op1 can safely be the cns and op2 the other node + + ni = (callType == TYP_DOUBLE) ? NI_SSE2_Max : NI_SSE_Max; + + // one is constant and we know its something we can handle, so pop both peeked values + + op1 = cnsNode; + op2 = otherNode; + } + else + { + assert(ni == NI_System_Math_Min); + + // minsd, minss return op2 if both inputs are 0 of either sign + // we require -0 to be lesser than +0, so we can't handle if + // the known constant is -0. This is because if the unknown value + // is +0, we'd need the cns to be op2. But if the unknown value + // is NaN, we'd need the cns to be op1 instead. + + if (cnsNode->IsFloatNegativeZero()) + { + break; + } + + // Given the checks, op1 can safely be the cns and op2 the other node + + ni = (callType == TYP_DOUBLE) ? NI_SSE2_Min : NI_SSE_Min; + + // one is constant and we know its something we can handle, so pop both peeked values + + op1 = cnsNode; + op2 = otherNode; + } + + assert(op1->IsCnsFltOrDbl() && !op2->IsCnsFltOrDbl()); + + impPopStack(); + impPopStack(); + + GenTreeVecCon* vecCon = gtNewVconNode(TYP_SIMD16, callJitType); + + if (callJitType == CORINFO_TYPE_FLOAT) + { + vecCon->gtSimd16Val.f32[0] = (float)op1->AsDblCon()->gtDconVal; + } + else + { + vecCon->gtSimd16Val.f64[0] = op1->AsDblCon()->gtDconVal; + } + + op1 = vecCon; + op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, NI_Vector128_CreateScalarUnsafe, callJitType, 16); + + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, ni, callJitType, 16); + retNode = gtNewSimdHWIntrinsicNode(callType, retNode, NI_Vector128_ToScalar, callJitType, 16); + + break; + } +#endif + case NI_System_Math_Pow: case NI_System_Math_Round: case NI_System_Math_Sin: diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 3364c84a1d859..5666e618325c7 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -2294,6 +2294,22 @@ bool FloatingPointUtils::isNaN(double val) return (bits & 0x7FFFFFFFFFFFFFFFULL) > 0x7FF0000000000000ULL; } +//------------------------------------------------------------------------ +// isNegativeZero: Determines whether the specified value is negative zero (-0.0) +// +// Arguments: +// val - value to check for (-0.0) +// +// Return Value: +// True if val is (-0.0) +// + +bool FloatingPointUtils::isNegativeZero(double val) +{ + UINT64 bits = *reinterpret_cast(&val); + return bits == 0x8000000000000000ULL; +} + //------------------------------------------------------------------------ // maximum: This matches the IEEE 754:2019 `maximum` function // It propagates NaN inputs back to the caller and diff --git a/src/coreclr/jit/utils.h b/src/coreclr/jit/utils.h index 9124a88c6558d..a5c40f44e5d0f 100644 --- a/src/coreclr/jit/utils.h +++ b/src/coreclr/jit/utils.h @@ -711,6 +711,8 @@ class FloatingPointUtils static bool isNaN(double val); + static bool isNegativeZero(double val); + static double maximum(double val1, double val2); static float maximum(float val1, float val2);