Skip to content

Commit

Permalink
Optimization. Use Min/Max intrinsics if one of arguments is constant. (
Browse files Browse the repository at this point in the history
…#69434)

* Add xarch optimization for min/max (#65625)

* Changes according to the requirements (#65625)

* Draft for Math.Max/Math.Min optimization (#65625)

* Draft for optimizing Math.Max/Math.Min with a const (#65625)

* Fix tests (#65625)

* Refactoring of the conditions (#65625)

* Fix of the summary (#65625)

* Refactoring due to the PR comments (#65625)

* Add spilling side effect + Fix of formats (#65625)

* Update src/coreclr/jit/importer.cpp

Co-authored-by: Jakob Botsch Nielsen <Jakob.botsch.nielsen@gmail.com>

* Update src/coreclr/jit/importer.cpp

Co-authored-by: Jakob Botsch Nielsen <Jakob.botsch.nielsen@gmail.com>

Co-authored-by: Jakob Botsch Nielsen <Jakob.botsch.nielsen@gmail.com>
  • Loading branch information
SkiFoD and jakobbotsch committed Jun 23, 2022
1 parent fe1ecae commit 5877e8b
Show file tree
Hide file tree
Showing 4 changed files with 208 additions and 1 deletion.
38 changes: 38 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1770,7 +1770,9 @@ struct GenTree
#endif // DEBUG

inline bool IsIntegralConst(ssize_t constVal) const;
inline bool IsFloatNaN() const;
inline bool IsFloatPositiveZero() const;
inline bool IsFloatNegativeZero() const;
inline bool IsVectorZero() const;
inline bool IsVectorAllBitsSet() const;
inline bool IsVectorConst();
Expand Down Expand Up @@ -8326,6 +8328,42 @@ inline bool GenTree::IsIntegralConst(ssize_t constVal) const
return false;
}

//-------------------------------------------------------------------
// IsFloatNaN: returns true if this is exactly a const float value of NaN
//
// Returns:
// True if this represents a const floating-point value of NaN.
// Will return false otherwise.
//
inline bool GenTree::IsFloatNaN() const
{
if (IsCnsFltOrDbl())
{
double constValue = AsDblCon()->gtDconVal;
return FloatingPointUtils::isNaN(constValue);
}

return false;
}

//-------------------------------------------------------------------
// IsFloatNegativeZero: returns true if this is exactly a const float value of negative zero (-0.0)
//
// Returns:
// True if this represents a const floating-point value of exactly negative zero (-0.0).
// Will return false if the value is negative zero (+0.0).
//
inline bool GenTree::IsFloatNegativeZero() const
{
if (IsCnsFltOrDbl())
{
double constValue = AsDblCon()->gtDconVal;
return FloatingPointUtils::isNegativeZero(constValue);
}

return false;
}

//-------------------------------------------------------------------
// IsFloatPositiveZero: returns true if this is exactly a const float value of postive zero (+0.0)
//
Expand Down
153 changes: 152 additions & 1 deletion src/coreclr/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4461,13 +4461,164 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
case NI_System_Math_Log:
case NI_System_Math_Log2:
case NI_System_Math_Log10:
#ifdef TARGET_ARM64
{
retNode = impMathIntrinsic(method, sig, callType, ni, tailCall);
break;
}
#if defined(TARGET_ARM64)
// ARM64 has fmax/fmin which are IEEE754:2019 minimum/maximum compatible
// TODO-XARCH-CQ: Enable this for XARCH when one of the arguments is a constant
// so we can then emit maxss/minss and avoid NaN/-0.0 handling
case NI_System_Math_Max:
case NI_System_Math_Min:
#endif

#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)
case NI_System_Math_Max:
case NI_System_Math_Min:
{
assert(varTypeIsFloating(callType));
assert(sig->numArgs == 2);

GenTreeDblCon* cnsNode = nullptr;
GenTree* otherNode = nullptr;

GenTree* op2 = impStackTop().val;
GenTree* op1 = impStackTop(1).val;

if (op2->IsCnsFltOrDbl())
{
cnsNode = op2->AsDblCon();
otherNode = op1;
}
else if (op1->IsCnsFltOrDbl())
{
cnsNode = op1->AsDblCon();
otherNode = op2;
}

if (cnsNode == nullptr)
{
// no constant node, nothing to do
break;
}

if (otherNode->IsCnsFltOrDbl())
{
// both are constant, we can fold this operation completely. Pop both peeked values

if (ni == NI_System_Math_Max)
{
cnsNode->gtDconVal =
FloatingPointUtils::maximum(cnsNode->gtDconVal, otherNode->AsDblCon()->gtDconVal);
}
else
{
assert(ni == NI_System_Math_Min);
cnsNode->gtDconVal =
FloatingPointUtils::minimum(cnsNode->gtDconVal, otherNode->AsDblCon()->gtDconVal);
}

retNode = cnsNode;

impPopStack();
impPopStack();
DEBUG_DESTROY_NODE(otherNode);

break;
}

// only one is constant, we can fold in specialized scenarios

if (cnsNode->IsFloatNaN())
{
impSpillSideEffects(false, (unsigned)CHECK_SPILL_ALL DEBUGARG(
"spill side effects before propagating NaN"));

// maxsd, maxss, minsd, and minss all return op2 if either is NaN
// we require NaN to be propagated so ensure the known NaN is op2

impPopStack();
impPopStack();
DEBUG_DESTROY_NODE(otherNode);

retNode = cnsNode;
break;
}

if (ni == NI_System_Math_Max)
{
// maxsd, maxss return op2 if both inputs are 0 of either sign
// we require +0 to be greater than -0, so we can't handle if
// the known constant is +0. This is because if the unknown value
// is -0, we'd need the cns to be op2. But if the unknown value
// is NaN, we'd need the cns to be op1 instead.

if (cnsNode->IsFloatPositiveZero())
{
break;
}

// Given the checks, op1 can safely be the cns and op2 the other node

ni = (callType == TYP_DOUBLE) ? NI_SSE2_Max : NI_SSE_Max;

// one is constant and we know its something we can handle, so pop both peeked values

op1 = cnsNode;
op2 = otherNode;
}
else
{
assert(ni == NI_System_Math_Min);

// minsd, minss return op2 if both inputs are 0 of either sign
// we require -0 to be lesser than +0, so we can't handle if
// the known constant is -0. This is because if the unknown value
// is +0, we'd need the cns to be op2. But if the unknown value
// is NaN, we'd need the cns to be op1 instead.

if (cnsNode->IsFloatNegativeZero())
{
break;
}

// Given the checks, op1 can safely be the cns and op2 the other node

ni = (callType == TYP_DOUBLE) ? NI_SSE2_Min : NI_SSE_Min;

// one is constant and we know its something we can handle, so pop both peeked values

op1 = cnsNode;
op2 = otherNode;
}

assert(op1->IsCnsFltOrDbl() && !op2->IsCnsFltOrDbl());

impPopStack();
impPopStack();

GenTreeVecCon* vecCon = gtNewVconNode(TYP_SIMD16, callJitType);

if (callJitType == CORINFO_TYPE_FLOAT)
{
vecCon->gtSimd16Val.f32[0] = (float)op1->AsDblCon()->gtDconVal;
}
else
{
vecCon->gtSimd16Val.f64[0] = op1->AsDblCon()->gtDconVal;
}

op1 = vecCon;
op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, NI_Vector128_CreateScalarUnsafe, callJitType, 16);

retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, ni, callJitType, 16);
retNode = gtNewSimdHWIntrinsicNode(callType, retNode, NI_Vector128_ToScalar, callJitType, 16);

break;
}
#endif

case NI_System_Math_Pow:
case NI_System_Math_Round:
case NI_System_Math_Sin:
Expand Down
16 changes: 16 additions & 0 deletions src/coreclr/jit/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2294,6 +2294,22 @@ bool FloatingPointUtils::isNaN(double val)
return (bits & 0x7FFFFFFFFFFFFFFFULL) > 0x7FF0000000000000ULL;
}

//------------------------------------------------------------------------
// isNegativeZero: Determines whether the specified value is negative zero (-0.0)
//
// Arguments:
// val - value to check for (-0.0)
//
// Return Value:
// True if val is (-0.0)
//

bool FloatingPointUtils::isNegativeZero(double val)
{
UINT64 bits = *reinterpret_cast<UINT64*>(&val);
return bits == 0x8000000000000000ULL;
}

//------------------------------------------------------------------------
// maximum: This matches the IEEE 754:2019 `maximum` function
// It propagates NaN inputs back to the caller and
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,8 @@ class FloatingPointUtils

static bool isNaN(double val);

static bool isNegativeZero(double val);

static double maximum(double val1, double val2);

static float maximum(float val1, float val2);
Expand Down

0 comments on commit 5877e8b

Please sign in to comment.