ARM64 - Optimizing a % b operations (dotnet#65535)
* Initial work for ARM64 mod optimization

* Updated comment

* Updated comment

* Updated comment

* Fixing build

* Remove unneeded var

* Use '%' morph logic for both x64/arm64

* Adding back in divisor check for x64

* Formatting

* Update comments

* Update comments

* Fixing

* Updated comment

* Updated comment

* Tweaking x64 transformation logic for the mod opt

* Tweaking x64 transformation logic for the mod opt

* Using IntCon

* Fixed build

* Minor tweak

* Fixing x64 diffs

* Removing flag set

* Feedback

* Fixing build

* Feedback

* Fixing tests

* Fixing tests

* Fixing tests

* Formatting

* Fixing tests

* Feedback

* Fixing build
TIHan authored and radekdoulik committed Mar 30, 2022
1 parent 4d7fd2e commit 73b1897
Showing 3 changed files with 100 additions and 41 deletions.
1 change: 1 addition & 0 deletions src/coreclr/jit/compiler.h
@@ -6489,6 +6489,7 @@ class Compiler
GenTree* fgPropagateCommaThrow(GenTree* parent, GenTreeOp* commaThrow, GenTreeFlags precedingSideEffects);
GenTree* fgMorphRetInd(GenTreeUnOp* tree);
GenTree* fgMorphModToSubMulDiv(GenTreeOp* tree);
GenTree* fgMorphUModToAndSub(GenTreeOp* tree);
GenTree* fgMorphSmpOpOptional(GenTreeOp* tree);
GenTree* fgMorphMultiOp(GenTreeMultiOp* multiOp);
GenTree* fgMorphConst(GenTree* tree);
47 changes: 47 additions & 0 deletions src/coreclr/jit/gentree.h
@@ -2110,6 +2110,10 @@ struct GenTree

inline bool IsIntegralConst() const;

inline bool IsIntegralConstUnsignedPow2() const;

inline bool IsIntegralConstAbsPow2() const;

inline bool IsIntCnsFitsInI32(); // Constant fits in INT32

inline bool IsCnsFltOrDbl() const;
@@ -8343,6 +8347,49 @@ inline bool GenTree::IsIntegralConst() const
#endif // !TARGET_64BIT
}

//-------------------------------------------------------------------------
// IsIntegralConstUnsignedPow2: Determines whether the unsigned value of
//                              an integral constant is a power of 2.
//
// Return Value:
//     Returns true if the unsigned value of a GenTree's integral constant
//     is a power of 2.
//
// Notes:
//     Integral constant nodes store their value in signed form.
//     This handles cases where an unsigned int was logically used in
//     user code.
//
inline bool GenTree::IsIntegralConstUnsignedPow2() const
{
    if (IsIntegralConst())
    {
        return isPow2((UINT64)AsIntConCommon()->IntegralValue());
    }

    return false;
}

//-------------------------------------------------------------------------
// IsIntegralConstAbsPow2: Determines whether the absolute value of
//                         an integral constant is a power of 2.
//
// Return Value:
//     Returns true if the absolute value of a GenTree's integral constant
//     is a power of 2.
//
inline bool GenTree::IsIntegralConstAbsPow2() const
{
    if (IsIntegralConst())
    {
        INT64  svalue = AsIntConCommon()->IntegralValue();
        size_t value  = (svalue == SSIZE_T_MIN) ? static_cast<size_t>(svalue) : static_cast<size_t>(abs(svalue));
        return isPow2(value);
    }

    return false;
}

// Is this node an integer constant that fits in a 32-bit signed integer (INT32)
inline bool GenTree::IsIntCnsFitsInI32()
{
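The two helpers added above differ only in how they interpret the constant before the power-of-two test: IsIntegralConstUnsignedPow2 reinterprets the stored signed value as unsigned, while IsIntegralConstAbsPow2 takes its absolute value. A minimal standalone sketch of that difference (ordinary C++, not JIT code; isPow2 and the INT64_MIN guard are local stand-ins for the JIT's helper and its SSIZE_T_MIN special case):

#include <cstdint>
#include <cstdio>
#include <cstdlib>

// Local stand-in for the JIT's isPow2 helper.
static bool isPow2(uint64_t v)
{
    return (v != 0) && ((v & (v - 1)) == 0);
}

int main()
{
    int64_t divisor = -8;

    // Unsigned interpretation: 0xFFFFFFFFFFFFFFF8 is not a power of two,
    // so a UMOD by this constant would not qualify for the and-mask rewrite.
    bool unsignedPow2 = isPow2(static_cast<uint64_t>(divisor));

    // Absolute-value interpretation: |-8| == 8 is a power of two,
    // so a signed MOD by this constant does qualify on XARCH.
    uint64_t absValue = (divisor == INT64_MIN) ? static_cast<uint64_t>(divisor)
                                               : static_cast<uint64_t>(std::llabs(divisor));
    bool absPow2 = isPow2(absValue);

    std::printf("unsigned pow2: %d, abs pow2: %d\n", unsignedPow2, absPow2); // prints 0, 1
    return 0;
}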
93 changes: 52 additions & 41 deletions src/coreclr/jit/morph.cpp
@@ -11442,58 +11442,34 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
 #endif
 #endif // !TARGET_64BIT

-#ifdef TARGET_ARM64
-            // For ARM64 we don't have a remainder instruction,
-            // The architecture manual suggests the following transformation to
-            // generate code for such operator:
-            //
-            // a % b = a - (a / b) * b;
-            //
-            // TODO: there are special cases where it can be done better, for example
-            // when the modulo operation is unsigned and the divisor is a
-            // integer constant power of two. In this case, we can make the transform:
-            //
-            // a % b = a & (b - 1);
-            //
-            // Lower supports it for all cases except when `a` is constant, but
-            // in Morph we can't guarantee that `a` won't be transformed into a constant,
-            // so can't guarantee that lower will be able to do this optimization.
             if (!optValnumCSE_phase)
             {
-                // Do "a % b = a - (a / b) * b" morph always, see TODO before this block.
-                bool doMorphModToSubMulDiv = true;
-
-                if (doMorphModToSubMulDiv)
+#ifdef TARGET_ARM64
+                if (tree->OperIs(GT_UMOD) && op2->IsIntegralConstUnsignedPow2())
                 {
-                    assert(!optValnumCSE_phase);
-
-                    tree = fgMorphModToSubMulDiv(tree->AsOp());
+                    // Transformation: a % b = a & (b - 1);
+                    tree = fgMorphUModToAndSub(tree->AsOp());
                     op1  = tree->AsOp()->gtOp1;
                     op2  = tree->AsOp()->gtOp2;
                 }
-            }
-#else // !TARGET_ARM64
-            // If b is not a power of 2 constant then lowering replaces a % b
-            // with a - (a / b) * b and applies magic division optimization to
-            // a / b. The code may already contain an a / b expression (e.g.
-            // x = a / 10; y = a % 10;) and then we end up with redundant code.
-            // If we convert % to / here we give CSE the opportunity to eliminate
-            // the redundant division. If there's no redundant division then
-            // nothing is lost, lowering would have done this transform anyway.
-
-            if (!optValnumCSE_phase && ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst()))
-            {
-                ssize_t divisorValue    = op2->AsIntCon()->IconValue();
-                size_t  absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue)
-                                                                        : static_cast<size_t>(abs(divisorValue));
-
-                if (!isPow2(absDivisorValue))
+                // ARM64 architecture manual suggests this transformation
+                // for the mod operator.
+                else
+#else
+                // XARCH only applies this transformation if we know
+                // that magic division will be used - which is determined
+                // when 'b' is not a power of 2 constant and mod operator is signed.
+                // Lowering for XARCH does this optimization already,
+                // but is also done here to take advantage of CSE.
+                if (tree->OperIs(GT_MOD) && op2->IsIntegralConst() && !op2->IsIntegralConstAbsPow2())
+#endif
                 {
+                    // Transformation: a % b = a - (a / b) * b;
                     tree = fgMorphModToSubMulDiv(tree->AsOp());
                     op1  = tree->AsOp()->gtOp1;
                     op2  = tree->AsOp()->gtOp2;
                 }
             }
-#endif // !TARGET_ARM64
             break;

         USE_HELPER_FOR_ARITH:
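Both branches above rely on standard arithmetic identities: for any non-zero b, a % b == a - (a / b) * b (the fgMorphModToSubMulDiv form), and for unsigned a with a power-of-two b, a % b == a & (b - 1) (the new fgMorphUModToAndSub form). A quick standalone check of the two identities, ordinary C++ rather than JIT code:

#include <cassert>
#include <cstdint>

int main()
{
    // General case: truncating division gives a % b == a - (a / b) * b for any b != 0.
    int64_t a = -1234567;
    int64_t b = 10; // not a power of two, so the JIT would use the sub/mul/div form
    assert(a % b == a - (a / b) * b);

    // Unsigned power-of-two case: the remainder is just the low bits.
    uint64_t ua = 0xDEADBEEFCAFEULL;
    uint64_t ub = 8; // power of two, so the JIT would use the and-mask form
    assert(ua % ub == (ua & (ub - 1)));

    return 0;
}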
@@ -14784,6 +14760,41 @@ GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
return sub;
}

//------------------------------------------------------------------------
// fgMorphUModToAndSub: Transform a % b into the equivalent a & (b - 1).
//     '%' must be unsigned (GT_UMOD).
//     'a' and 'b' must be integers.
//     'b' must be a constant and a power of two.
//
// Arguments:
//     tree - The GT_UMOD tree to morph
//
// Returns:
//     The morphed tree
//
// Notes:
//     This produces better code than the general fgMorphModToSubMulDiv transform.
//
GenTree* Compiler::fgMorphUModToAndSub(GenTreeOp* tree)
{
    JITDUMP("\nMorphing UMOD [%06u] to And/Sub\n", dspTreeID(tree));

    assert(tree->OperIs(GT_UMOD));
    assert(tree->gtOp2->IsIntegralConstUnsignedPow2());

    const var_types type = tree->TypeGet();

    const size_t   cnsValue = (static_cast<size_t>(tree->gtOp2->AsIntConCommon()->IntegralValue())) - 1;
    GenTree* const newTree  = gtNewOperNode(GT_AND, type, tree->gtOp1, gtNewIconNode(cnsValue, type));

    INDEBUG(newTree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);

    DEBUG_DESTROY_NODE(tree->gtOp2);
    DEBUG_DESTROY_NODE(tree);

    return newTree;
}

//------------------------------------------------------------------------------
// fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree.
//
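For illustration, the effect of fgMorphUModToAndSub can be mimicked on a toy expression tree. This is a sketch with a made-up Node type, not the JIT's GenTree API, and it omits the bookkeeping the real function does (GTF_DEBUG_NODE_MORPHED, DEBUG_DESTROY_NODE):

#include <cassert>
#include <cstdint>
#include <memory>

enum class Op { Const, Var, UMod, And };

// Made-up expression node; the real JIT operates on GenTree nodes instead.
struct Node
{
    Op                    op;
    uint64_t              value; // used when op == Op::Const
    std::shared_ptr<Node> lhs, rhs;
};

static bool isPow2(uint64_t v) { return (v != 0) && ((v & (v - 1)) == 0); }

// Rewrite UMOD(x, c) into AND(x, c - 1) when c is an unsigned power of two;
// otherwise return the tree unchanged.
static std::shared_ptr<Node> morphUModToAnd(const std::shared_ptr<Node>& tree)
{
    if (tree->op == Op::UMod && tree->rhs->op == Op::Const && isPow2(tree->rhs->value))
    {
        auto mask = std::make_shared<Node>(Node{Op::Const, tree->rhs->value - 1, nullptr, nullptr});
        return std::make_shared<Node>(Node{Op::And, 0, tree->lhs, mask});
    }
    return tree;
}

int main()
{
    auto x    = std::make_shared<Node>(Node{Op::Var, 0, nullptr, nullptr});
    auto c8   = std::make_shared<Node>(Node{Op::Const, 8, nullptr, nullptr});
    auto umod = std::make_shared<Node>(Node{Op::UMod, 0, x, c8});

    auto morphed = morphUModToAnd(umod);
    assert(morphed->op == Op::And);
    assert(morphed->rhs->op == Op::Const && morphed->rhs->value == 7);
    return 0;
}

A tree like UMOD(x, 8) becomes AND(x, 7), mirroring the "a % b = a & (b - 1)" comment in the diff.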
