Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support java behaviour w.r.t fmin/fmax/dmin/dmax #20185

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions runtime/compiler/codegen/J9CodeGenerator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,16 @@ void addMonClass(TR::Node* monNode, TR_OpaqueClassBlock* clazz);
*/
void setSupportsInlineVectorizedHashCode() { _j9Flags.set(SupportsInlineVectorizedHashCode); }

/** \brief
* Determines whether the code generator supports inlining of java_lang_Math_max/min_F/D
*/
bool getSupportsInlineMath_MaxMin_FD() { return _j9Flags.testAny(SupportsInlineMath_MaxMin_FD); }

/** \brief
* The code generator supports inlining of java_lang_Math_max/min_F/D
*/
void setSupportsInlineMath_MaxMin_FD() { _j9Flags.set(SupportsInlineMath_MaxMin_FD); }

/**
* \brief
* The number of nodes between a monext and the next monent before
Expand Down Expand Up @@ -677,6 +687,7 @@ void addMonClass(TR::Node* monNode, TR_OpaqueClassBlock* clazz);
SavesNonVolatileGPRsForGC = 0x00000800,
SupportsInlineVectorizedMismatch = 0x00001000,
SupportsInlineVectorizedHashCode = 0x00002000,
SupportsInlineMath_MaxMin_FD = 0x00002000,
};

flags32_t _j9Flags;
Expand Down
4 changes: 4 additions & 0 deletions runtime/compiler/env/j9method.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4996,6 +4996,10 @@ TR_ResolvedJ9Method::setRecognizedMethodInfo(TR::RecognizedMethod rm)
case TR::java_lang_Math_min_I:
case TR::java_lang_Math_max_L:
case TR::java_lang_Math_min_L:
case TR::java_lang_Math_max_F:
case TR::java_lang_Math_min_F:
case TR::java_lang_Math_max_D:
case TR::java_lang_Math_min_D:
case TR::java_lang_Math_abs_I:
case TR::java_lang_Math_abs_L:
case TR::java_lang_Math_abs_F:
Expand Down
16 changes: 16 additions & 0 deletions runtime/compiler/optimizer/J9RecognizedCallTransformer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1366,6 +1366,10 @@ bool J9::RecognizedCallTransformer::isInlineable(TR::TreeTop* treetop)
case TR::java_lang_Math_min_I:
case TR::java_lang_Math_max_L:
case TR::java_lang_Math_min_L:
case TR::java_lang_Math_max_F:
case TR::java_lang_Math_min_F:
case TR::java_lang_Math_max_D:
case TR::java_lang_Math_min_D:
return !comp()->getOption(TR_DisableMaxMinOptimization);
case TR::java_lang_Math_multiplyHigh:
return cg()->getSupportsLMulHigh();
Expand Down Expand Up @@ -1495,6 +1499,18 @@ void J9::RecognizedCallTransformer::transform(TR::TreeTop* treetop)
case TR::java_lang_Math_min_L:
processIntrinsicFunction(treetop, node, TR::lmin);
break;
case TR::java_lang_Math_max_F:
processIntrinsicFunction(treetop, node, TR::fmax);
break;
case TR::java_lang_Math_min_F:
processIntrinsicFunction(treetop, node, TR::fmin);
break;
case TR::java_lang_Math_max_D:
processIntrinsicFunction(treetop, node, TR::dmax);
break;
case TR::java_lang_Math_min_D:
processIntrinsicFunction(treetop, node, TR::dmin);
break;
case TR::java_lang_Math_multiplyHigh:
processIntrinsicFunction(treetop, node, TR::lmulh);
break;
Expand Down
24 changes: 17 additions & 7 deletions runtime/compiler/z/codegen/J9CodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,12 @@ J9::Z::CodeGenerator::initialize()
cg->setSupportsInlineEncodeASCII();
}

static bool disableInlineMath_MaxMin_FD = feGetEnv("TR_disableInlineMath_MaxMin_FD") != NULL;
if (!disableInlineMath_MaxMin_FD)
{
cg->setSupportsInlineMath_MaxMin_FD();
}

static bool disableInlineVectorizedMismatch = feGetEnv("TR_disableInlineVectorizedMismatch") != NULL;
if (cg->getSupportsArrayCmpLen() &&
#if defined(J9VM_GC_ENABLE_SPARSE_HEAP_ALLOCATION)
Expand Down Expand Up @@ -4073,20 +4079,24 @@ J9::Z::CodeGenerator::inlineDirectCall(
}
}

if (!comp->getOption(TR_DisableSIMDDoubleMaxMin) && cg->getSupportsVectorRegisters())
{
switch (methodSymbol->getRecognizedMethod())
{
if (cg->getSupportsInlineMath_MaxMin_FD()) {
switch (methodSymbol->getRecognizedMethod()) {
case TR::java_lang_Math_max_D:
resultReg = TR::TreeEvaluator::inlineDoubleMax(node, cg);
resultReg = J9::Z::TreeEvaluator::dmaxEvaluator(node, cg);
return true;
case TR::java_lang_Math_min_D:
resultReg = TR::TreeEvaluator::inlineDoubleMin(node, cg);
resultReg = J9::Z::TreeEvaluator::dminEvaluator(node, cg);
return true;
case TR::java_lang_Math_max_F:
resultReg = J9::Z::TreeEvaluator::fmaxEvaluator(node, cg);
return true;
case TR::java_lang_Math_min_F:
resultReg = J9::Z::TreeEvaluator::fminEvaluator(node, cg);
return true;
default:
break;
}
}
}

switch (methodSymbol->getRecognizedMethod())
{
Expand Down
136 changes: 116 additions & 20 deletions runtime/compiler/z/codegen/J9TreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -906,10 +906,13 @@ allocateWriteBarrierInternalPointerRegister(TR::CodeGenerator * cg, TR::Node * s
}


extern TR::Register *
doubleMaxMinHelper(TR::Node *node, TR::CodeGenerator *cg, bool isMaxOp)
static TR::Register*
fpMinMaxVectorHelper(TR::Node* node, TR::CodeGenerator* cg, TR::DataTypes dataType, bool isMaxOp)
{
TR_ASSERT(node->getNumChildren() >= 1 || node->getNumChildren() <= 2, "node has incorrect number of children");
TR_ASSERT(dataType == TR::DataTypes::Double || dataType == TR::DataTypes::Float, "incorrect dataType");

int type = dataType == TR::DataTypes::Double ? 3 : 2; //for type mask

/* ===================== Allocating Registers ===================== */

Expand All @@ -926,17 +929,17 @@ doubleMaxMinHelper(TR::Node *node, TR::CodeGenerator *cg, bool isMaxOp)
TR::Register * v2 = cg->evaluate(node->getSecondChild());

/* ====== WFTCIDB V16,V0,X'F' a == NaN ====== */
generateVRIeInstruction(cg, TR::InstOpCode::VFTCI, node, v16, v0, 0xF, 8, 3);
generateVRIeInstruction(cg, TR::InstOpCode::VFTCI, node, v16, v0, 0xF, 8, type);

/* ====== For Max: WFCHE V17,V0,V2 Compare a >= b ====== */
if(isMaxOp)
{
generateVRRcInstruction(cg, TR::InstOpCode::VFCH, node, v17, v0, v2, 0, 8, 3);
generateVRRcInstruction(cg, TR::InstOpCode::VFCH, node, v17, v0, v2, 0, 8, type);
}
/* ====== For Min: WFCHE V17,V0,V2 Compare a <= b ====== */
else
{
generateVRRcInstruction(cg, TR::InstOpCode::VFCH, node, v17, v2, v0, 0, 8, 3);
generateVRRcInstruction(cg, TR::InstOpCode::VFCH, node, v17, v2, v0, 0, 8, type);
}

/* ====== VO V16,V16,V17 (a >= b) || (a == NaN) ====== */
Expand All @@ -945,15 +948,15 @@ doubleMaxMinHelper(TR::Node *node, TR::CodeGenerator *cg, bool isMaxOp)
/* ====== For Max: WFTCIDB V17,V0,X'800' a == +0 ====== */
if(isMaxOp)
{
generateVRIeInstruction(cg, TR::InstOpCode::VFTCI, node, v17, v0, 0x800, 8, 3);
generateVRIeInstruction(cg, TR::InstOpCode::VFTCI, node, v17, v0, 0x800, 8, type);
}
/* ====== For Min: WFTCIDB V17,V0,X'400' a == -0 ====== */
else
{
generateVRIeInstruction(cg, TR::InstOpCode::VFTCI, node, v17, v0, 0x400, 8, 3);
generateVRIeInstruction(cg, TR::InstOpCode::VFTCI, node, v17, v0, 0x400, 8, type);
}
/* ====== WFTCIDB V18,V2,X'C00' b == 0 ====== */
generateVRIeInstruction(cg, TR::InstOpCode::VFTCI, node, v18, v2, 0xC00, 8, 3);
generateVRIeInstruction(cg, TR::InstOpCode::VFTCI, node, v18, v2, 0xC00, 8, type);

/* ====== VN V17,V17,V18 (a == -0) && (b == 0) ====== */
generateVRRcInstruction(cg, TR::InstOpCode::VN, node, v17, v17, v18, 0, 0, 0);
Expand All @@ -964,6 +967,7 @@ doubleMaxMinHelper(TR::Node *node, TR::CodeGenerator *cg, bool isMaxOp)
/* ====== VSEL V0,V0,V2,V16 ====== */
generateVRReInstruction(cg, TR::InstOpCode::VSEL, node, v0, v0, v2, v16);


/* ===================== Deallocating Registers ===================== */
cg->stopUsingRegister(v2);
cg->stopUsingRegister(v16);
Expand All @@ -978,6 +982,110 @@ doubleMaxMinHelper(TR::Node *node, TR::CodeGenerator *cg, bool isMaxOp)
return node->getRegister();
}

static TR::Register*
fpMinMaxHelper(TR::Node* node, TR::CodeGenerator* cg, TR::InstOpCode::Mnemonic compareRROp, TR::InstOpCode::S390BranchCondition branchCond, TR::InstOpCode::Mnemonic moveRROp)
{
TR::Node* lhsNode = node->getChild(0);
TR::Node* rhsNode = node->getChild(1);

TR::Register* lhsReg = cg->gprClobberEvaluate(lhsNode);
TR::Register* rhsReg = cg->evaluate(rhsNode);

TR::LabelSymbol* cFlowRegionStart = generateLabelSymbol(cg);
TR::LabelSymbol* cFlowRegionEnd = generateLabelSymbol(cg);

TR::LabelSymbol* swap = generateLabelSymbol(cg);
TR::LabelSymbol* equalRegion = generateLabelSymbol(cg);

generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
cFlowRegionStart->setStartInternalControlFlow();

generateRREInstruction(cg, compareRROp, node, lhsReg, rhsReg);
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, branchCond, node, cFlowRegionEnd);
//Check for NaN operands for float and double
//Support float and double +0/-0 comparisons adhering to IEEE 754 standard
//Checking if operands are equal, then branching to equalRegion, otherwise fall through for NaN case handling
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK8, node, equalRegion);

// If first operand is NaN, then we are done, otherwise fallthrough to move second operand as result
generateRXEInstruction(cg, node->getOpCode().isDouble() ? TR::InstOpCode::TCDB : TR::InstOpCode::TCEB, node, lhsReg, generateS390MemoryReference(0x00F, cg),0);
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK4, node, cFlowRegionEnd);
//branch to swap label, since either second operand is NaN, or entire satisfies the alternate condition code
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, swap);

//code for handling +0/-0 comparisons when operands are equal
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, equalRegion);
if (node->getOpCode().isMax())
{
//For Max calls, checking if first operand is +0, then we are done, otherwise fall through for swap
generateRXEInstruction(cg, node->getOpCode().isDouble() ? TR::InstOpCode::TCDB : TR::InstOpCode::TCEB, node, lhsReg, generateS390MemoryReference(0x800, cg), 0); // lhsReg is +0 ?
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK4, node, cFlowRegionEnd); // it is +0
}
else if (node->getOpCode().isMin())
{
//For Min calls, checking if first operand is not +0, then we are done, otherwise fall through for swap
generateRXEInstruction(cg, node->getOpCode().isDouble() ? TR::InstOpCode::TCDB : TR::InstOpCode::TCEB, node, lhsReg, generateS390MemoryReference(0x400, cg), 0); // lhsReg is -0 ?
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK4, node, cFlowRegionEnd);
}

generateS390LabelInstruction(cg, TR::InstOpCode::label, node, swap);
//Move resulting operand to lhsReg as fallthrough for alternate Condition Code
generateRREInstruction(cg, moveRROp, node, lhsReg, rhsReg);

TR::RegisterDependencyConditions* deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2, cg);
deps->addPostConditionIfNotAlreadyInserted(lhsReg, TR::RealRegister::AssignAny);
deps->addPostConditionIfNotAlreadyInserted(rhsReg, TR::RealRegister::AssignAny);

generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, deps);
cFlowRegionEnd->setEndInternalControlFlow();

node->setRegister(lhsReg);
cg->decReferenceCount(lhsNode);
cg->decReferenceCount(rhsNode);

return lhsReg;
}

TR::Register*
J9::Z::TreeEvaluator::fminEvaluator(TR::Node *node, TR::CodeGenerator *cg)
{
if (cg->getSupportsVectorRegisters())
{
return fpMinMaxVectorHelper(node, cg, TR::DataTypes::Float, false);
}
return fpMinMaxHelper(node, cg, TR::InstOpCode::CEBR, TR::InstOpCode::COND_MASK4, TR::InstOpCode::LER);
}

TR::Register*
J9::Z::TreeEvaluator::dminEvaluator(TR::Node *node, TR::CodeGenerator *cg)
{
if (cg->getSupportsVectorRegisters())
{
return fpMinMaxVectorHelper(node, cg, TR::DataTypes::Double, false);
}
return fpMinMaxHelper(node, cg, TR::InstOpCode::CDBR, TR::InstOpCode::COND_MASK4, TR::InstOpCode::LDR);
}

TR::Register*
J9::Z::TreeEvaluator::fmaxEvaluator(TR::Node *node, TR::CodeGenerator *cg)
{
if (cg->getSupportsVectorRegisters())
{
return fpMinMaxVectorHelper(node, cg, TR::DataTypes::Float, true);
}
return fpMinMaxHelper(node, cg, TR::InstOpCode::CEBR, TR::InstOpCode::COND_MASK2, TR::InstOpCode::LER);
}

TR::Register*
J9::Z::TreeEvaluator::dmaxEvaluator(TR::Node *node, TR::CodeGenerator *cg)
{
if (cg->getSupportsVectorRegisters())
{
return fpMinMaxVectorHelper(node, cg, TR::DataTypes::Double, true);
}
return fpMinMaxHelper(node, cg, TR::InstOpCode::CDBR, TR::InstOpCode::COND_MASK2, TR::InstOpCode::LDR);
}

TR::Register*
J9::Z::TreeEvaluator::inlineVectorizedStringIndexOf(TR::Node* node, TR::CodeGenerator* cg, bool isUTF16)
{
Expand Down Expand Up @@ -2945,19 +3053,7 @@ J9::Z::TreeEvaluator::toLowerIntrinsic(TR::Node *node, TR::CodeGenerator *cg, bo
return caseConversionHelper(node, cg, false, isCompressedString);
}

TR::Register*
J9::Z::TreeEvaluator::inlineDoubleMax(TR::Node *node, TR::CodeGenerator *cg)
{
cg->generateDebugCounter("z13/simd/doubleMax", 1, TR::DebugCounter::Free);
return doubleMaxMinHelper(node, cg, true);
}

TR::Register*
J9::Z::TreeEvaluator::inlineDoubleMin(TR::Node *node, TR::CodeGenerator *cg)
{
cg->generateDebugCounter("z13/simd/doubleMin", 1, TR::DebugCounter::Free);
return doubleMaxMinHelper(node, cg, false);
}

TR::Register *
J9::Z::TreeEvaluator::inlineMathFma(TR::Node *node, TR::CodeGenerator *cg)
Expand Down
6 changes: 4 additions & 2 deletions runtime/compiler/z/codegen/J9TreeEvaluator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,10 @@ class OMR_EXTENSIBLE TreeEvaluator: public J9::TreeEvaluator
*/
static TR::Register *inlineVectorizedStringIndexOf(TR::Node *node, TR::CodeGenerator *cg, bool isCompressed);
static TR::Register *inlineIntrinsicIndexOf(TR::Node *node, TR::CodeGenerator *cg, bool isLatin1);
static TR::Register *inlineDoubleMax(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *inlineDoubleMin(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *fminEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *dminEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *fmaxEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *dmaxEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *inlineMathFma(TR::Node *node, TR::CodeGenerator *cg);

/* This Evaluator generates the SIMD routine for methods
Expand Down
Loading