Skip to content

Commit

Permalink
Cleanup some handling around Avx10v1
Browse files Browse the repository at this point in the history
  • Loading branch information
tannergooding committed Jun 10, 2024
1 parent c87d73c commit 7ef9a0d
Show file tree
Hide file tree
Showing 10 changed files with 47 additions and 561 deletions.
10 changes: 0 additions & 10 deletions src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -1185,18 +1185,8 @@ HARDWARE_INTRINSIC(AVX10v1, GetMantissaScalar,
HARDWARE_INTRINSIC(AVX10v1, LeadingZeroCount, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1, Max, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaxsq, INS_vpmaxuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX10v1, Min, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpminsq, INS_vpminuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative)
HARDWARE_INTRINSIC(AVX10v1, MultiplyAdd, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1, MultiplyAddNegated, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ps, INS_vfnmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1, MultiplyAddNegatedScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ss, INS_vfnmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(AVX10v1, MultiplyAddScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(AVX10v1, MultiplyAddSubtract, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmaddsub213ps, INS_vfmaddsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1, MultiplyLow, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1, MultiplyScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
HARDWARE_INTRINSIC(AVX10v1, MultiplySubtract, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ps, INS_vfmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1, MultiplySubtractAdd, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsubadd213ps, INS_vfmsubadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1, MultiplySubtractNegated, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1, MultiplySubtractNegatedScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(AVX10v1, MultiplySubtractScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x16, 32, 2, false, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x16x2, 32, 3, false, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible)
HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x8, 16, 2, false, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -970,8 +970,8 @@ GenTree* Compiler::impNonConstFallback(NamedIntrinsic intrinsic, var_types simdT
static_assert_no_msg(NI_AVX512F_RotateLeftVariable == (NI_AVX512F_RotateLeft + 1));
static_assert_no_msg(NI_AVX512F_RotateRightVariable == (NI_AVX512F_RotateRight + 1));
static_assert_no_msg(NI_AVX512F_VL_RotateLeftVariable == (NI_AVX512F_VL_RotateLeft + 1));
static_assert_no_msg(NI_AVX10v1_RotateLeftVariable == (NI_AVX10v1_RotateLeft + 1));
static_assert_no_msg(NI_AVX512F_VL_RotateRightVariable == (NI_AVX512F_VL_RotateRight + 1));
static_assert_no_msg(NI_AVX10v1_RotateLeftVariable == (NI_AVX10v1_RotateLeft + 1));
static_assert_no_msg(NI_AVX10v1_RotateRightVariable == (NI_AVX10v1_RotateRight + 1));

impSpillSideEffect(true,
Expand Down
20 changes: 10 additions & 10 deletions src/coreclr/jit/importercalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4012,11 +4012,8 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
op2 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op2, callJitType, 16);
op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op1, callJitType, 16);

retNode = compOpportunisticallyDependsOn(InstructionSet_AVX10v1)
? gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, NI_AVX10v1_MultiplyAddScalar,
callJitType, 16)
: gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, NI_FMA_MultiplyAddScalar,
callJitType, 16);
retNode =
gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, NI_FMA_MultiplyAddScalar, callJitType, 16);

retNode = gtNewSimdToScalarNode(callType, retNode, callJitType, 16);
break;
Expand Down Expand Up @@ -9298,8 +9295,9 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)
if (!isMagnitude && compOpportunisticallyDependsOn(InstructionSet_SSE2))
{
bool needsFixup = false;
bool canHandle = false;
bool needsFixup = false;
bool canHandle = false;
bool isV512Supported = false;

if (isMax)
{
Expand Down Expand Up @@ -9328,7 +9326,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
needsFixup = cnsNode->IsFloatPositiveZero();
}

if (!needsFixup || compOpportunisticallyDependsOn(InstructionSet_AVX512F))
if (!needsFixup || compIsEvexOpportunisticallySupported(isV512Supported))
{
// Given the checks, op1 can safely be the cns and op2 the other node

Expand Down Expand Up @@ -9369,7 +9367,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
needsFixup = cnsNode->IsFloatNegativeZero();
}

if (!needsFixup || compOpportunisticallyDependsOn(InstructionSet_AVX512F))
if (!needsFixup || compIsEvexOpportunisticallySupported(isV512Supported))
{
// Given the checks, op1 can safely be the cns and op2 the other node

Expand Down Expand Up @@ -9453,8 +9451,10 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
tbl->gtSimdVal.i32[0] = 0x0700;
}

NamedIntrinsic fixupScalarId = isV512Supported ? NI_AVX512F_FixupScalar : NI_AVX10v1_FixupScalar;

retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, retNode, op2Clone, tbl, gtNewIconNode(0),
NI_AVX512F_FixupScalar, callJitType, 16);
fixupScalarId, callJitType, 16);
}

if (isNumber)
Expand Down
53 changes: 19 additions & 34 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1303,30 +1303,29 @@ void Lowering::LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIn
}

//----------------------------------------------------------------------------------------------
// LowerFusedMultiplyAdd: Changes NI_FMA_MultiplyAddScalar / NI_AVX10v1_MultiplyAddScalar produced
// LowerFusedMultiplyAdd: Changes NI_FMA_MultiplyAddScalar produced
// by Math(F).FusedMultiplyAdd to a better FMA intrinsic if there are GT_NEG nodes around it, in order
// to eliminate them.
//
// Arguments:
// node - The hardware intrinsic node
//
// Notes:
// Math(F).FusedMultiplyAdd is expanded into NI_FMA_MultiplyAddScalar / NI_AVX10v1_MultiplyAddScalar and
// Math(F).FusedMultiplyAdd is expanded into NI_FMA_MultiplyAddScalar and
// depending on additional GT_NEG nodes around it, can become:
//
// x * y + z -> NI_FMA_MultiplyAddScalar / NI_AVX10v1_MultiplyAddScalar
// x * -y + z -> NI_FMA_MultiplyAddNegatedScalar / NI_AVX10v1_MultiplyAddNegatedScalar
// -x * y + z -> NI_FMA_MultiplyAddNegatedScalar / NI_AVX10v1_MultiplyAddNegatedScalar
// -x * -y + z -> NI_FMA_MultiplyAddScalar / NI_AVX10v1_MultiplyAddScalar
// x * y - z -> NI_FMA_MultiplySubtractScalar / NI_AVX10v1_MultiplySubtractScalar
// x * -y - z -> NI_FMA_MultiplySubtractNegatedScalar / NI_AVX10v1_MultiplySubtractNegatedScalar
// -x * y - z -> NI_FMA_MultiplySubtractNegatedScalar / NI_AVX10v1_MultiplySubtractNegatedScalar
// -x * -y - z -> NI_FMA_MultiplySubtractScalar / NI_AVX10v1_MultiplySubtractScalar
// x * y + z -> NI_FMA_MultiplyAddScalar
// x * -y + z -> NI_FMA_MultiplyAddNegatedScalar
// -x * y + z -> NI_FMA_MultiplyAddNegatedScalar
// -x * -y + z -> NI_FMA_MultiplyAddScalar
// x * y - z -> NI_FMA_MultiplySubtractScalar
// x * -y - z -> NI_FMA_MultiplySubtractNegatedScalar
// -x * y - z -> NI_FMA_MultiplySubtractNegatedScalar
// -x * -y - z -> NI_FMA_MultiplySubtractScalar
//
void Lowering::LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node)
{
assert((node->GetHWIntrinsicId() == NI_FMA_MultiplyAddScalar) ||
(node->GetHWIntrinsicId() == NI_AVX10v1_MultiplyAddScalar));
assert(node->GetHWIntrinsicId() == NI_FMA_MultiplyAddScalar);
GenTreeHWIntrinsic* createScalarOps[3];

for (size_t i = 1; i <= 3; i++)
Expand Down Expand Up @@ -1370,26 +1369,11 @@ void Lowering::LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node)
createScalarOps[2]->Op(1)->ClearContained();
ContainCheckHWIntrinsic(createScalarOps[2]);

if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
{
node->ChangeHWIntrinsicId(negMul ? NI_AVX10v1_MultiplySubtractNegatedScalar
: NI_AVX10v1_MultiplySubtractScalar);
}
else
{
node->ChangeHWIntrinsicId(negMul ? NI_FMA_MultiplySubtractNegatedScalar : NI_FMA_MultiplySubtractScalar);
}
node->ChangeHWIntrinsicId(negMul ? NI_FMA_MultiplySubtractNegatedScalar : NI_FMA_MultiplySubtractScalar);
}
else
{
if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
{
node->ChangeHWIntrinsicId(negMul ? NI_AVX10v1_MultiplyAddNegatedScalar : NI_AVX10v1_MultiplyAddScalar);
}
else
{
node->ChangeHWIntrinsicId(negMul ? NI_FMA_MultiplyAddNegatedScalar : NI_FMA_MultiplyAddScalar);
}
node->ChangeHWIntrinsicId(negMul ? NI_FMA_MultiplyAddNegatedScalar : NI_FMA_MultiplyAddScalar);
}
}

Expand Down Expand Up @@ -2150,7 +2134,6 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
break;

case NI_FMA_MultiplyAddScalar:
case NI_AVX10v1_MultiplyAddScalar:
LowerFusedMultiplyAdd(node);
break;

Expand Down Expand Up @@ -4907,7 +4890,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node)

NamedIntrinsic extractIntrinsicId = NI_AVX512F_ExtractVector128;

if ((genTypeSize(simdBaseType) == 8) && !comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ))
if ((genTypeSize(simdBaseType) == 8) && comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ))
{
extractIntrinsicId = NI_AVX512DQ_ExtractVector128;
}
Expand Down Expand Up @@ -5191,7 +5174,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)

NamedIntrinsic extractIntrinsicId = NI_AVX512F_ExtractVector128;

if ((genTypeSize(simdBaseType) == 8) && !comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ))
if ((genTypeSize(simdBaseType) == 8) && comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ))
{
extractIntrinsicId = NI_AVX512DQ_ExtractVector128;
}
Expand All @@ -5211,7 +5194,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)

NamedIntrinsic insertIntrinsicId = NI_AVX512F_InsertVector128;

if ((genTypeSize(simdBaseType) == 8) && !comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ))
if ((genTypeSize(simdBaseType) == 8) && comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ))
{
insertIntrinsicId = NI_AVX512DQ_InsertVector128;
}
Expand Down Expand Up @@ -8708,9 +8691,9 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
case NI_AVX10v1_RotateRight:
case NI_AVX10v1_RoundScale:
case NI_AVX10v1_ShiftRightArithmetic:
case NI_AVX10v1_Shuffle2x128:
case NI_AVX10v1_SumAbsoluteDifferencesInBlock32:
case NI_AVX10v1_TernaryLogic:
case NI_AVX10v1_Shuffle2x128:
case NI_AVX10v1_V512_Range:
case NI_AVX10v1_V512_Reduce:
{
Expand Down Expand Up @@ -8847,6 +8830,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
case NI_AVX512F_RoundScaleScalar:
case NI_AVX512DQ_RangeScalar:
case NI_AVX512DQ_ReduceScalar:
case NI_AVX10v1_FixupScalar:
case NI_AVX10v1_GetMantissaScalar:
case NI_AVX10v1_RangeScalar:
case NI_AVX10v1_ReduceScalar:
Expand Down Expand Up @@ -8937,6 +8921,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
case NI_AVX512F_X64_ConvertScalarToVector128Single:
case NI_AVX10v1_X64_ConvertScalarToVector128Double:
case NI_AVX10v1_X64_ConvertScalarToVector128Single:
case NI_AVX10v1_ConvertScalarToVector128Double:
case NI_AVX10v1_ConvertScalarToVector128Single:
{
if (!varTypeIsIntegral(childNode->TypeGet()))
Expand Down
10 changes: 0 additions & 10 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2455,16 +2455,6 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
case NI_AVX10v1_FusedMultiplyAddScalar:
case NI_AVX10v1_FusedMultiplySubtractNegatedScalar:
case NI_AVX10v1_FusedMultiplySubtractScalar:
case NI_AVX10v1_MultiplyAdd:
case NI_AVX10v1_MultiplyAddNegated:
case NI_AVX10v1_MultiplyAddNegatedScalar:
case NI_AVX10v1_MultiplyAddScalar:
case NI_AVX10v1_MultiplyAddSubtract:
case NI_AVX10v1_MultiplySubtract:
case NI_AVX10v1_MultiplySubtractAdd:
case NI_AVX10v1_MultiplySubtractNegated:
case NI_AVX10v1_MultiplySubtractNegatedScalar:
case NI_AVX10v1_MultiplySubtractScalar:
{
assert((numArgs == 3) || (intrinsicTree->OperIsEmbRoundingEnabled()));
assert(isRMW);
Expand Down
Loading

0 comments on commit 7ef9a0d

Please sign in to comment.