Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: Added SVE Prefetch* APIs. #103094

Merged
merged 20 commits into from
Jun 12, 2024
4 changes: 4 additions & 0 deletions src/coreclr/jit/fgdiagnostic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3459,6 +3459,10 @@ void Compiler::fgDebugCheckFlags(GenTree* tree, BasicBlock* block)

#if defined(TARGET_ARM64)
case NI_ArmBase_Yield:
case NI_Sve_PrefetchBytes:
TIHan marked this conversation as resolved.
Show resolved Hide resolved
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
{
assert(tree->OperRequiresCallFlag(this));
expectedFlags |= GTF_GLOB_REF;
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27315,6 +27315,10 @@ bool GenTreeHWIntrinsic::OperRequiresCallFlag() const

#if defined(TARGET_ARM64)
case NI_ArmBase_Yield:
case NI_Sve_PrefetchBytes:
TIHan marked this conversation as resolved.
Show resolved Hide resolved
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
{
return true;
}
Expand Down Expand Up @@ -27497,6 +27501,10 @@ void GenTreeHWIntrinsic::Initialize(NamedIntrinsic intrinsicId)

#if defined(TARGET_ARM64)
case NI_ArmBase_Yield:
case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
{
// Mark as a call and global reference, much as is done for GT_KEEPALIVE
gtFlags |= (GTF_CALL | GTF_GLOB_REF);
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,14 @@ void HWIntrinsicInfo::lookupImmBounds(
}
break;

case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
immLowerBound = (int)SVE_PRFOP_PLDL1KEEP;
immUpperBound = (int)SVE_PRFOP_CONST15;
break;

default:
unreached();
}
Expand Down
29 changes: 27 additions & 2 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
}
else if (intrin.category == HW_Category_Special)
{
assert(intrin.id == NI_ArmBase_Yield);
TIHan marked this conversation as resolved.
Show resolved Hide resolved

emitSize = EA_UNKNOWN;
opt = INS_OPTS_NONE;
}
Expand Down Expand Up @@ -1489,6 +1487,33 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}

case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
{
assert(hasImmediateOperand);
assert(HWIntrinsicInfo::HasEnumOperand(intrin.id));
if (intrin.op3->IsCnsIntOrI())
{
GetEmitter()->emitIns_PRFOP_R_R_I(ins, emitSize,
(insSvePrfop)intrin.op3->AsIntConCommon()->IconValue(), op1Reg,
op2Reg, 0);
}
else
{
assert(!intrin.op3->isContainedIntOrIImmed());

HWIntrinsicImmOpHelper helper(this, intrin.op3, node);
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
{
const insSvePrfop prfop = (insSvePrfop)helper.ImmValue();
GetEmitter()->emitIns_PRFOP_R_R_I(ins, emitSize, prfop, op1Reg, op2Reg, 0);
}
}
break;
}

case NI_Vector64_ToVector128:
GetEmitter()->emitIns_Mov(ins, emitSize, targetReg, op1Reg, /* canSkip */ false);
break;
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ HARDWARE_INTRINSIC(Sve, Negate,
HARDWARE_INTRINSIC(Sve, Or, -1, -1, false, {INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_sve_orr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, OrAcross, -1, -1, false, {INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_sve_orv, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, PopCount, -1, -1, false, {INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt, INS_sve_cnt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, PrefetchBytes, -1, 3, false, {INS_invalid, INS_sve_prfb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand|HW_Flag_SpecialSideEffect_Other)
TIHan marked this conversation as resolved.
Show resolved Hide resolved
HARDWARE_INTRINSIC(Sve, PrefetchInt16, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_prfh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand|HW_Flag_SpecialSideEffect_Other)
HARDWARE_INTRINSIC(Sve, PrefetchInt32, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand|HW_Flag_SpecialSideEffect_Other)
TIHan marked this conversation as resolved.
Show resolved Hide resolved
HARDWARE_INTRINSIC(Sve, PrefetchInt64, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand|HW_Flag_SpecialSideEffect_Other)
HARDWARE_INTRINSIC(Sve, ReverseElement, -1, 1, true, {INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, ReverseElement16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_revh, INS_sve_revh, INS_sve_revh, INS_sve_revh, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, ReverseElement32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_revw, INS_sve_revw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
Expand Down
1 change: 0 additions & 1 deletion src/coreclr/jit/instr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1718,7 +1718,6 @@ instruction CodeGen::ins_Move_Extend(var_types srcType, bool srcInReg)
#if defined(TARGET_XARCH)
return INS_kmovq_msk;
#elif defined(TARGET_ARM64)
unreached(); // TODO-SVE: This needs testing
return INS_sve_mov;
#endif
}
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3194,6 +3194,10 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4:
case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
assert(hasImmediateOperand);
assert(varTypeIsIntegral(intrin.op3));
if (intrin.op3->IsCnsIntOrI())
Expand Down
51 changes: 33 additions & 18 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1447,6 +1447,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4:
case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
needBranchTargetReg = !intrin.op3->isContainedIntOrIImmed();
break;

Expand Down Expand Up @@ -1965,28 +1969,39 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
(argNum == lowVectorOperandNum) ? lowVectorCandidates : RBM_NONE);
}
}
else if (intrin.id == NI_Sve_StoreAndZip)
{
srcCount += BuildAddrUses(intrin.op2);
}
else
{
SingleTypeRegSet candidates = lowVectorOperandNum == 2 ? lowVectorCandidates : RBM_NONE;
switch (intrin.id)
TIHan marked this conversation as resolved.
Show resolved Hide resolved
{
case NI_Sve_StoreAndZip:
case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
srcCount += BuildAddrUses(intrin.op2);
break;

if (intrin.op2->gtType == TYP_MASK)
{
assert(lowVectorOperandNum != 2);
candidates = RBM_ALLMASK;
}
default:
{
SingleTypeRegSet candidates = lowVectorOperandNum == 2 ? lowVectorCandidates : RBM_NONE;

if (forceOp2DelayFree)
{
srcCount += BuildDelayFreeUses(intrin.op2, nullptr, candidates);
}
else
{
srcCount += isRMW ? BuildDelayFreeUses(intrin.op2, intrin.op1, candidates)
: BuildOperandUses(intrin.op2, candidates);
if (intrin.op2->gtType == TYP_MASK)
{
assert(lowVectorOperandNum != 2);
candidates = RBM_ALLMASK;
}

if (forceOp2DelayFree)
{
srcCount += BuildDelayFreeUses(intrin.op2, nullptr, candidates);
}
else
{
srcCount += isRMW ? BuildDelayFreeUses(intrin.op2, intrin.op1, candidates)
: BuildOperandUses(intrin.op2, candidates);
}
}
break;
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2330,7 +2330,7 @@ void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call

bool isBackFilled = false;
unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use
bool isStructArg = varTypeIsStruct(argSigType);
bool isStructArg = varTypeIsStruct(argSigType) && !varTypeIsMask(argx->gtEffectiveVal());
var_types structBaseType = TYP_STRUCT;
unsigned structSize = 0;
bool passStructByRef = false;
Expand Down Expand Up @@ -3178,7 +3178,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
argx->gtType = TYP_I_IMPL;
}

bool isStructArg = varTypeIsStruct(arg.GetSignatureType());
bool isStructArg = varTypeIsStruct(arg.GetSignatureType()) && !varTypeIsMask(arg.GetSignatureType());
GenTree* argObj = argx->gtEffectiveVal();
bool makeOutArgCopy = false;

Expand Down
6 changes: 5 additions & 1 deletion src/coreclr/jit/rationalize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,11 @@ void Rationalizer::RewriteNodeAsCall(GenTree** use,
assert(!operand->OperIsFieldList());

sigTyp = comp->impNormStructType(clsHnd);
arg = NewCallArg::Struct(operand, sigTyp, clsHnd);
if (varTypeIsMask(operand) && varTypeIsSIMD(sigTyp))
{
sigTyp = TYP_MASK;
}
arg = NewCallArg::Struct(operand, sigTyp, clsHnd);
TIHan marked this conversation as resolved.
Show resolved Hide resolved
}
else
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,20 @@ public enum SveMaskPattern : byte
/// </summary>
All = 31 // All available (implicitly a multiple of two).
}

/// <summary>
/// Selects the prefetch operation passed as the <c>prefetchType</c> argument of the
/// <c>Sve.Prefetch*</c> methods (the <c>enum svprfop op</c> operand of svprfb/svprfh/svprfw/svprfd).
/// Each member combines an access kind (PLD = for reading, PST = for writing), a target
/// cache level (L1, L2 or L3) and a policy (KEEP = temporal, STRM = streaming).
/// </summary>
/// <remarks>
/// The numeric values are not contiguous: 6, 7, 14 and 15 have no named member here.
/// </remarks>
public enum SvePrefetchType : byte
{
SV_PLDL1KEEP = 0, // Temporal fetch the addressed location for reading, to L1 cache.
SV_PLDL1STRM = 1, // Streaming fetch the addressed location for reading, to L1 cache.
SV_PLDL2KEEP = 2, // Temporal fetch the addressed location for reading, to L2 cache.
SV_PLDL2STRM = 3, // Streaming fetch the addressed location for reading, to L2 cache.
SV_PLDL3KEEP = 4, // Temporal fetch the addressed location for reading, to L3 cache.
SV_PLDL3STRM = 5, // Streaming fetch the addressed location for reading, to L3 cache.
// Values 6 and 7 are intentionally left without a named member.
SV_PSTL1KEEP = 8, // Temporal fetch the addressed location for writing, to L1 cache.
SV_PSTL1STRM = 9, // Streaming fetch the addressed location for writing, to L1 cache.
SV_PSTL2KEEP = 10, // Temporal fetch the addressed location for writing, to L2 cache.
SV_PSTL2STRM = 11, // Streaming fetch the addressed location for writing, to L2 cache.
SV_PSTL3KEEP = 12, // Temporal fetch the addressed location for writing, to L3 cache.
SV_PSTL3STRM = 13 // Streaming fetch the addressed location for writing, to L3 cache.
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -3238,6 +3238,30 @@ internal Arm64() { }
/// </summary>
public static unsafe Vector<ulong> PopCount(Vector<ulong> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void svprfb(svbool_t pg, const void *base, enum svprfop op)
/// PRFB op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <param name="mask">Predicate operand; corresponds to <c>pg</c> in the signature above.</param>
/// <param name="address">Base address operand; corresponds to <c>base</c> in the signature above.</param>
/// <param name="prefetchType">Prefetch operation; corresponds to <c>op</c> in the signature above. Expected to be a constant.</param>
/// <exception cref="PlatformNotSupportedException">Always thrown; this implementation has no other behavior.</exception>
public static unsafe void PrefetchBytes(Vector<byte> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void svprfh(svbool_t pg, const void *base, enum svprfop op)
/// PRFH op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <param name="mask">Predicate operand; corresponds to <c>pg</c> in the signature above.</param>
/// <param name="address">Base address operand; corresponds to <c>base</c> in the signature above.</param>
/// <param name="prefetchType">Prefetch operation; corresponds to <c>op</c> in the signature above. Expected to be a constant.</param>
/// <exception cref="PlatformNotSupportedException">Always thrown; this implementation has no other behavior.</exception>
public static unsafe void PrefetchInt16(Vector<ushort> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void svprfw(svbool_t pg, const void *base, enum svprfop op)
/// PRFW op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <param name="mask">Predicate operand; corresponds to <c>pg</c> in the signature above.</param>
/// <param name="address">Base address operand; corresponds to <c>base</c> in the signature above.</param>
/// <param name="prefetchType">Prefetch operation; corresponds to <c>op</c> in the signature above. Expected to be a constant.</param>
/// <exception cref="PlatformNotSupportedException">Always thrown; this implementation has no other behavior.</exception>
public static unsafe void PrefetchInt32(Vector<uint> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw new PlatformNotSupportedException(); }

/// <summary>
/// void svprfd(svbool_t pg, const void *base, enum svprfop op)
/// PRFD op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <param name="mask">Predicate operand; corresponds to <c>pg</c> in the signature above.</param>
/// <param name="address">Base address operand; corresponds to <c>base</c> in the signature above.</param>
/// <param name="prefetchType">Prefetch operation; corresponds to <c>op</c> in the signature above. Expected to be a constant.</param>
/// <exception cref="PlatformNotSupportedException">Always thrown; this implementation has no other behavior.</exception>
public static unsafe void PrefetchInt64(Vector<ulong> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw new PlatformNotSupportedException(); }


/// Reverse all elements

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3294,6 +3294,29 @@ internal Arm64() { }
/// </summary>
public static unsafe Vector<ulong> PopCount(Vector<ulong> value) => PopCount(value);

/// <summary>
/// void svprfb(svbool_t pg, const void *base, enum svprfop op)
/// PRFB op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <param name="mask">Predicate operand; corresponds to <c>pg</c> in the signature above.</param>
/// <param name="address">Base address operand; corresponds to <c>base</c> in the signature above.</param>
/// <param name="prefetchType">Prefetch operation; corresponds to <c>op</c> in the signature above. Expected to be a constant.</param>
/// <remarks>
/// The body is a call to this same method with the same arguments. It is presumably never executed
/// as written: the JIT is expected to recognize the method as a hardware intrinsic and emit PRFB
/// in its place. Do not restructure the body.
/// </remarks>
public static unsafe void PrefetchBytes(Vector<byte> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) => PrefetchBytes(mask, address, prefetchType);

/// <summary>
/// void svprfh(svbool_t pg, const void *base, enum svprfop op)
/// PRFH op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <param name="mask">Predicate operand; corresponds to <c>pg</c> in the signature above.</param>
/// <param name="address">Base address operand; corresponds to <c>base</c> in the signature above.</param>
/// <param name="prefetchType">Prefetch operation; corresponds to <c>op</c> in the signature above. Expected to be a constant.</param>
/// <remarks>
/// The body is a call to this same method with the same arguments. It is presumably never executed
/// as written: the JIT is expected to recognize the method as a hardware intrinsic and emit PRFH
/// in its place. Do not restructure the body.
/// </remarks>
public static unsafe void PrefetchInt16(Vector<ushort> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) => PrefetchInt16(mask, address, prefetchType);

/// <summary>
/// void svprfw(svbool_t pg, const void *base, enum svprfop op)
/// PRFW op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <param name="mask">Predicate operand; corresponds to <c>pg</c> in the signature above.</param>
/// <param name="address">Base address operand; corresponds to <c>base</c> in the signature above.</param>
/// <param name="prefetchType">Prefetch operation; corresponds to <c>op</c> in the signature above. Expected to be a constant.</param>
/// <remarks>
/// The body is a call to this same method with the same arguments. It is presumably never executed
/// as written: the JIT is expected to recognize the method as a hardware intrinsic and emit PRFW
/// in its place. Do not restructure the body.
/// </remarks>
public static unsafe void PrefetchInt32(Vector<uint> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) => PrefetchInt32(mask, address, prefetchType);

/// <summary>
/// void svprfd(svbool_t pg, const void *base, enum svprfop op)
/// PRFD op, Pg, [Xbase, #0, MUL VL]
/// </summary>
/// <param name="mask">Predicate operand; corresponds to <c>pg</c> in the signature above.</param>
/// <param name="address">Base address operand; corresponds to <c>base</c> in the signature above.</param>
/// <param name="prefetchType">Prefetch operation; corresponds to <c>op</c> in the signature above. Expected to be a constant.</param>
/// <remarks>
/// The body is a call to this same method with the same arguments. It is presumably never executed
/// as written: the JIT is expected to recognize the method as a hardware intrinsic and emit PRFD
/// in its place. Do not restructure the body.
/// </remarks>
public static unsafe void PrefetchInt64(Vector<ulong> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) => PrefetchInt64(mask, address, prefetchType);

/// Reverse all elements

Expand Down
Loading
Loading