Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve the codegen of the vector accelerated System.Numerics.* types #81335

Merged
merged 32 commits into from
Feb 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
83433ff
Allow Quaternion and Plane to be imported as TYP_SIMD16
tannergooding Jan 27, 2023
9d5aa21
Add some minimal intrinsification of Quaternion and Plane
tannergooding Jan 28, 2023
a666f6c
Ensure Vector indexers are marked readonly
tannergooding Jan 28, 2023
6cc5c3d
Ensure the vector constant properties are intrinsic
tannergooding Jan 28, 2023
e9e4528
Ensure that the vector GetElement and WithElement APIs are intrinsic
tannergooding Jan 28, 2023
0d3690b
Ensure vector division by scalar is intrinsic
tannergooding Jan 28, 2023
15f804f
Minor cleanup to vector files
tannergooding Jan 28, 2023
8cdb2f5
Ensure vector arithmetic functions are consistently intrinsic
tannergooding Jan 28, 2023
192f814
Ensure creating a plane from a vector4 is intrinsic
tannergooding Jan 28, 2023
bc510bb
Ensure accessing the Normal field of a Plane is efficient
tannergooding Jan 28, 2023
dd3746f
Ensure Quaternion and Plane return the correct SimdAsHWIntrinsicClassId
tannergooding Jan 28, 2023
78e1317
Specially optimize Create(Dot(..., ...)) and Create(Sqrt(Dot(..., ...)))
tannergooding Jan 28, 2023
623c440
Ensure vector clamp is intrinsic
tannergooding Jan 29, 2023
20db9dc
Ensure vector lerp is intrinsic
tannergooding Jan 29, 2023
186bf95
Ensure vector length is intrinsic
tannergooding Jan 29, 2023
a22827f
Ensure vector normalize is intrinsic
tannergooding Jan 29, 2023
db1541b
Ensure vector distance is intrinsic
tannergooding Jan 29, 2023
2262902
Optimize the vector transform by matrix methods
tannergooding Jan 29, 2023
05abc18
Ensure quaternion conjugate and inverse are intrinsic
tannergooding Jan 29, 2023
6bf2e67
Fixing assert, formatting, and build failure
tannergooding Jan 29, 2023
5ca919a
Ensure Quaternion.Inverse uses LengthSquared not Length
tannergooding Jan 29, 2023
9bc9806
Ensure Create APIs are correctly imported as intrinsic
tannergooding Jan 30, 2023
43e229d
Ensure we don't assert for AltJit
tannergooding Jan 30, 2023
b92fc1d
Ensure lowering DotProd doesn't break CSE for scalar vs vector results
tannergooding Jan 31, 2023
a498ac7
Minimally fixup Mono for the new intrinsics
tannergooding Jan 31, 2023
338cca7
Ensure SN_GetElement doesn't raise an assert
tannergooding Jan 31, 2023
d54eaf2
Ensure get_UnitW is ordered correctly for Mono
tannergooding Jan 31, 2023
597ac9a
Merge remote-tracking branch 'dotnet/main' into numerics-rewrite
tannergooding Feb 1, 2023
fa1e62c
Try to fix SN_GetElement
tannergooding Feb 1, 2023
4084de1
Fix SN_WithElement for Mono
tannergooding Feb 1, 2023
cba1d2f
Resolving mono formatting feedback
tannergooding Feb 1, 2023
ff68c6c
Merge remote-tracking branch 'dotnet/main' into numerics-rewrite
tannergooding Feb 2, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 70 additions & 6 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -8417,6 +8417,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
CORINFO_CLASS_HANDLE SIMDNIntHandle;
CORINFO_CLASS_HANDLE SIMDNUIntHandle;

CORINFO_CLASS_HANDLE SIMDPlaneHandle;
CORINFO_CLASS_HANDLE SIMDQuaternionHandle;
CORINFO_CLASS_HANDLE SIMDVector2Handle;
CORINFO_CLASS_HANDLE SIMDVector3Handle;
CORINFO_CLASS_HANDLE SIMDVector4Handle;
Expand Down Expand Up @@ -8494,23 +8496,54 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
switch (simdType)
{
case TYP_SIMD8:
{
return m_simdHandleCache->SIMDVector2Handle;
}

case TYP_SIMD12:
{
return m_simdHandleCache->SIMDVector3Handle;
}

case TYP_SIMD16:
if ((getSIMDVectorType() == TYP_SIMD32) ||
(m_simdHandleCache->SIMDVector4Handle != NO_CLASS_HANDLE))
{
// We order the checks roughly by expected hit count so early exits are possible

if (simdBaseJitType != CORINFO_TYPE_FLOAT)
{
// We could be Vector<T>, so handle below
assert(getSIMDVectorType() == TYP_SIMD16);
break;
}

if (m_simdHandleCache->SIMDVector4Handle != NO_CLASS_HANDLE)
{
return m_simdHandleCache->SIMDVector4Handle;
}
break;

if (m_simdHandleCache->SIMDQuaternionHandle != NO_CLASS_HANDLE)
{
return m_simdHandleCache->SIMDQuaternionHandle;
}

if (m_simdHandleCache->SIMDPlaneHandle != NO_CLASS_HANDLE)
{
return m_simdHandleCache->SIMDPlaneHandle;
}

return NO_CLASS_HANDLE;
}

case TYP_SIMD32:
break;

default:
unreached();
}
}

assert(emitTypeSize(simdType) <= largestEnregisterableStructSize());

switch (simdBaseJitType)
{
case CORINFO_TYPE_FLOAT:
Expand Down Expand Up @@ -8540,6 +8573,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
default:
assert(!"Didn't find a class handle for simdType");
}

return NO_CLASS_HANDLE;
}

Expand Down Expand Up @@ -8617,9 +8651,39 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// actually be declared as having fields.
bool isOpaqueSIMDType(CORINFO_CLASS_HANDLE structHandle) const
{
    // Reports whether `structHandle` should be treated as an opaque SIMD type.
    //
    // A handle is NOT opaque when it matches one of the cached handles compared
    // below (Vector4, Vector3, Vector2, Quaternion, Plane). When no SIMD handle
    // cache exists, nothing is reported as opaque.
    //
    // The previous single-expression return only excluded Vector2/3/4 and, left in
    // place ahead of these checks, made the Quaternion and Plane comparisons
    // unreachable; the guard clauses below are the complete logic.

    // We order the checks roughly by expected hit count so early exits are possible

    if (m_simdHandleCache == nullptr)
    {
        return false;
    }

    if (structHandle == m_simdHandleCache->SIMDVector4Handle)
    {
        return false;
    }

    if (structHandle == m_simdHandleCache->SIMDVector3Handle)
    {
        return false;
    }

    if (structHandle == m_simdHandleCache->SIMDVector2Handle)
    {
        return false;
    }

    if (structHandle == m_simdHandleCache->SIMDQuaternionHandle)
    {
        return false;
    }

    if (structHandle == m_simdHandleCache->SIMDPlaneHandle)
    {
        return false;
    }

    return true;
}

// Returns true if the lclVar is an opaque SIMD type.
Expand Down
5 changes: 3 additions & 2 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21407,7 +21407,6 @@ GenTree* Compiler::gtNewSimdDotProdNode(var_types type,
bool isSimdAsHWIntrinsic)
{
assert(IsBaselineSimdIsaSupportedDebugOnly());
assert(varTypeIsArithmetic(type));

var_types simdType = getSIMDTypeForSize(simdSize);
assert(varTypeIsSIMD(simdType));
Expand All @@ -21419,7 +21418,9 @@ GenTree* Compiler::gtNewSimdDotProdNode(var_types type,
assert(op2->TypeIs(simdType));

var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
assert(JITtype2varType(simdBaseJitType) == type);

// We support the return type being a SIMD for floating-point as a special optimization
assert(varTypeIsArithmetic(type) || (varTypeIsSIMD(type) && varTypeIsFloating(simdBaseType)));

NamedIntrinsic intrinsic = NI_Illegal;

Expand Down
88 changes: 69 additions & 19 deletions src/coreclr/jit/lclmorph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1156,16 +1156,32 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
break;

#ifdef FEATURE_HW_INTRINSICS
// We have two cases we want to handle:
// 1. Vector2/3/4 and Quaternion where we have 4x float fields
// 2. Plane where we have 1x Vector3 and 1x float field

case IndirTransform::GetElement:
{
GenTree* hwiNode = nullptr;
var_types elementType = indir->TypeGet();
assert(elementType == TYP_FLOAT);
lclNode = BashToLclVar(indir->gtGetOp1(), lclNum);

if (elementType == TYP_FLOAT)
{
GenTree* indexNode = m_compiler->gtNewIconNode(val.Offset() / genTypeSize(elementType));
hwiNode = m_compiler->gtNewSimdGetElementNode(elementType, lclNode, indexNode, CORINFO_TYPE_FLOAT,
genTypeSize(varDsc),
/* isSimdAsHWIntrinsic */ true);
}
else
{
assert(elementType == TYP_SIMD12);
assert(genTypeSize(varDsc) == 16);
hwiNode =
m_compiler->gtNewSimdHWIntrinsicNode(elementType, lclNode, NI_Vector128_AsVector3,
CORINFO_TYPE_FLOAT, 16, /* isSimdAsHWIntrinsic */ true);
}

lclNode = BashToLclVar(indir->gtGetOp1(), lclNum);
GenTree* indexNode = m_compiler->gtNewIconNode(val.Offset() / genTypeSize(elementType));
GenTree* hwiNode = m_compiler->gtNewSimdGetElementNode(elementType, lclNode, indexNode,
CORINFO_TYPE_FLOAT, genTypeSize(varDsc),
/* isSimdAsHWIntrinsic */ false);
indir = hwiNode;
*val.Use() = hwiNode;
}
Expand All @@ -1174,17 +1190,35 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
case IndirTransform::WithElement:
{
assert(user->OperIs(GT_ASG) && (user->gtGetOp1() == indir));

GenTree* hwiNode = nullptr;
var_types elementType = indir->TypeGet();
assert(elementType == TYP_FLOAT);

lclNode = BashToLclVar(indir, lclNum);
GenTree* simdLclNode = m_compiler->gtNewLclvNode(lclNum, varDsc->TypeGet());
GenTree* indexNode = m_compiler->gtNewIconNode(val.Offset() / genTypeSize(elementType));
GenTree* elementNode = user->gtGetOp2();
user->AsOp()->gtOp2 =
m_compiler->gtNewSimdWithElementNode(varDsc->TypeGet(), simdLclNode, indexNode, elementNode,
CORINFO_TYPE_FLOAT, genTypeSize(varDsc),
/* isSimdAsHWIntrinsic */ false);
lclNode = BashToLclVar(indir, lclNum);
GenTree* simdLclNode = m_compiler->gtNewLclvNode(lclNum, varDsc->TypeGet());
GenTree* elementNode = user->gtGetOp2();

if (elementType == TYP_FLOAT)
{
GenTree* indexNode = m_compiler->gtNewIconNode(val.Offset() / genTypeSize(elementType));
hwiNode = m_compiler->gtNewSimdWithElementNode(varDsc->TypeGet(), simdLclNode, indexNode,
elementNode, CORINFO_TYPE_FLOAT, genTypeSize(varDsc),
/* isSimdAsHWIntrinsic */ true);
}
else
{
assert(elementType == TYP_SIMD12);
assert(varDsc->TypeGet() == TYP_SIMD16);

// We inverse the operands here and take elementNode as the main value and simdLclNode[3] as the
// new value. This gives us a new TYP_SIMD16 with all elements in the right spots

GenTree* indexNode = m_compiler->gtNewIconNode(3, TYP_INT);
hwiNode =
m_compiler->gtNewSimdWithElementNode(TYP_SIMD16, elementNode, indexNode, simdLclNode,
CORINFO_TYPE_FLOAT, 16, /* isSimdAsHWIntrinsic */ true);
}

user->AsOp()->gtOp2 = hwiNode;
user->ChangeType(varDsc->TypeGet());
}
break;
Expand Down Expand Up @@ -1300,10 +1334,26 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
}

#ifdef FEATURE_HW_INTRINSICS
if (varTypeIsSIMD(varDsc) && indir->TypeIs(TYP_FLOAT) && ((val.Offset() % genTypeSize(TYP_FLOAT)) == 0) &&
m_compiler->IsBaselineSimdIsaSupported())
if (varTypeIsSIMD(varDsc))
{
return isDef ? IndirTransform::WithElement : IndirTransform::GetElement;
// We have two cases we want to handle:
// 1. Vector2/3/4 and Quaternion where we have 4x float fields
// 2. Plane where we have 1x Vector3 and 1x float field

if (indir->TypeIs(TYP_FLOAT))
{
if (((val.Offset() % genTypeSize(TYP_FLOAT)) == 0) && m_compiler->IsBaselineSimdIsaSupported())
{
return isDef ? IndirTransform::WithElement : IndirTransform::GetElement;
}
}
else if (indir->TypeIs(TYP_SIMD12))
{
if ((val.Offset() == 0) && m_compiler->IsBaselineSimdIsaSupported())
{
return isDef ? IndirTransform::WithElement : IndirTransform::GetElement;
}
}
}
#endif // FEATURE_HW_INTRINSICS

Expand Down
38 changes: 28 additions & 10 deletions src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1604,6 +1604,9 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
assert(varTypeIsArithmetic(simdBaseType));
assert(simdSize != 0);

// We support the return type being a SIMD for floating-point as a special optimization
assert(varTypeIsArithmetic(node) || (varTypeIsSIMD(node) && varTypeIsFloating(simdBaseType)));

GenTree* op1 = node->Op(1);
GenTree* op2 = node->Op(2);

Expand Down Expand Up @@ -1859,19 +1862,34 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
}
}

// We will be constructing the following parts:
// ...
// /--* tmp2 simd16
// node = * HWINTRINSIC simd16 T ToScalar
if (varTypeIsSIMD(node->gtType))
{
// We're producing a vector result, so just return the result directly

// This is roughly the following managed code:
// ...
// return tmp2.ToScalar();
LIR::Use use;

node->ResetHWIntrinsicId((simdSize == 8) ? NI_Vector64_ToScalar : NI_Vector128_ToScalar, tmp2);
if (BlockRange().TryGetUse(node, &use))
{
use.ReplaceWith(tmp2);
}

LowerNode(node);
return node->gtNext;
BlockRange().Remove(node);
return tmp2->gtNext;
}
else
{
// We will be constructing the following parts:
// ...
// /--* tmp2 simd16
// node = * HWINTRINSIC simd16 T ToScalar

// This is roughly the following managed code:
// ...
// return tmp2.ToScalar();

node->ResetHWIntrinsicId((simdSize == 8) ? NI_Vector64_ToScalar : NI_Vector128_ToScalar, tmp2);
return LowerNode(node);
}
}
#endif // FEATURE_HW_INTRINSICS

Expand Down
Loading