Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spill all nodes multi-reg morphing does not handle #71127

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -2096,6 +2096,11 @@ struct GenTree
gtFlags |= sourceFlags;
}

void AddAllEffectsFlags(GenTree* source)
{
AddAllEffectsFlags(source->gtFlags & GTF_ALL_EFFECT);
}

void AddAllEffectsFlags(GenTree* firstSource, GenTree* secondSource)
{
AddAllEffectsFlags((firstSource->gtFlags | secondSource->gtFlags) & GTF_ALL_EFFECT);
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1451,6 +1451,8 @@ GenTree* Compiler::impAssignStructPtr(GenTree* destAddr,
// Instead, we're going to sink the assignment below the COMMA.
src->AsOp()->gtOp2 =
impAssignStructPtr(destAddr, src->AsOp()->gtOp2, structHnd, curLevel, pAfterStmt, usedDI, block);
src->AddAllEffectsFlags(src->AsOp()->gtOp2);

return src;
}

Expand Down
114 changes: 58 additions & 56 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -929,76 +929,78 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call)
bool isMultiRegArg = (arg.AbiInfo.NumRegs > 1);
#endif

if (varTypeIsStruct(argx->TypeGet()) && !arg.m_needTmp)
if (varTypeIsStruct(argx) && !arg.m_needTmp)
{
if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0))
if (isMultiRegArg)
{
// Spill multireg struct arguments that have Assignments or Calls embedded in them
SetNeedsTemp(&arg);
}
else
{
// We call gtPrepareCost to measure the cost of evaluating this tree
comp->gtPrepareCost(argx);

if (isMultiRegArg && (argx->GetCostEx() > (6 * IND_COST_EX)))
if ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0)
{
// Spill multireg struct arguments that are expensive to evaluate twice
// Spill multireg struct arguments that have Assignments or Calls embedded in them.
SetNeedsTemp(&arg);
}
#if defined(FEATURE_SIMD) && defined(TARGET_ARM64)
else if (isMultiRegArg && varTypeIsSIMD(argx) && (argx->OperIsSimdOrHWintrinsic() || argx->IsCnsVec()))
else if (!argx->OperIsLocalRead() && !argx->OperIsIndir())
{
// Multi-reg morphing does not handle these SIMD nodes.
// TODO-CQ: handle HWI/SIMD/COMMA nodes in multi-reg morphing.
SetNeedsTemp(&arg);
}
#endif
#ifndef TARGET_ARM
// TODO-Arm: This optimization is not implemented for ARM32
// so we skip this for ARM32 until it is ported to use RyuJIT backend
//
else if (argx->OperGet() == GT_OBJ)
else
{
GenTreeObj* argObj = argx->AsObj();
unsigned structSize = argObj->GetLayout()->GetSize();
switch (structSize)
// Finally, we call gtPrepareCost to measure the cost of evaluating this tree.
comp->gtPrepareCost(argx);

if (argx->GetCostEx() > (6 * IND_COST_EX))
{
case 3:
case 5:
case 6:
case 7:
// If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes
//
if (argObj->AsObj()->gtOp1->IsLocalAddrExpr() == nullptr) // Is the source not a LclVar?
{
// If we don't have a LclVar we need to read exactly 3,5,6 or 7 bytes
// For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp.
//
SetNeedsTemp(&arg);
}
break;
case 11:
case 13:
case 14:
case 15:
// Spill any GT_OBJ multireg structs that are difficult to extract
//
// When we have a GT_OBJ of a struct with the above sizes we would need
// to use 3 or 4 load instructions to load the exact size of this struct.
// Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence
// will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp.
// Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing
// the argument.
// Spill multireg struct arguments that are expensive to evaluate twice.
SetNeedsTemp(&arg);
}
}
}

#ifndef TARGET_ARM
// TODO-Arm: This optimization is not implemented for ARM32
// so we skip this for ARM32 until it is ported to use RyuJIT backend
//
if (argx->OperGet() == GT_OBJ)
{
GenTreeObj* argObj = argx->AsObj();
unsigned structSize = argObj->GetLayout()->GetSize();
switch (structSize)
{
case 3:
case 5:
case 6:
case 7:
// If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes
//
if (argObj->AsObj()->gtOp1->IsLocalAddrExpr() == nullptr) // Is the source not a LclVar?
{
// If we don't have a LclVar we need to read exactly 3,5,6 or 7 bytes
// For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp.
//
SetNeedsTemp(&arg);
break;
}
break;
case 11:
case 13:
case 14:
case 15:
// Spill any GT_OBJ multireg structs that are difficult to extract
//
// When we have a GT_OBJ of a struct with the above sizes we would need
// to use 3 or 4 load instructions to load the exact size of this struct.
// Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence
// will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp.
// Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing
// the argument.
//
SetNeedsTemp(&arg);
break;

default:
break;
}
default:
break;
}
#endif // !TARGET_ARM
}
#endif // !TARGET_ARM
}
#endif // FEATURE_MULTIREG_ARGS
}
Expand Down Expand Up @@ -3703,7 +3705,7 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg)
{
GenTree* argNode = arg->GetNode();
assert(varTypeIsStruct(argNode->TypeGet()));
assert(varTypeIsStruct(argNode));

#if !defined(TARGET_ARMARCH) && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64)
NYI("fgMorphMultiregStructArg requires implementation for this target");
Expand Down
28 changes: 28 additions & 0 deletions src/tests/JIT/Regression/JitBlue/Runtime_71118/Runtime_71118.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Numerics;
using System.Runtime.CompilerServices;

public class Runtime_71118
{
public static int Main()
{
return Problem(new ClassWithVtor4 { Vtor4FieldTwo = new Vector4(1, 2, 3, 4) }) ? 101 : 100;
}

[MethodImpl(MethodImplOptions.NoInlining)]
private static bool Problem(ClassWithVtor4 a)
{
return CallForVtor4(a.Vtor4FieldTwo) != a.Vtor4FieldTwo.X;
}

[MethodImpl(MethodImplOptions.NoInlining)]
private static float CallForVtor4(Vector4 vtor) => vtor.X;

class ClassWithVtor4
{
public Vector4 Vtor4FieldOne;
public Vector4 Vtor4FieldTwo;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<Optimize>True</Optimize>
<CLRTestBatchPreCommands><![CDATA[
$(CLRTestBatchPreCommands)
set DOTNET_JitStressModeNamesOnly=1
set DOTNET_JitStressModeNames=STRESS_NULL_OBJECT_CHECK
]]></CLRTestBatchPreCommands>
<BashCLRTestPreCommands><![CDATA[
$(BashCLRTestPreCommands)
export DOTNET_JitStressModeNamesOnly=1
export DOTNET_JitStressModeNames=STRESS_NULL_OBJECT_CHECK
]]></BashCLRTestPreCommands>
</PropertyGroup>
<ItemGroup>
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>