// Review summary of this patch. NOTE(review): the diff below has had its line breaks
// collapsed, so it is kept byte-for-byte and only this leading commentary is added.
//
// gentree.h: adds GenTree::AddAllEffectsFlags(GenTree* source), which passes
//   (source->gtFlags & GTF_ALL_EFFECT) to another AddAllEffectsFlags overload.
// importer.cpp (Compiler::impAssignStructPtr): after the assignment is sunk below the
//   COMMA by rewriting src's gtOp2, src now picks up that operand's effect flags via
//   src->AddAllEffectsFlags(src->AsOp()->gtOp2) before being returned.
// morph.cpp (CallArgs::ArgsComplete): for struct args with !arg.m_needTmp, the multi-reg
//   checks are grouped under one `if (isMultiRegArg)`; SetNeedsTemp(&arg) is called when
//   (1) GTF_PERSISTENT_SIDE_EFFECTS is set, else (2) the node is neither a local read nor
//   an indirection, else (3) GetCostEx() exceeds 6 * IND_COST_EX after gtPrepareCost.
//   Check (2) replaces a SIMD/HWintrinsic/IsCnsVec check that was compiled only under
//   FEATURE_SIMD && TARGET_ARM64. The GT_OBJ size switch (3/5/6/7 and 11/13/14/15 bytes,
//   !TARGET_ARM only) is now a standalone `if` after that group instead of an `else if`.
//   NOTE(review): it can therefore run after SetNeedsTemp was already called above --
//   presumably SetNeedsTemp is safe to call twice; confirm.
//   Two varTypeIsStruct(x->TypeGet()) calls become varTypeIsStruct(x) (the second is the
//   assert in Compiler::fgMorphMultiregStructArg).
// Runtime_71118.cs: new regression test; Main returns 100 when CallForVtor4 returns the
//   same value as a.Vtor4FieldTwo.X and 101 otherwise.
// Runtime_71118.csproj: NOTE(review): only the values "Exe" and "True" are visible; the
//   surrounding XML markup appears to have been lost in extraction -- verify the original.
diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 5b88e3219f6ed..88b67f0a6d120 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -2096,6 +2096,11 @@ struct GenTree gtFlags |= sourceFlags; } + void AddAllEffectsFlags(GenTree* source) + { + AddAllEffectsFlags(source->gtFlags & GTF_ALL_EFFECT); + } + void AddAllEffectsFlags(GenTree* firstSource, GenTree* secondSource) { AddAllEffectsFlags((firstSource->gtFlags | secondSource->gtFlags) & GTF_ALL_EFFECT); diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 3c01e665ddc32..0bbd79223045d 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -1451,6 +1451,8 @@ GenTree* Compiler::impAssignStructPtr(GenTree* destAddr, // Instead, we're going to sink the assignment below the COMMA. src->AsOp()->gtOp2 = impAssignStructPtr(destAddr, src->AsOp()->gtOp2, structHnd, curLevel, pAfterStmt, usedDI, block); + src->AddAllEffectsFlags(src->AsOp()->gtOp2); + return src; } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 1758051daebfa..d2dfa96d3f1fa 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -929,76 +929,78 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) bool isMultiRegArg = (arg.AbiInfo.NumRegs > 1); #endif - if (varTypeIsStruct(argx->TypeGet()) && !arg.m_needTmp) + if (varTypeIsStruct(argx) && !arg.m_needTmp) { - if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0)) + if (isMultiRegArg) { - // Spill multireg struct arguments that have Assignments or Calls embedded in them - SetNeedsTemp(&arg); - } - else - { - // We call gtPrepareCost to measure the cost of evaluating this tree - comp->gtPrepareCost(argx); - - if (isMultiRegArg && (argx->GetCostEx() > (6 * IND_COST_EX))) + if ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) { - // Spill multireg struct arguments that are expensive to evaluate twice + // Spill multireg struct arguments that 
have Assignments or Calls embedded in them. SetNeedsTemp(&arg); } -#if defined(FEATURE_SIMD) && defined(TARGET_ARM64) - else if (isMultiRegArg && varTypeIsSIMD(argx) && (argx->OperIsSimdOrHWintrinsic() || argx->IsCnsVec())) + else if (!argx->OperIsLocalRead() && !argx->OperIsIndir()) { - // Multi-reg morphing does not handle these SIMD nodes. + // TODO-CQ: handle HWI/SIMD/COMMA nodes in multi-reg morphing. SetNeedsTemp(&arg); } -#endif -#ifndef TARGET_ARM - // TODO-Arm: This optimization is not implemented for ARM32 - // so we skip this for ARM32 until it is ported to use RyuJIT backend - // - else if (argx->OperGet() == GT_OBJ) + else { - GenTreeObj* argObj = argx->AsObj(); - unsigned structSize = argObj->GetLayout()->GetSize(); - switch (structSize) + // Finally, we call gtPrepareCost to measure the cost of evaluating this tree. + comp->gtPrepareCost(argx); + + if (argx->GetCostEx() > (6 * IND_COST_EX)) { - case 3: - case 5: - case 6: - case 7: - // If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes - // - if (argObj->AsObj()->gtOp1->IsLocalAddrExpr() == nullptr) // Is the source not a LclVar? - { - // If we don't have a LclVar we need to read exactly 3,5,6 or 7 bytes - // For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp. - // - SetNeedsTemp(&arg); - } - break; - case 11: - case 13: - case 14: - case 15: - // Spill any GT_OBJ multireg structs that are difficult to extract - // - // When we have a GT_OBJ of a struct with the above sizes we would need - // to use 3 or 4 load instructions to load the exact size of this struct. - // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence - // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp. - // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing - // the argument. + // Spill multireg struct arguments that are expensive to evaluate twice. 
+ SetNeedsTemp(&arg); + } + } + } + +#ifndef TARGET_ARM + // TODO-Arm: This optimization is not implemented for ARM32 + // so we skip this for ARM32 until it is ported to use RyuJIT backend + // + if (argx->OperGet() == GT_OBJ) + { + GenTreeObj* argObj = argx->AsObj(); + unsigned structSize = argObj->GetLayout()->GetSize(); + switch (structSize) + { + case 3: + case 5: + case 6: + case 7: + // If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes + // + if (argObj->AsObj()->gtOp1->IsLocalAddrExpr() == nullptr) // Is the source not a LclVar? + { + // If we don't have a LclVar we need to read exactly 3,5,6 or 7 bytes + // For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp. // SetNeedsTemp(&arg); - break; + } + break; + case 11: + case 13: + case 14: + case 15: + // Spill any GT_OBJ multireg structs that are difficult to extract + // + // When we have a GT_OBJ of a struct with the above sizes we would need + // to use 3 or 4 load instructions to load the exact size of this struct. + // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence + // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp. + // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing + // the argument. 
+ // + SetNeedsTemp(&arg); + break; - default: - break; - } + default: + break; } -#endif // !TARGET_ARM } +#endif // !TARGET_ARM } #endif // FEATURE_MULTIREG_ARGS } @@ -3703,7 +3705,7 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg) { GenTree* argNode = arg->GetNode(); - assert(varTypeIsStruct(argNode->TypeGet())); + assert(varTypeIsStruct(argNode)); #if !defined(TARGET_ARMARCH) && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) NYI("fgMorphMultiregStructArg requires implementation for this target"); diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_71118/Runtime_71118.cs b/src/tests/JIT/Regression/JitBlue/Runtime_71118/Runtime_71118.cs new file mode 100644 index 0000000000000..254b12d4b5e28 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_71118/Runtime_71118.cs @@ -0,0 +1,28 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Numerics; +using System.Runtime.CompilerServices; + +public class Runtime_71118 +{ + public static int Main() + { + return Problem(new ClassWithVtor4 { Vtor4FieldTwo = new Vector4(1, 2, 3, 4) }) ? 101 : 100; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static bool Problem(ClassWithVtor4 a) + { + return CallForVtor4(a.Vtor4FieldTwo) != a.Vtor4FieldTwo.X; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static float CallForVtor4(Vector4 vtor) => vtor.X; + + class ClassWithVtor4 + { + public Vector4 Vtor4FieldOne; + public Vector4 Vtor4FieldTwo; + } +} diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_71118/Runtime_71118.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_71118/Runtime_71118.csproj new file mode 100644 index 0000000000000..c6e34eb80207f --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_71118/Runtime_71118.csproj @@ -0,0 +1,19 @@ + + + Exe + True + + + + + + + \ No newline at end of file