Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[arm64] Add RCPC ISA (8.3+) and use ldap for volatile reads #67384

Merged
merged 12 commits into from
Apr 12, 2022
1 change: 1 addition & 0 deletions src/coreclr/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -780,6 +780,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Dp, W("EnableArm64Dp"), 1
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Rdm, W("EnableArm64Rdm"), 1, "Allows Arm64 Rdm+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Sha1, W("EnableArm64Sha1"), 1, "Allows Arm64 Sha1+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Sha256, W("EnableArm64Sha256"), 1, "Allows Arm64 Sha256+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Rcpc, W("EnableArm64Rcpc"), 1, "Allows Arm64 Rcpc+ hardware intrinsics to be disabled")
#endif

///
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/inc/corinfoinstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ enum CORINFO_InstructionSet
InstructionSet_Rdm_Arm64=18,
InstructionSet_Sha1_Arm64=19,
InstructionSet_Sha256_Arm64=20,
InstructionSet_Rcpc=21,
#endif // TARGET_ARM64
#ifdef TARGET_AMD64
InstructionSet_X86Base=1,
Expand Down Expand Up @@ -486,6 +487,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "Vector128";
case InstructionSet_Dczva :
return "Dczva";
case InstructionSet_Rcpc :
return "Rcpc";
#endif // TARGET_ARM64
#ifdef TARGET_AMD64
case InstructionSet_X86Base :
Expand Down
10 changes: 5 additions & 5 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED

constexpr GUID JITEEVersionIdentifier = { /* b2d3c86f-87fd-4724-9e5d-4c44905eba91 */
0xb2d3c86f,
0x87fd,
0x4724,
{0x9e, 0x5d, 0x4c, 0x44, 0x90, 0x5e, 0xba, 0x91}
constexpr GUID JITEEVersionIdentifier = { /* 206a7aa6-9f5c-47c1-b63b-54f4cb169ee3 */
0x206a7aa6,
0x9f5c,
0x47c1,
{0xb6, 0x3b, 0x54, 0xf4, 0xcb, 0x16, 0x9e, 0xe3}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5233,6 +5233,12 @@ void CodeGen::genArm64EmitterUnitTests()
theEmitter->emitIns_R_R(INS_stlrb, EA_4BYTE, REG_R5, REG_R14);
theEmitter->emitIns_R_R(INS_stlrh, EA_4BYTE, REG_R3, REG_R15);

// ldapr Rt, [reg]
theEmitter->emitIns_R_R(INS_ldapr, EA_8BYTE, REG_R9, REG_R8);
theEmitter->emitIns_R_R(INS_ldapr, EA_4BYTE, REG_R7, REG_R10);
theEmitter->emitIns_R_R(INS_ldaprb, EA_4BYTE, REG_R5, REG_R11);
theEmitter->emitIns_R_R(INS_ldaprh, EA_4BYTE, REG_R5, REG_R12);

// ldaxr Rt, [reg]
theEmitter->emitIns_R_R(INS_ldaxr, EA_8BYTE, REG_R9, REG_R8);
theEmitter->emitIns_R_R(INS_ldaxr, EA_4BYTE, REG_R7, REG_R10);
Expand Down
10 changes: 7 additions & 3 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1884,17 +1884,21 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree)
bool addrIsInReg = tree->Addr()->isUsedFromReg();
bool addrIsAligned = ((tree->gtFlags & GTF_IND_UNALIGNED) == 0);

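// On Armv8.3+ (FEAT_LRCPC) we can use the ldapr* instructions. They have acquire (RCpc) semantics
// and, unlike ldar*, may be reordered ahead of an earlier stlr* to a different address, which
// avoids the full-memory-barrier behavior when volatile loads are mixed with STLR.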
bool hasRcpc = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc);

if ((ins == INS_ldrb) && addrIsInReg)
{
ins = INS_ldarb;
ins = hasRcpc ? INS_ldaprb : INS_ldarb;
}
else if ((ins == INS_ldrh) && addrIsInReg && addrIsAligned)
{
ins = INS_ldarh;
ins = hasRcpc ? INS_ldaprh : INS_ldarh;
}
else if ((ins == INS_ldr) && addrIsInReg && addrIsAligned && genIsValidIntReg(targetReg))
{
ins = INS_ldar;
ins = hasRcpc ? INS_ldapr : INS_ldar;
}
else
#endif // TARGET_ARM64
Expand Down
11 changes: 10 additions & 1 deletion src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1132,6 +1132,7 @@ emitAttr emitter::emitInsTargetRegSize(instrDesc* id)
{
case INS_ldxrb:
case INS_ldarb:
case INS_ldaprb:
case INS_ldaxrb:
case INS_stxrb:
case INS_stlrb:
Expand All @@ -1145,6 +1146,7 @@ emitAttr emitter::emitInsTargetRegSize(instrDesc* id)

case INS_ldxrh:
case INS_ldarh:
case INS_ldaprh:
case INS_ldaxrh:
case INS_stxrh:
case INS_stlrh:
Expand Down Expand Up @@ -1181,6 +1183,7 @@ emitAttr emitter::emitInsTargetRegSize(instrDesc* id)

case INS_ldxr:
case INS_ldar:
case INS_ldapr:
case INS_ldaxr:
case INS_stxr:
case INS_stlr:
Expand Down Expand Up @@ -1212,6 +1215,7 @@ emitAttr emitter::emitInsLoadStoreSize(instrDesc* id)
switch (ins)
{
case INS_ldarb:
case INS_ldaprb:
case INS_stlrb:
case INS_ldrb:
case INS_strb:
Expand All @@ -1223,6 +1227,7 @@ emitAttr emitter::emitInsLoadStoreSize(instrDesc* id)
break;

case INS_ldarh:
case INS_ldaprh:
case INS_stlrh:
case INS_ldrh:
case INS_strh:
Expand All @@ -1247,6 +1252,7 @@ emitAttr emitter::emitInsLoadStoreSize(instrDesc* id)
break;

case INS_ldar:
case INS_ldapr:
case INS_stlr:
case INS_ldr:
case INS_str:
Expand Down Expand Up @@ -4460,6 +4466,7 @@ void emitter::emitIns_R_R(
break;

case INS_ldar:
case INS_ldapr:
case INS_ldaxr:
case INS_ldxr:
case INS_stlr:
Expand All @@ -4468,9 +4475,11 @@ void emitter::emitIns_R_R(
FALLTHROUGH;

case INS_ldarb:
case INS_ldaprb:
case INS_ldaxrb:
case INS_ldxrb:
case INS_ldarh:
case INS_ldaprh:
case INS_ldaxrh:
case INS_ldxrh:
case INS_stlrb:
Expand Down Expand Up @@ -14206,7 +14215,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
break;

case IF_LS_2A: // ldr, ldrsw, ldrb, ldrh, ldrsb, ldrsh, str, strb, strh (no immediate)
// ldar, ldarb, ldarh, ldxr, ldxrb, ldxrh,
// ldar, ldarb, ldarh, ldapr, ldaprb, ldaprh, ldxr, ldxrb, ldxrh,
// ldaxr, ldaxrb, ldaxrh, stlr, stlrb, stlrh

result.insThroughput = PERFSCORE_THROUGHPUT_1C;
Expand Down
11 changes: 11 additions & 0 deletions src/coreclr/jit/instrsarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -1053,6 +1053,17 @@ INST1(ldarb, "ldarb", LD, IF_LS_2A, 0x08DFFC00)
INST1(ldarh, "ldarh", LD, IF_LS_2A, 0x48DFFC00)
// ldarh Rt,[Xn] LS_2A 0100100011011111 111111nnnnnttttt 48DF FC00


INST1(ldapr, "ldapr", LD, IF_LS_2A, 0xB8BFC000)
// ldapr Rt,[Xn] LS_2A 1X11100010111111 110000nnnnnttttt B8BF C000 Rm Rt Rn ARMv8.3 LRCPC

INST1(ldaprb, "ldaprb", LD, IF_LS_2A, 0x38BFC000)
// ldaprb Rt,[Xn] LS_2A 0011100010111111 110000nnnnnttttt 38BF C000 Rm Rt Rn ARMv8.3 LRCPC

INST1(ldaprh, "ldaprh", LD, IF_LS_2A, 0x78BFC000)
// ldaprh Rt,[Xn] LS_2A 0111100010111111 110000nnnnnttttt 78BF C000 Rm Rt Rn ARMv8.3 LRCPC


INST1(ldxr, "ldxr", LD, IF_LS_2A, 0x885F7C00)
// ldxr Rt,[Xn] LS_2A 1X00100001011111 011111nnnnnttttt 885F 7C00

Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/nativeaot/Runtime/IntrinsicConstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ enum ARM64IntrinsicConstants
ARM64IntrinsicConstants_Sha256 = 0x0100,
ARM64IntrinsicConstants_Atomics = 0x0200,
ARM64IntrinsicConstants_Vector64 = 0x0400,
ARM64IntrinsicConstants_Vector128 = 0x0800
ARM64IntrinsicConstants_Vector128 = 0x0800,
ARM64IntrinsicConstants_Rcpc = 0x1000
};
#endif //HOST_ARM64

Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1219,8 +1219,8 @@ REDHAWK_PALEXPORT void REDHAWK_PALAPI PAL_GetCpuCapabilityFlags(int* flags)
// *flags |= ARM64IntrinsicConstants_???;
#endif
#ifdef HWCAP_LRCPC
// if (hwCap & HWCAP_LRCPC)
// *flags |= ARM64IntrinsicConstants_???;
if (hwCap & HWCAP_LRCPC)
*flags |= ARM64IntrinsicConstants_Rcpc;
#endif
#ifdef HWCAP_PMULL
// if (hwCap & HWCAP_PMULL)
Expand Down
11 changes: 8 additions & 3 deletions src/coreclr/pal/src/misc/jitsupport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ static const CpuCapability CpuCapabilities[] = {
#endif
//{ "jscvt", HWCAP_JSCVT },
//{ "fcma", HWCAP_FCMA },
//{ "lrcpc", HWCAP_LRCPC },
#ifdef HWCAP_LRCPC
{ "lrcpc", HWCAP_LRCPC },
#endif
//{ "dcpop", HWCAP_DCPOP },
//{ "sha3", HWCAP_SHA3 },
//{ "sm3", HWCAP_SM3 },
Expand Down Expand Up @@ -208,8 +210,8 @@ PAL_GetJitCpuCapabilityFlags(CORJIT_FLAGS *flags)
// flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_JSCVT);
#endif
#ifdef HWCAP_LRCPC
// if (hwCap & HWCAP_LRCPC)
// flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_LRCPC);
if (hwCap & HWCAP_LRCPC)
flags->Set(InstructionSet_Rcpc);
#endif
#ifdef HWCAP_PMULL
// if (hwCap & HWCAP_PMULL)
Expand Down Expand Up @@ -280,6 +282,9 @@ PAL_GetJitCpuCapabilityFlags(CORJIT_FLAGS *flags)

if ((sysctlbyname("hw.optional.armv8_1_atomics", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0))
flags->Set(InstructionSet_Atomics);

if ((sysctlbyname("hw.optional.arm.FEAT_LRCPC", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0))
flags->Set(InstructionSet_Rcpc);
#endif // HAVE_SYSCTLBYNAME
// CoreCLR SIMD and FP support is included in ARM64 baseline
// On exceptional basis platforms may leave out support, but CoreCLR does not
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ public static class ReadyToRunInstructionSetHelper
case InstructionSet.ARM64_Vector64: return null;
case InstructionSet.ARM64_Vector128: return null;
case InstructionSet.ARM64_Dczva: return null;
case InstructionSet.ARM64_Rcpc: return null;

default: throw new Exception("Unknown instruction set");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public enum InstructionSet
ARM64_Rdm_Arm64 = InstructionSet_ARM64.Rdm_Arm64,
ARM64_Sha1_Arm64 = InstructionSet_ARM64.Sha1_Arm64,
ARM64_Sha256_Arm64 = InstructionSet_ARM64.Sha256_Arm64,
ARM64_Rcpc = InstructionSet_ARM64.Rcpc,
X64_X86Base = InstructionSet_X64.X86Base,
X64_SSE = InstructionSet_X64.SSE,
X64_SSE2 = InstructionSet_X64.SSE2,
Expand Down Expand Up @@ -136,6 +137,7 @@ public enum InstructionSet_ARM64
Rdm_Arm64 = 18,
Sha1_Arm64 = 19,
Sha256_Arm64 = 20,
Rcpc = 21,
}

public enum InstructionSet_X64
Expand Down Expand Up @@ -740,6 +742,7 @@ public static IEnumerable<InstructionSetInfo> ArchitectureToValidInstructionSets
yield return new InstructionSetInfo("Vector64", "", InstructionSet.ARM64_Vector64, false);
yield return new InstructionSetInfo("Vector128", "", InstructionSet.ARM64_Vector128, false);
yield return new InstructionSetInfo("Dczva", "", InstructionSet.ARM64_Dczva, false);
yield return new InstructionSetInfo("Rcpc", "", InstructionSet.ARM64_Rcpc, false);
break;

case TargetArchitecture.X64:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ instructionset64bit,ARM64 ,Dp
instructionset64bit,ARM64 ,Rdm
instructionset64bit,ARM64 ,Sha1
instructionset64bit,ARM64 ,Sha256
instructionset ,ARM64 , , , ,Rcpc ,

vectorinstructionset,ARM64,Vector64
vectorinstructionset,ARM64,Vector128
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ private static class Arm64IntrinsicConstants
public const int Atomics = 0x0200;
public const int Vector64 = 0x0400;
public const int Vector128 = 0x0800;
public const int Rcpc = 0x1000;

public static int FromHardwareIntrinsicId(string id)
{
Expand All @@ -207,6 +208,7 @@ public static int FromHardwareIntrinsicId(string id)
"Atomics" => Atomics,
"Vector64" => Vector64,
"Vector128" => Vector128,
"Rcpc" => Rcpc,
_ => throw new NotSupportedException(),
};
}
Expand All @@ -231,6 +233,7 @@ public static int FromInstructionSetFlags(InstructionSetFlags instructionSets)
InstructionSet.ARM64_Atomics => Atomics,
InstructionSet.ARM64_Vector64 => Vector64,
InstructionSet.ARM64_Vector128 => Vector128,
InstructionSet.ARM64_Rcpc => Rcpc,
_ => throw new NotSupportedException()
};
}
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/tools/aot/ILCompiler/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,7 @@ private int Run(string[] args)
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sha1");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sha2");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("lse");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("rcpc");
}

optimisticInstructionSetSupportBuilder.ComputeInstructionSetFlags(out var optimisticInstructionSet, out _,
Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/vm/codeman.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1592,6 +1592,11 @@ void EEJitManager::SetCpuInfo()
CPUCompileFlags.Clear(InstructionSet_Atomics);
}

if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Rcpc))
{
CPUCompileFlags.Clear(InstructionSet_Rcpc);
}

if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Crc32))
{
CPUCompileFlags.Clear(InstructionSet_Crc32);
Expand Down