Skip to content

Arm64_32 #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 32 additions & 7 deletions clang/lib/Basic/Targets/AArch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,11 @@ AArch64TargetInfo::AArch64TargetInfo(const llvm::Triple &Triple,
HasLegalHalfType = true;
HasFloat16 = true;

LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
if (!Triple.getArchName().endswith("_32"))
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You use this predicate a lot; is there a better way of expressing it now?

LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
else
LongWidth = LongAlign = PointerWidth = PointerAlign = 32;

MaxVectorAlign = 128;
MaxAtomicInlineWidth = 128;
MaxAtomicPromoteWidth = 128;
Expand Down Expand Up @@ -128,7 +132,8 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__ELF__");

// Target properties.
if (!getTriple().isOSWindows()) {
if (!getTriple().isOSWindows() &&
!getTriple().getArchName().endswith("_32")) {
Builder.defineMacro("_LP64");
Builder.defineMacro("__LP64__");
}
Expand Down Expand Up @@ -441,14 +446,19 @@ int AArch64TargetInfo::getEHDataRegisterNumber(unsigned RegNo) const {
return -1;
}

// Reports that 128-bit integer types are available; the answer is
// unconditionally true for every triple handled by this target class.
// NOTE(review): presumably overridden so that the 32-bit-pointer arm64_32
// variant still gets __int128 -- confirm against the TargetInfo default.
bool AArch64TargetInfo::hasInt128Type() const {
  return true;
}

// Forwards the triple and target options straight to the AArch64TargetInfo
// base constructor; no additional state is set up here.
AArch64leTargetInfo::AArch64leTargetInfo(const llvm::Triple &Triple,
                                         const TargetOptions &Opts)
    : AArch64TargetInfo(Triple, Opts) {
}

void AArch64leTargetInfo::setDataLayout() {
if (getTriple().isOSBinFormatMachO())
resetDataLayout("e-m:o-i64:64-i128:128-n32:64-S128");
else
if (getTriple().isOSBinFormatMachO()) {
if(getTriple().getArchName().endswith("_32"))
resetDataLayout("e-m:o-p:32:32-i64:64-i128:128-n32:64-S128");
else
resetDataLayout("e-m:o-i64:64-i128:128-n32:64-S128");
} else
resetDataLayout("e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128");
}

Expand Down Expand Up @@ -555,19 +565,34 @@ DarwinAArch64TargetInfo::DarwinAArch64TargetInfo(const llvm::Triple &Triple,
const TargetOptions &Opts)
: DarwinTargetInfo<AArch64leTargetInfo>(Triple, Opts) {
Int64Type = SignedLongLong;
if (getTriple().getArchName().endswith("_32"))
IntMaxType = SignedLongLong;

WCharType = SignedInt;
UseSignedCharForObjCBool = false;

LongDoubleWidth = LongDoubleAlign = SuitableAlign = 64;
LongDoubleFormat = &llvm::APFloat::IEEEdouble();

TheCXXABI.set(TargetCXXABI::iOS64);
UseZeroLengthBitfieldAlignment = false;

if (getTriple().getArchName().endswith("_32")) {
UseBitFieldTypeAlignment = false;
ZeroLengthBitfieldBoundary = 32;
UseZeroLengthBitfieldAlignment = true;
TheCXXABI.set(TargetCXXABI::WatchOS);
} else
TheCXXABI.set(TargetCXXABI::iOS64);
}

void DarwinAArch64TargetInfo::getOSDefines(const LangOptions &Opts,
const llvm::Triple &Triple,
MacroBuilder &Builder) const {
Builder.defineMacro("__AARCH64_SIMD__");
Builder.defineMacro("__ARM64_ARCH_8__");
if (Triple.getArchName().endswith("_32"))
Builder.defineMacro("__ARM64_ARCH_8_32__");
else
Builder.defineMacro("__ARM64_ARCH_8__");
Builder.defineMacro("__ARM_NEON__");
Builder.defineMacro("__LITTLE_ENDIAN__");
Builder.defineMacro("__REGISTER_PREFIX__", "");
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Basic/Targets/AArch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
}

int getEHDataRegisterNumber(unsigned RegNo) const override;

bool hasInt128Type() const override;
};

class LLVM_LIBRARY_VISIBILITY AArch64leTargetInfo : public AArch64TargetInfo {
Expand Down
35 changes: 23 additions & 12 deletions clang/lib/CodeGen/TargetInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4952,7 +4952,7 @@ class AArch64ABIInfo : public SwiftABIInfo {
ABIKind getABIKind() const { return Kind; }
bool isDarwinPCS() const { return Kind == DarwinPCS; }

ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadic) const;
ABIArgInfo classifyArgumentType(QualType RetTy) const;
bool isHomogeneousAggregateBaseType(QualType Ty) const override;
bool isHomogeneousAggregateSmallEnough(const Type *Ty,
Expand All @@ -4962,7 +4962,8 @@ class AArch64ABIInfo : public SwiftABIInfo {

void computeInfo(CGFunctionInfo &FI) const override {
if (!::classifyReturnType(getCXXABI(), FI, *this))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
FI.getReturnInfo() =
classifyReturnType(FI.getReturnType(), FI.isVariadic());

for (auto &it : FI.arguments())
it.info = classifyArgumentType(it.type);
Expand Down Expand Up @@ -5145,23 +5146,24 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
Alignment = getContext().getTypeUnadjustedAlign(Ty);
Alignment = Alignment < 128 ? 64 : 128;
} else {
Alignment = getContext().getTypeAlign(Ty);
Alignment = std::max(getContext().getTypeAlign(Ty),
(unsigned)getTarget().getPointerWidth(0));
}
Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes
Size = llvm::alignTo(Size, Alignment);

// We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
// For aggregates with 16-byte alignment, we use i128.
if (Alignment < 128 && Size == 128) {
llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext());
return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64));
}
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
llvm::Type *BaseTy = llvm::Type::getIntNTy(getVMContext(), Alignment);
return ABIArgInfo::getDirect(
Size == Alignment ? BaseTy
: llvm::ArrayType::get(BaseTy, Size / Alignment));
}

return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
}

ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const {
ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
bool IsVariadic) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();

Expand All @@ -5185,7 +5187,9 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy) const {

const Type *Base = nullptr;
uint64_t Members = 0;
if (isHomogeneousAggregate(RetTy, Base, Members))
if (isHomogeneousAggregate(RetTy, Base, Members) &&
!(getTarget().getTriple().getArchName().startswith("arm64_32") &&
IsVariadic))
// Homogeneous Floating-point Aggregates (HFAs) are returned directly.
return ABIArgInfo::getDirect();

Expand Down Expand Up @@ -5220,6 +5224,12 @@ bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const {
// NumElements should be power of 2.
if (!llvm::isPowerOf2_32(NumElements))
return true;

// arm64_32 has to be compatible with the ARM logic here, which allows huge
// vectors for some reason.
if (getTarget().getTriple().getArchName() == "arm64_32")
return Size <= 32;

return Size != 64 && (Size != 128 || NumElements == 1);
}
return false;
Expand Down Expand Up @@ -5520,7 +5530,8 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty,
if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty))
return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect());

CharUnits SlotSize = CharUnits::fromQuantity(8);
uint64_t PointerSize = getTarget().getPointerWidth(0) / 8;
CharUnits SlotSize = CharUnits::fromQuantity(PointerSize);

// Empty records are ignored for parameter passing purposes.
if (isEmptyRecord(getContext(), Ty, true)) {
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Driver/ToolChain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,10 @@ std::string ToolChain::ComputeLLVMTriple(const ArgList &Args,
if (!Triple.isOSBinFormatMachO())
return getTripleString();

StringRef Arch = Triple.getArchName();
if (Arch == "arm64_32")
return Triple.getTriple();

// FIXME: older versions of ld64 expect the "arm64" component in the actual
// triple string and query it to determine whether an LTO file can be
// handled. Remove this when we don't care any more.
Expand Down
8 changes: 5 additions & 3 deletions clang/lib/Driver/ToolChains/Darwin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ llvm::Triple::ArchType darwin::getArchTypeForMachOArchName(StringRef Str) {
.Cases("arm", "armv4t", "armv5", "armv6", "armv6m", llvm::Triple::arm)
.Cases("armv7", "armv7em", "armv7k", "armv7m", llvm::Triple::arm)
.Cases("armv7s", "xscale", llvm::Triple::arm)
.Case("arm64", llvm::Triple::aarch64)
.Cases("arm64", "arm64_32", llvm::Triple::aarch64)
.Case("r600", llvm::Triple::r600)
.Case("amdgcn", llvm::Triple::amdgcn)
.Case("nvptx", llvm::Triple::nvptx)
Expand All @@ -70,7 +70,7 @@ void darwin::setTripleTypeForMachOArchName(llvm::Triple &T, StringRef Str) {
llvm::ARM::ArchKind ArchKind = llvm::ARM::parseArch(Str);
T.setArch(Arch);

if (Str == "x86_64h")
if (Str == "x86_64h" || Str == "arm64_32")
T.setArchName(Str);
else if (ArchKind == llvm::ARM::ArchKind::ARMV6M ||
ArchKind == llvm::ARM::ArchKind::ARMV7M ||
Expand Down Expand Up @@ -780,6 +780,8 @@ StringRef MachO::getMachOArchName(const ArgList &Args) const {
return getDefaultUniversalArchName();

case llvm::Triple::aarch64:
if (getTriple().getArchName().endswith("_32"))
return "arm64_32";
return "arm64";

case llvm::Triple::thumb:
Expand Down Expand Up @@ -1530,7 +1532,7 @@ inferDeploymentTargetFromArch(DerivedArgList &Args, const Darwin &Toolchain,
if (MachOArchName == "armv7" || MachOArchName == "armv7s" ||
MachOArchName == "arm64")
OSTy = llvm::Triple::IOS;
else if (MachOArchName == "armv7k")
else if (MachOArchName == "armv7k" || MachOArchName == "arm64_32")
OSTy = llvm::Triple::WatchOS;
else if (MachOArchName != "armv6m" && MachOArchName != "armv7m" &&
MachOArchName != "armv7em")
Expand Down
117 changes: 117 additions & 0 deletions clang/test/CodeGen/arm64_32-vaarg.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
// RUN: %clang_cc1 -triple arm64_32-apple-ios7.0 -target-abi darwinpcs -emit-llvm -o - -O1 -ffreestanding %s | FileCheck %s

#include <stdarg.h>

// Aggregate wrapping a single int (4 bytes).
typedef struct {
int a;
} OneInt;

// A OneInt occupies exactly one 4-byte va_list slot, so no realignment should
// be needed here: the list pointer is simply advanced by 4 and the value is
// loaded from the old position. The named parameter is expected to be passed
// directly as an i32.
int test_int(OneInt input, va_list *mylist) {
// CHECK-LABEL: define i32 @test_int(i32 %input
// CHECK: [[START:%.*]] = load i8*, i8** %mylist
// CHECK: [[NEXT:%.*]] = getelementptr inbounds i8, i8* [[START]], i32 4
// CHECK: store i8* [[NEXT]], i8** %mylist

// CHECK: [[ADDR_I32:%.*]] = bitcast i8* [[START]] to i32*
// CHECK: [[RES:%.*]] = load i32, i32* [[ADDR_I32]]
// CHECK: ret i32 [[RES]]

return va_arg(*mylist, OneInt).a;
}


// Aggregate wrapping a single long long (8 bytes).
typedef struct {
long long a;
} OneLongLong;

// The minimum slot size is only 4 bytes, so the va_list pointer must be
// rounded up to a multiple of 8 before an 8-byte value is read from it.
long long test_longlong(OneLongLong input, va_list *mylist) {
// CHECK-LABEL: define i64 @test_longlong(i64 %input
// CHECK: [[STARTPTR:%.*]] = bitcast i8** %mylist to i32*
// CHECK: [[START:%.*]] = load i32, i32* [[STARTPTR]]

// Round the current position up to the next 8-byte boundary ((p + 7) & -8),
// then advance the list by 8 bytes past the aligned address.
// CHECK: [[ALIGN_TMP:%.*]] = add i32 [[START]], 7
// CHECK: [[ALIGNED:%.*]] = and i32 [[ALIGN_TMP]], -8
// CHECK: [[ALIGNED_ADDR:%.*]] = inttoptr i32 [[ALIGNED]] to i8*
// CHECK: [[NEXT:%.*]] = getelementptr inbounds i8, i8* [[ALIGNED_ADDR]], i32 8
// CHECK: store i8* [[NEXT]], i8** %mylist

// The value is loaded from the aligned address, not from the original one.
// CHECK: [[ADDR_STRUCT:%.*]] = inttoptr i32 [[ALIGNED]] to %struct.OneLongLong*
// CHECK: [[ADDR_I64:%.*]] = getelementptr inbounds %struct.OneLongLong, %struct.OneLongLong* [[ADDR_STRUCT]], i32 0, i32 0
// CHECK: [[RES:%.*]] = load i64, i64* [[ADDR_I64]]
// CHECK: ret i64 [[RES]]

return va_arg(*mylist, OneLongLong).a;
}


// Homogeneous floating-point aggregate: four floats, 16 bytes in total.
typedef struct {
float arr[4];
} HFA;

// HFAs take priority over passing large structs indirectly: the aggregate is
// expected to be read in place from the va_list (the list advances by its full
// 16 bytes) instead of a pointer to it being loaded first.
float test_hfa(va_list *mylist) {
// CHECK-LABEL: define float @test_hfa
// CHECK: [[START:%.*]] = load i8*, i8** %mylist

// CHECK: [[NEXT:%.*]] = getelementptr inbounds i8, i8* [[START]], i32 16
// CHECK: store i8* [[NEXT]], i8** %mylist

// CHECK: [[ADDR_FLOAT:%.*]] = bitcast i8* [[START]] to float*
// CHECK: [[RES:%.*]] = load float, float* [[ADDR_FLOAT]]
// CHECK: ret float [[RES]]

return va_arg(*mylist, HFA).arr[0];
}

// armv7k does not return HFAs normally for variadic functions, so we must match
// that: for this variadic function the HFA result is expected to come back as
// [2 x i64] rather than as a homogeneous float aggregate.
HFA test_hfa_return(int n, ...) {
// CHECK-LABEL: define [2 x i64] @test_hfa_return
HFA h = {0};
return h;
}

// Two long longs plus a char: larger than 16 bytes.
typedef struct {
long long a, b;
char c;
} BigStruct;

// Structs bigger than 16 bytes are passed indirectly: a pointer is placed on
// the stack. The va_list therefore advances by only 4 bytes (one pointer), and
// the struct is reached by loading that pointer.
long long test_bigstruct(BigStruct input, va_list *mylist) {
// CHECK-LABEL: define i64 @test_bigstruct(%struct.BigStruct*
// CHECK: [[START:%.*]] = load i8*, i8** %mylist
// CHECK: [[NEXT:%.*]] = getelementptr inbounds i8, i8* [[START]], i32 4
// CHECK: store i8* [[NEXT]], i8** %mylist

// The slot holds a BigStruct*, which is loaded and then dereferenced.
// CHECK: [[INT_PTR:%.*]] = bitcast i8* [[START]] to %struct.BigStruct**
// CHECK: [[ADDR:%.*]] = load %struct.BigStruct*, %struct.BigStruct** [[INT_PTR]]
// CHECK: [[ADDR_I64:%.*]] = getelementptr inbounds %struct.BigStruct, %struct.BigStruct* [[ADDR]], i32 0, i32 0
// CHECK: [[RES:%.*]] = load i64, i64* [[ADDR_I64]]
// CHECK: ret i64 [[RES]]

return va_arg(*mylist, BigStruct).a;
}

// Three shorts: 6 bytes of data with only 2-byte alignment.
typedef struct {
short arr[3];
} ThreeShorts;

// Slot sizes are 4 bytes on arm64_32, so structs with less than 32-bit
// alignment must be passed via "[N x i32]" to be correctly allocated in the
// backend. Here the 6-byte struct is expected to be widened to [2 x i32], and
// the va_list to advance by 8 bytes.
short test_threeshorts(ThreeShorts input, va_list *mylist) {
// CHECK-LABEL: define signext i16 @test_threeshorts([2 x i32] %input

// CHECK: [[START:%.*]] = load i8*, i8** %mylist
// CHECK: [[NEXT:%.*]] = getelementptr inbounds i8, i8* [[START]], i32 8
// CHECK: store i8* [[NEXT]], i8** %mylist

// NOTE: despite its name, the ADDR_I32 capture below holds an i16 pointer.
// CHECK: [[ADDR_I32:%.*]] = bitcast i8* [[START]] to i16*
// CHECK: [[RES:%.*]] = load i16, i16* [[ADDR_I32]]
// CHECK: ret i16 [[RES]]

return va_arg(*mylist, ThreeShorts).arr[0];
}
30 changes: 30 additions & 0 deletions clang/test/CodeGen/arm64_32.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// RUN: %clang_cc1 -triple arm64_32-apple-ios7.0 -emit-llvm -o - %s | FileCheck %s

// A char followed by a 1-bit int bit-field. On arm64_32 the whole struct is
// expected to occupy only 2 bytes, i.e. the bit-field does not pull in the
// size or alignment of int.
struct Foo {
char a;
int b : 1;
};

// NOTE: despite its name, this records sizeof(struct Foo), not a field offset.
int BitfieldOffset = sizeof(struct Foo);
// CHECK: @BitfieldOffset = global i32 2

// Pointers are expected to be 4 bytes wide with 4-byte alignment.
int PointerSize = sizeof(void *);
// CHECK: @PointerSize = global i32 4

int PointerAlign = __alignof(void *);
// CHECK: @PointerAlign = global i32 4

// long is likewise expected to be 4 bytes wide with 4-byte alignment.
int LongSize = sizeof(long);
// CHECK: @LongSize = global i32 4

int LongAlign = __alignof(long);
// CHECK: @LongAlign = global i32 4

// Not expected to change, but it's a difference between AAPCS and DarwinPCS
// that needs to be preserved for compatibility with ARMv7k: long double is
// emitted as a plain double.
long double LongDoubleVar = 0.0;
// CHECK: @LongDoubleVar = global double

// A 16 x float vector (64 bytes) is expected to be returned indirectly through
// an sret pointer while still being accepted as a direct vector argument.
typedef float __attribute__((ext_vector_type(16))) v16f32;
v16f32 func(v16f32 in) { return in; }
// CHECK: define void @func(<16 x float>* noalias sret {{%.*}}, <16 x float> {{%.*}})
5 changes: 4 additions & 1 deletion clang/test/CodeGen/builtins-arm64.c
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
// RUN: %clang_cc1 -triple arm64-unknown-linux -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LINUX
// RUN: %clang_cc1 -triple aarch64-windows -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-WIN
// RUN: %clang_cc1 -triple arm64_32-apple-ios -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
#include <stdint.h>

// Exercises the __clear_cache builtin. The expectation below uses the common
// prefix, so it applies to every RUN line of this test: the builtin should be
// emitted as a call to @__clear_cache.
void f0(void *a, void *b) {
__clear_cache(a,b);
// CHECK: call {{.*}} @__clear_cache
}

#if __LP64__
void *tp (void) {
return __builtin_thread_pointer ();
// CHECK: call {{.*}} @llvm.thread.pointer()
// CHECK-LINUX: call {{.*}} @llvm.thread.pointer()
}
#endif

// CHECK: call {{.*}} @llvm.bitreverse.i32(i32 %a)
unsigned rbit(unsigned a) {
Expand Down
Loading