[WASM] Constant fold SIMD wasm intrinsics: any/alltrue #148074
base: main
Conversation
@llvm/pr-subscribers-backend-webassembly

Author: jjasmine (badumbatish)

Changes

Constant fold SIMD wasm intrinsics: any/alltrue

Added test in llvm/test/CodeGen/WebAssembly/const_fold_simd_intrinsics.ll

Full diff: https://github.com/llvm/llvm-project/pull/148074.diff

2 Files Affected:
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 6e469c034d9c8..ddd3f137ad84d 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1655,6 +1655,8 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::arm_mve_vctp32:
case Intrinsic::arm_mve_vctp64:
case Intrinsic::aarch64_sve_convert_from_svbool:
+ case Intrinsic::wasm_alltrue:
+ case Intrinsic::wasm_anytrue:
// WebAssembly float semantics are always known
case Intrinsic::wasm_trunc_signed:
case Intrinsic::wasm_trunc_unsigned:
@@ -2832,7 +2834,8 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
// Support ConstantVector in case we have an Undef in the top.
if (isa<ConstantVector>(Operands[0]) ||
- isa<ConstantDataVector>(Operands[0])) {
+ isa<ConstantDataVector>(Operands[0]) ||
+ isa<ConstantAggregateZero>(Operands[0])) {
auto *Op = cast<Constant>(Operands[0]);
switch (IntrinsicID) {
default: break;
@@ -2856,6 +2859,14 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
/*roundTowardZero=*/true, Ty,
/*IsSigned*/true);
break;
+
+ case Intrinsic::wasm_anytrue:
+ return Op->isZeroValue() ? ConstantInt::get(Ty, 1)
+ : ConstantInt::get(Ty, 0);
+
+ case Intrinsic::wasm_alltrue:
+ return Op->isAllOnesValue() ? ConstantInt::get(Ty, 1)
+ : ConstantInt::get(Ty, 0);
}
}
diff --git a/llvm/test/CodeGen/WebAssembly/const_fold_simd_intrinsics.ll b/llvm/test/CodeGen/WebAssembly/const_fold_simd_intrinsics.ll
new file mode 100644
index 0000000000000..4d29b82f64d0d
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/const_fold_simd_intrinsics.ll
@@ -0,0 +1,127 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+; Test that intrinsics wasm call are constant folded
+
+; all_one: a splat that is all one
+; not_all_one: a splat that is all one, except for 0 in the first location
+
+; all_zero: a splat that is all zero
+; not_all_zero: a splat that is all zero, except for 1 in the first location
+
+target triple = "wasm32-unknown-unknown"
+
+define void @all_true_splat_not_all_one(ptr %ptr) {
+; CHECK-LABEL: define void @all_true_splat_not_all_one(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+ %a = tail call i32 @llvm.wasm.alltrue(<16 x i8> <i8 0, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ store volatile i32 %a, ptr %ptr
+
+ %b = tail call i32 @llvm.wasm.alltrue(<8 x i16> <i16 0, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ store volatile i32 %b, ptr %ptr
+
+ %c = tail call i32 @llvm.wasm.alltrue(<4 x i32> <i32 0, i32 1, i32 1, i32 1>)
+ store volatile i32 %c, ptr %ptr
+
+ %d = tail call i32 @llvm.wasm.alltrue(<2 x i64> <i64 0, i64 1>)
+ store volatile i32 %d, ptr %ptr
+
+ %e = tail call i32 @llvm.wasm.alltrue(<4 x i64> <i64 0, i64 1, i64 1, i64 1>)
+ store volatile i32 %e, ptr %ptr
+
+ ret void
+}
+
+define void @all_true_splat_one(ptr %ptr) {
+; CHECK-LABEL: define void @all_true_splat_one(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+ %a = tail call i32 @llvm.wasm.alltrue(<16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ store volatile i32 %a, ptr %ptr
+
+ %b = tail call i32 @llvm.wasm.alltrue(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ store volatile i32 %b, ptr %ptr
+
+ %c = tail call i32 @llvm.wasm.alltrue(<4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+ store volatile i32 %c, ptr %ptr
+
+ %d = tail call i32 @llvm.wasm.alltrue(<2 x i64> <i64 1, i64 1>)
+ store volatile i32 %d, ptr %ptr
+
+ %e = tail call i32 @llvm.wasm.alltrue(<4 x i64> <i64 1, i64 1, i64 1, i64 1>)
+ store volatile i32 %e, ptr %ptr
+
+ ret void
+}
+
+
+define void @any_true_splat_zero(ptr %ptr) {
+; CHECK-LABEL: define void @any_true_splat_zero(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: store volatile i32 1, ptr [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 1, ptr [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 1, ptr [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 1, ptr [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 1, ptr [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+ %a = tail call i32 @llvm.wasm.anytrue(<16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
+ store volatile i32 %a, ptr %ptr
+
+ %b = tail call i32 @llvm.wasm.anytrue(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+ store volatile i32 %b, ptr %ptr
+
+ %c = tail call i32 @llvm.wasm.anytrue(<4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ store volatile i32 %c, ptr %ptr
+
+ %d = tail call i32 @llvm.wasm.anytrue(<2 x i64> <i64 0, i64 0>)
+ store volatile i32 %d, ptr %ptr
+
+ %e = tail call i32 @llvm.wasm.anytrue(<4 x i64> <i64 0, i64 0, i64 0, i64 0>)
+ store volatile i32 %e, ptr %ptr
+
+ ret void
+}
+
+
+define void @any_true_splat_not_all_zero(ptr %ptr) {
+; CHECK-LABEL: define void @any_true_splat_not_all_zero(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT: store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+ %a = tail call i32 @llvm.wasm.anytrue(<16 x i8> <i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
+ store volatile i32 %a, ptr %ptr
+
+ %b = tail call i32 @llvm.wasm.anytrue(<8 x i16> <i16 1, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+ store volatile i32 %b, ptr %ptr
+
+ %c = tail call i32 @llvm.wasm.anytrue(<4 x i32> <i32 1, i32 0, i32 0, i32 0>)
+ store volatile i32 %c, ptr %ptr
+
+ %d = tail call i32 @llvm.wasm.anytrue(<2 x i64> <i64 1, i64 0>)
+ store volatile i32 %d, ptr %ptr
+
+ %e = tail call i32 @llvm.wasm.anytrue(<4 x i64> <i64 1, i64 0, i64 0, i64 0>)
+ store volatile i32 %e, ptr %ptr
+
+ ret void
+}
@llvm/pr-subscribers-llvm-analysis

Author: jjasmine (badumbatish) — same description and diff as above.
Test not behaving as expected, I'll redo before pinging.
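For context, the WebAssembly SIMD semantics being folded are lane-wise: any_true returns 1 if any lane is non-zero, and all_true returns 1 only if every lane is non-zero. The following is a minimal, hypothetical C++ sketch of such a lane-wise fold (it is not the code in this diff); it assumes the existing ConstantFolding helpers getAggregateElement and isZeroValue, and conservatively bails out when a lane's value is unknown:

// Hypothetical sketch of a lane-wise fold; not the PR's implementation.
// Assumes Op is the constant vector operand and Ty is the i32 result type.
auto *VT = dyn_cast<FixedVectorType>(Op->getType());
if (!VT)
  return nullptr;
bool AnyNonZero = false, AllNonZero = true;
for (unsigned I = 0, E = VT->getNumElements(); I != E; ++I) {
  Constant *Lane = Op->getAggregateElement(I);
  if (!Lane || isa<UndefValue>(Lane))
    return nullptr; // give up if a lane's value is unknown
  bool NonZero = !Lane->isZeroValue();
  AnyNonZero |= NonZero;
  AllNonZero &= NonZero;
}
if (IntrinsicID == Intrinsic::wasm_anytrue)
  return ConstantInt::get(Ty, AnyNonZero ? 1 : 0);
return ConstantInt::get(Ty, AllNonZero ? 1 : 0); // wasm_alltrue

With constant operands like the splats in the new test, each call folds to an i32 0 or 1 and only the folded value reaches the volatile stores.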
✅ With the latest revision this PR passed the C/C++ code formatter.
@dschuff @sparker-arm hi there, can I have some reviews on the PR? tysm
Inline review comment on llvm/test/CodeGen/WebAssembly/const_fold_simd_intrinsics.ll:

@@ -0,0 +1,127 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5

; RUN: opt -passes=instcombine -S < %s | FileCheck %s
The test/CodeGen directory is for tests that run llc; because this is actually in the middle end and running opt, this should be in llvm/test/Transforms/InstSimplify/ConstProp/WebAssembly/. I think we also only need to run instsimplify for this, since we're not creating any new instructions:
Suggested change:
-; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+; RUN: opt -passes=instsimplify -S < %s | FileCheck %s
Constant fold SIMD wasm intrinsics: any/alltrue
Added test in llvm/test/CodeGen/WebAssembly/const_fold_simd_intrinsics.ll