
[WASM] Constant fold SIMD wasm intrinsics: any/alltrue #148074


Open
wants to merge 5 commits into main
Conversation

badumbatish
Contributor

Constant fold SIMD wasm intrinsics: any/alltrue

Added test in llvm/test/CodeGen/WebAssembly/const_fold_simd_intrinsics.ll
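
For reference, the WebAssembly SIMD reductions these intrinsics model are: any_true returns 1 if at least one lane is non-zero, and all_true returns 1 if every lane is non-zero. The standalone C++ sketch below (not part of the patch; the helper names are purely illustrative and not LLVM API) spells out that lane-wise behavior.

// Standalone sketch of the WebAssembly any_true/all_true lane reductions,
// for illustration only -- helper names are hypothetical.
#include <cstdint>
#include <cstdio>
#include <vector>

// any_true: 1 if at least one lane is non-zero, else 0.
static int32_t anyTrue(const std::vector<int64_t> &Lanes) {
  for (int64_t Lane : Lanes)
    if (Lane != 0)
      return 1;
  return 0;
}

// all_true: 1 if every lane is non-zero, else 0.
static int32_t allTrue(const std::vector<int64_t> &Lanes) {
  for (int64_t Lane : Lanes)
    if (Lane == 0)
      return 0;
  return 1;
}

int main() {
  std::printf("%d %d\n", anyTrue({0, 0, 0, 0}), allTrue({1, 1, 1, 1})); // prints "0 1"
  return 0;
}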

@llvmbot added the backend:WebAssembly and llvm:analysis (includes value tracking, cost tables and constant folding) labels on Jul 10, 2025
@llvmbot
Member

llvmbot commented Jul 10, 2025

@llvm/pr-subscribers-backend-webassembly

Author: jjasmine (badumbatish)

Changes

Constant fold SIMD wasm intrinsics: any/alltrue

Added test in llvm/test/CodeGen/WebAssembly/const_fold_simd_intrinsics.ll


Full diff: https://github.com/llvm/llvm-project/pull/148074.diff

2 Files Affected:

  • (modified) llvm/lib/Analysis/ConstantFolding.cpp (+12-1)
  • (added) llvm/test/CodeGen/WebAssembly/const_fold_simd_intrinsics.ll (+127)
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 6e469c034d9c8..ddd3f137ad84d 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1655,6 +1655,8 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::arm_mve_vctp32:
   case Intrinsic::arm_mve_vctp64:
   case Intrinsic::aarch64_sve_convert_from_svbool:
+  case Intrinsic::wasm_alltrue:
+  case Intrinsic::wasm_anytrue:
   // WebAssembly float semantics are always known
   case Intrinsic::wasm_trunc_signed:
   case Intrinsic::wasm_trunc_unsigned:
@@ -2832,7 +2834,8 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
 
   // Support ConstantVector in case we have an Undef in the top.
   if (isa<ConstantVector>(Operands[0]) ||
-      isa<ConstantDataVector>(Operands[0])) {
+      isa<ConstantDataVector>(Operands[0]) ||
+      isa<ConstantAggregateZero>(Operands[0])) {
     auto *Op = cast<Constant>(Operands[0]);
     switch (IntrinsicID) {
     default: break;
@@ -2856,6 +2859,14 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
                                            /*roundTowardZero=*/true, Ty,
                                            /*IsSigned*/true);
       break;
+
+    case Intrinsic::wasm_anytrue:
+      return Op->isZeroValue() ? ConstantInt::get(Ty, 1)
+                               : ConstantInt::get(Ty, 0);
+
+    case Intrinsic::wasm_alltrue:
+      return Op->isAllOnesValue() ? ConstantInt::get(Ty, 1)
+                                  : ConstantInt::get(Ty, 0);
     }
   }
 
diff --git a/llvm/test/CodeGen/WebAssembly/const_fold_simd_intrinsics.ll b/llvm/test/CodeGen/WebAssembly/const_fold_simd_intrinsics.ll
new file mode 100644
index 0000000000000..4d29b82f64d0d
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/const_fold_simd_intrinsics.ll
@@ -0,0 +1,127 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+; Test that intrinsics wasm call are constant folded
+
+; all_one: a splat that is all one
+; not_all_one: a splat that is all one, except for 0 in the first location
+
+; all_zero: a splat that is all zero
+; not_all_zero: a splat that is all zero, except for 1 in the first location
+
+target triple = "wasm32-unknown-unknown"
+
+define void @all_true_splat_not_all_one(ptr %ptr) {
+; CHECK-LABEL: define void @all_true_splat_not_all_one(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT:    store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT:    store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT:    store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT:    store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT:    ret void
+;
+  %a = tail call i32 @llvm.wasm.alltrue(<16 x i8> <i8 0, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+  store volatile i32 %a, ptr %ptr
+
+  %b = tail call i32 @llvm.wasm.alltrue(<8 x i16> <i16 0, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+  store volatile i32 %b, ptr %ptr
+
+  %c = tail call i32 @llvm.wasm.alltrue(<4 x i32> <i32 0, i32 1, i32 1, i32 1>)
+  store volatile i32 %c, ptr %ptr
+
+  %d = tail call i32 @llvm.wasm.alltrue(<2 x i64> <i64 0, i64 1>)
+  store volatile i32 %d, ptr %ptr
+
+  %e = tail call i32 @llvm.wasm.alltrue(<4 x i64> <i64 0, i64 1, i64 1, i64 1>)
+  store volatile i32 %e, ptr %ptr
+
+  ret void
+}
+
+define void @all_true_splat_one(ptr %ptr) {
+; CHECK-LABEL: define void @all_true_splat_one(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT:    store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT:    store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT:    store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT:    store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT:    ret void
+;
+  %a = tail call i32 @llvm.wasm.alltrue(<16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+  store volatile i32 %a, ptr %ptr
+
+  %b = tail call i32 @llvm.wasm.alltrue(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+  store volatile i32 %b, ptr %ptr
+
+  %c = tail call i32 @llvm.wasm.alltrue(<4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+  store volatile i32 %c, ptr %ptr
+
+  %d = tail call i32 @llvm.wasm.alltrue(<2 x i64> <i64 1, i64 1>)
+  store volatile i32 %d, ptr %ptr
+
+  %e = tail call i32 @llvm.wasm.alltrue(<4 x i64> <i64 1, i64 1, i64 1, i64 1>)
+  store volatile i32 %e, ptr %ptr
+
+  ret void
+}
+
+
+define void @any_true_splat_zero(ptr %ptr) {
+; CHECK-LABEL: define void @any_true_splat_zero(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    store volatile i32 1, ptr [[PTR]], align 4
+; CHECK-NEXT:    store volatile i32 1, ptr [[PTR]], align 4
+; CHECK-NEXT:    store volatile i32 1, ptr [[PTR]], align 4
+; CHECK-NEXT:    store volatile i32 1, ptr [[PTR]], align 4
+; CHECK-NEXT:    store volatile i32 1, ptr [[PTR]], align 4
+; CHECK-NEXT:    ret void
+;
+  %a = tail call i32 @llvm.wasm.anytrue(<16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
+  store volatile i32 %a, ptr %ptr
+
+  %b = tail call i32 @llvm.wasm.anytrue(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+  store volatile i32 %b, ptr %ptr
+
+  %c = tail call i32 @llvm.wasm.anytrue(<4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  store volatile i32 %c, ptr %ptr
+
+  %d = tail call i32 @llvm.wasm.anytrue(<2 x i64> <i64 0, i64 0>)
+  store volatile i32 %d, ptr %ptr
+
+  %e = tail call i32 @llvm.wasm.anytrue(<4 x i64> <i64 0, i64 0, i64 0, i64 0>)
+  store volatile i32 %e, ptr %ptr
+
+  ret void
+}
+
+
+define void @any_true_splat_not_all_zero(ptr %ptr) {
+; CHECK-LABEL: define void @any_true_splat_not_all_zero(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT:    store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT:    store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT:    store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT:    store volatile i32 0, ptr [[PTR]], align 4
+; CHECK-NEXT:    ret void
+;
+  %a = tail call i32 @llvm.wasm.anytrue(<16 x i8> <i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
+  store volatile i32 %a, ptr %ptr
+
+  %b = tail call i32 @llvm.wasm.anytrue(<8 x i16> <i16 1, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+  store volatile i32 %b, ptr %ptr
+
+  %c = tail call i32 @llvm.wasm.anytrue(<4 x i32> <i32 1, i32 0, i32 0, i32 0>)
+  store volatile i32 %c, ptr %ptr
+
+  %d = tail call i32 @llvm.wasm.anytrue(<2 x i64> <i64 1, i64 0>)
+  store volatile i32 %d, ptr %ptr
+
+  %e = tail call i32 @llvm.wasm.anytrue(<4 x i64> <i64 1, i64 0, i64 0, i64 0>)
+  store volatile i32 %e, ptr %ptr
+
+  ret void
+}

@llvmbot
Member

llvmbot commented Jul 10, 2025

@llvm/pr-subscribers-llvm-analysis

(Same PR description and full diff as in the comment above.)
@badumbatish
Contributor Author

Test not behaving as expected; I'll redo before pinging.


github-actions bot commented Jul 11, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@badumbatish
Contributor Author

@dschuff @sparker-arm Hi there, could I have some reviews on the PR? Thanks so much!

@@ -0,0 +1,127 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5

; RUN: opt -passes=instcombine -S < %s | FileCheck %s
Contributor


The test/CodeGen directory is for tests that run llc. Since this change actually lives in the middle end and the test runs opt, it should go in llvm/test/Transforms/InstSimplify/ConstProp/WebAssembly/.

I think we also only need to run instsimplify for this since we're not creating any new instructions:

Suggested change
- ; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+ ; RUN: opt -passes=instsimplify -S < %s | FileCheck %s

Labels: backend:WebAssembly, llvm:analysis (includes value tracking, cost tables and constant folding)
4 participants