[VectorCombine] Added pattern for recognising the construction of packed integers. #147414

zGoldthorpe · 2025-07-07T22:16:04Z

This patch extends the vector combiner to simplify the construction of a packed scalar integer from a vector type, such as:

define i32 @bitcast.v2i(<4 x i8> %v) {
  %v.0 = extractelement <4 x i8> %v, i32 0
  %z.0 = zext i8 %v.0 to i32

  %v.1 = extractelement <4 x i8> %v, i32 1
  %z.1 = zext i8 %v.1 to i32
  %s.1 = shl i32 %z.1, 8
  %x.1 = or i32 %z.0, %s.1

  %v.2 = extractelement <4 x i8> %v, i32 2
  %z.2 = zext i8 %v.2 to i32
  %s.2 = shl i32 %z.2, 16
  %x.2 = or i32 %x.1, %s.2

  %v.3 = extractelement <4 x i8> %v, i32 3
  %z.3 = zext i8 %v.3 to i32
  %s.3 = shl i32 %z.3, 24
  %x.3 = or i32 %x.2, %s.3

  ret i32 %x.3
}

llvmbot · 2025-07-07T22:16:38Z

@llvm/pr-subscribers-vectorizers

@llvm/pr-subscribers-llvm-transforms

Author: None (zGoldthorpe)

Changes

This patch extends the vector combiner to simplify the construction of a packed scalar integer from a vector type, such as:

define i32 @bitcast.v2i(<4 x i8> %v) {
  %v.0 = extractelement <4 x i8> %v, i32 0
  %z.0 = zext i8 %v.0 to i32

  %v.1 = extractelement <4 x i8> %v, i32 1
  %z.1 = zext i8 %v.1 to i32
  %s.1 = shl i32 %z.1, 8
  %x.1 = or i32 %z.0, %s.1

  %v.2 = extractelement <4 x i8> %v, i32 2
  %z.2 = zext i8 %v.2 to i32
  %s.2 = shl i32 %z.2, 16
  %x.2 = or i32 %x.1, %s.2

  %v.3 = extractelement <4 x i8> %v, i32 3
  %z.3 = zext i8 %v.3 to i32
  %s.3 = shl i32 %z.3, 24
  %x.3 = or i32 %x.2, %s.3

  ret i32 %x.3
}

Full diff: https://github.com/llvm/llvm-project/pull/147414.diff

2 Files Affected:

(modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+125)
(added) llvm/test/Transforms/VectorCombine/packed-integers.ll (+108)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index fe8d74c43dfdc..ce73a383d2555 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -16,6 +16,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -125,6 +126,7 @@ class VectorCombine {
   bool scalarizeLoadExtract(Instruction &I);
   bool scalarizeExtExtract(Instruction &I);
   bool foldConcatOfBoolMasks(Instruction &I);
+  bool foldIntegerPackFromVector(Instruction &I);
   bool foldPermuteOfBinops(Instruction &I);
   bool foldShuffleOfBinops(Instruction &I);
   bool foldShuffleOfSelects(Instruction &I);
@@ -1957,6 +1959,126 @@ bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) {
   return true;
 }
 
+/// Match "shufflevector -> bitcast" or "extractelement -> zext -> shl" patterns
+/// which extract vector elements and pack them in the same relative positions.
+static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
+                                          uint64_t &VecOffset,
+                                          SmallBitVector &Mask) {
+  static const auto m_ConstShlOrSelf = [](const auto &Base, uint64_t &ShlAmt) {
+    ShlAmt = 0;
+    return m_CombineOr(m_Shl(Base, m_ConstantInt(ShlAmt)), Base);
+  };
+
+  // First try to match extractelement -> zext -> shl
+  uint64_t VecIdx, ShlAmt;
+  if (match(V, m_ConstShlOrSelf(m_ZExtOrSelf(m_ExtractElt(
+                                    m_Value(Vec), m_ConstantInt(VecIdx))),
+                                ShlAmt))) {
+    auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+    if (!VecTy)
+      return false;
+    auto *EltTy = dyn_cast<IntegerType>(VecTy->getElementType());
+    if (!EltTy)
+      return false;
+
+    const unsigned EltBitWidth = EltTy->getBitWidth();
+    const unsigned TargetBitWidth = V->getType()->getIntegerBitWidth();
+    if (TargetBitWidth % EltBitWidth != 0 || ShlAmt % EltBitWidth != 0)
+      return false;
+    const unsigned ShlEltAmt = ShlAmt / EltBitWidth;
+
+    if (ShlEltAmt > VecIdx)
+      return false;
+    VecOffset = VecIdx - ShlEltAmt;
+    Mask.resize(V->getType()->getIntegerBitWidth() / EltBitWidth);
+    Mask.set(ShlEltAmt);
+    return true;
+  }
+
+  // Now try to match shufflevector -> bitcast
+  Value *Lhs, *Rhs;
+  ArrayRef<int> ShuffleMask;
+  if (!match(V, m_BitCast(m_Shuffle(m_Value(Lhs), m_Value(Rhs),
+                                    m_Mask(ShuffleMask)))))
+    return false;
+  Mask.resize(ShuffleMask.size());
+
+  if (isa<Constant>(Lhs))
+    std::swap(Lhs, Rhs);
+
+  auto *RhsConst = dyn_cast<Constant>(Rhs);
+  if (!RhsConst)
+    return false;
+
+  auto *LhsTy = dyn_cast<FixedVectorType>(Lhs->getType());
+  if (!LhsTy)
+    return false;
+
+  Vec = Lhs;
+  const unsigned NumLhsElts = LhsTy->getNumElements();
+  bool FoundVecOffset = false;
+  for (unsigned Idx = 0; Idx < ShuffleMask.size(); ++Idx) {
+    if (ShuffleMask[Idx] == PoisonMaskElem)
+      return false;
+    const unsigned ShuffleIdx = ShuffleMask[Idx];
+    if (ShuffleIdx >= NumLhsElts) {
+      const unsigned RhsIdx = ShuffleIdx - NumLhsElts;
+      auto *RhsElt =
+          dyn_cast<ConstantInt>(RhsConst->getAggregateElement(RhsIdx));
+      if (!RhsElt || RhsElt->getZExtValue() != 0)
+        return false;
+      continue;
+    }
+
+    if (FoundVecOffset) {
+      if (VecOffset + Idx != ShuffleIdx)
+        return false;
+    } else {
+      if (ShuffleIdx < Idx)
+        return false;
+      VecOffset = ShuffleIdx - Idx;
+      FoundVecOffset = true;
+    }
+    Mask.set(Idx);
+  }
+  return FoundVecOffset;
+}
+/// Try to fold the or of two scalar integers whose contents are packed elements
+/// of the same vector.
+bool VectorCombine::foldIntegerPackFromVector(Instruction &I) {
+  assert(I.getOpcode() == Instruction::Or);
+  Value *LhsVec, *RhsVec;
+  uint64_t LhsVecOffset, RhsVecOffset;
+  SmallBitVector Mask;
+  if (!matchSubIntegerPackFromVector(I.getOperand(0), LhsVec, LhsVecOffset,
+                                     Mask))
+    return false;
+  if (!matchSubIntegerPackFromVector(I.getOperand(1), RhsVec, RhsVecOffset,
+                                     Mask))
+    return false;
+  if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset)
+    return false;
+
+  // Convert into shufflevector -> bitcast
+  SmallVector<int> ShuffleMask;
+  ShuffleMask.reserve(Mask.size());
+  const unsigned ZeroVecIdx =
+      cast<FixedVectorType>(LhsVec->getType())->getNumElements();
+  for (unsigned Idx = 0; Idx < Mask.size(); ++Idx) {
+    if (Mask.test(Idx))
+      ShuffleMask.push_back(LhsVecOffset + Idx);
+    else
+      ShuffleMask.push_back(ZeroVecIdx);
+  }
+
+  Value *MaskedVec = Builder.CreateShuffleVector(
+      LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask,
+      LhsVec->getName() + ".extract");
+  Value *CastedVec = Builder.CreateBitCast(MaskedVec, I.getType(), I.getName());
+  replaceValue(I, *CastedVec);
+  return true;
+}
+
 /// Try to convert "shuffle (binop (shuffle, shuffle)), undef"
 ///           -->  "binop (shuffle), (shuffle)".
 bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
@@ -3742,6 +3864,9 @@ bool VectorCombine::run() {
     if (Opcode == Instruction::Store)
       MadeChange |= foldSingleElementStore(I);
 
+    if (isa<IntegerType>(I.getType()) && Opcode == Instruction::Or)
+      MadeChange |= foldIntegerPackFromVector(I);
+
     // If this is an early pipeline invocation of this pass, we are done.
     if (TryEarlyFoldsOnly)
       return;
diff --git a/llvm/test/Transforms/VectorCombine/packed-integers.ll b/llvm/test/Transforms/VectorCombine/packed-integers.ll
new file mode 100644
index 0000000000000..f01179bbde13c
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/packed-integers.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=vector-combine %s | FileCheck %s
+
+define i32 @bitcast.v2i(<4 x i8> %v) {
+; CHECK-LABEL: define i32 @bitcast.v2i(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT:    [[X_3:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-NEXT:    ret i32 [[X_3]]
+;
+  %v.0 = extractelement <4 x i8> %v, i32 0
+  %z.0 = zext i8 %v.0 to i32
+
+  %v.1 = extractelement <4 x i8> %v, i32 1
+  %z.1 = zext i8 %v.1 to i32
+  %s.1 = shl i32 %z.1, 8
+  %x.1 = or i32 %z.0, %s.1
+
+  %v.2 = extractelement <4 x i8> %v, i32 2
+  %z.2 = zext i8 %v.2 to i32
+  %s.2 = shl i32 %z.2, 16
+  %x.2 = or i32 %x.1, %s.2
+
+  %v.3 = extractelement <4 x i8> %v, i32 3
+  %z.3 = zext i8 %v.3 to i32
+  %s.3 = shl i32 %z.3, 24
+  %x.3 = or i32 %x.2, %s.3
+
+  ret i32 %x.3
+}
+
+define i32 @bitcast.v2i.tree(<4 x i8> %v) {
+; CHECK-LABEL: define i32 @bitcast.v2i.tree(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT:    [[X:%.*]] = bitcast <4 x i8> [[V]] to i32
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %v.0 = extractelement <4 x i8> %v, i32 0
+  %z.0 = zext i8 %v.0 to i32
+
+  %v.1 = extractelement <4 x i8> %v, i32 1
+  %z.1 = zext i8 %v.1 to i32
+  %s.1 = shl i32 %z.1, 8
+  %x.1 = or i32 %z.0, %s.1
+
+  %v.2 = extractelement <4 x i8> %v, i32 2
+  %z.2 = zext i8 %v.2 to i32
+  %s.2 = shl i32 %z.2, 16
+
+  %v.3 = extractelement <4 x i8> %v, i32 3
+  %z.3 = zext i8 %v.3 to i32
+  %s.3 = shl i32 %z.3, 24
+  %x.3 = or i32 %s.2, %s.3
+
+  %x = or i32 %x.1, %x.3
+
+  ret i32 %x
+}
+
+define i32 @extract.i32(<8 x i8> %v) {
+; CHECK-LABEL: define i32 @extract.i32(
+; CHECK-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-NEXT:    [[V_EXTRACT4:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> zeroinitializer, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT:    [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT4]] to i32
+; CHECK-NEXT:    ret i32 [[X_3]]
+;
+  %v.0 = extractelement <8 x i8> %v, i32 3
+  %z.0 = zext i8 %v.0 to i32
+
+  %v.1 = extractelement <8 x i8> %v, i32 4
+  %z.1 = zext i8 %v.1 to i32
+  %s.1 = shl i32 %z.1, 8
+  %x.1 = or i32 %z.0, %s.1
+
+  %v.2 = extractelement <8 x i8> %v, i32 5
+  %z.2 = zext i8 %v.2 to i32
+  %s.2 = shl i32 %z.2, 16
+  %x.2 = or i32 %x.1, %s.2
+
+  %v.3 = extractelement <8 x i8> %v, i32 6
+  %z.3 = zext i8 %v.3 to i32
+  %s.3 = shl i32 %z.3, 24
+  %x.3 = or i32 %x.2, %s.3
+
+  ret i32 %x.3
+}
+
+define i32 @partial(<4 x i8> %v) {
+; CHECK-LABEL: define i32 @partial(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT:    [[V_EXTRACT2:%.*]] = shufflevector <4 x i8> [[V]], <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+; CHECK-NEXT:    [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT2]] to i32
+; CHECK-NEXT:    ret i32 [[X_3]]
+;
+  %v.0 = extractelement <4 x i8> %v, i32 0
+  %z.0 = zext i8 %v.0 to i32
+
+  %v.1 = extractelement <4 x i8> %v, i32 1
+  %z.1 = zext i8 %v.1 to i32
+  %s.1 = shl i32 %z.1, 8
+  %x.1 = or i32 %z.0, %s.1
+
+  %v.3 = extractelement <4 x i8> %v, i32 3
+  %z.3 = zext i8 %v.3 to i32
+  %s.3 = shl i32 %z.3, 24
+  %x.3 = or i32 %x.1, %s.3
+
+  ret i32 %x.3
+}

zGoldthorpe · 2025-07-07T22:18:53Z

@arsenm @macurtis-amd pinging to request review

macurtis-amd

LGTM, though I'm not an expert in this area. Best to get at least one more approval.

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Added pattern for folding packed integer constructions.

d98ec01

llvmbot added vectorizers llvm:transforms llvm:vectorcombine labels Jul 7, 2025

zGoldthorpe mentioned this pull request Jul 7, 2025

[Scalar] Dedicated pass for identifying redundant operations on packed bytes #146364

Open

macurtis-amd requested review from arsenm, macurtis-amd and shiltian July 10, 2025 20:07

macurtis-amd approved these changes Jul 10, 2025

View reviewed changes

topperc reviewed Jul 10, 2025

View reviewed changes

llvm/lib/Transforms/Vectorize/VectorCombine.cpp Outdated Show resolved Hide resolved

arsenm reviewed Jul 11, 2025

View reviewed changes

llvm/lib/Transforms/Vectorize/VectorCombine.cpp Outdated Show resolved Hide resolved

arsenm reviewed Jul 11, 2025

View reviewed changes

llvm/lib/Transforms/Vectorize/VectorCombine.cpp Outdated Show resolved Hide resolved

Incorporated reviewer feedback.

95e74dc

zGoldthorpe requested review from arsenm and topperc July 12, 2025 03:48

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[VectorCombine] Added pattern for recognising the construction of packed integers. #147414

[VectorCombine] Added pattern for recognising the construction of packed integers. #147414

zGoldthorpe commented Jul 7, 2025

Uh oh!

llvmbot commented Jul 7, 2025 •

edited

Loading

Uh oh!

zGoldthorpe commented Jul 7, 2025

Uh oh!

macurtis-amd left a comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

[VectorCombine] Added pattern for recognising the construction of packed integers. #147414

Are you sure you want to change the base?

[VectorCombine] Added pattern for recognising the construction of packed integers. #147414

Conversation

zGoldthorpe commented Jul 7, 2025

Uh oh!

llvmbot commented Jul 7, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

zGoldthorpe commented Jul 7, 2025

Uh oh!

macurtis-amd left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

llvmbot commented Jul 7, 2025 •

edited

Loading