@@ -3898,7 +3898,7 @@ class BoUpSLP {
3898
3898
3899
3899
/// When ReuseReorderShuffleIndices is empty it just returns position of \p
3900
3900
/// V within vector of Scalars. Otherwise, try to remap on its reuse index.
3901
- int findLaneForValue(Value *V) const {
3901
+ unsigned findLaneForValue(Value *V) const {
3902
3902
unsigned FoundLane = getVectorFactor();
3903
3903
for (auto *It = find(Scalars, V), *End = Scalars.end(); It != End;
3904
3904
std::advance(It, 1)) {
@@ -4344,7 +4344,7 @@ class BoUpSLP {
4344
4344
4345
4345
/// This POD struct describes one external user in the vectorized tree.
4346
4346
struct ExternalUser {
4347
- ExternalUser(Value *S, llvm::User *U, const TreeEntry &E, int L)
4347
+ ExternalUser(Value *S, llvm::User *U, const TreeEntry &E, unsigned L)
4348
4348
: Scalar(S), User(U), E(E), Lane(L) {}
4349
4349
4350
4350
/// Which scalar in our function.
@@ -4357,7 +4357,7 @@ class BoUpSLP {
4357
4357
const TreeEntry &E;
4358
4358
4359
4359
/// Which lane does the scalar belong to.
4360
- int Lane;
4360
+ unsigned Lane;
4361
4361
};
4362
4362
using UserList = SmallVector<ExternalUser, 16>;
4363
4363
@@ -7901,7 +7901,7 @@ void BoUpSLP::buildExternalUses(
7901
7901
// Check if the scalar is externally used as an extra arg.
7902
7902
const auto ExtI = ExternallyUsedValues.find(Scalar);
7903
7903
if (ExtI != ExternallyUsedValues.end()) {
7904
- int FoundLane = Entry->findLaneForValue(Scalar);
7904
+ unsigned FoundLane = Entry->findLaneForValue(Scalar);
7905
7905
LLVM_DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane "
7906
7906
<< FoundLane << " from " << *Scalar << ".\n");
7907
7907
ScalarToExtUses.try_emplace(Scalar, ExternalUses.size());
@@ -7949,7 +7949,7 @@ void BoUpSLP::buildExternalUses(
7949
7949
7950
7950
if (U && Scalar->hasNUsesOrMore(UsesLimit))
7951
7951
U = nullptr;
7952
- int FoundLane = Entry->findLaneForValue(Scalar);
7952
+ unsigned FoundLane = Entry->findLaneForValue(Scalar);
7953
7953
LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *UserInst
7954
7954
<< " from lane " << FoundLane << " from " << *Scalar
7955
7955
<< ".\n");
@@ -14568,8 +14568,6 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
14568
14568
LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
14569
14569
<< VectorizableTree.size() << ".\n");
14570
14570
14571
- unsigned BundleWidth = VectorizableTree[0]->Scalars.size();
14572
-
14573
14571
SmallPtrSet<Value *, 4> CheckedExtracts;
14574
14572
for (unsigned I = 0, E = VectorizableTree.size(); I < E; ++I) {
14575
14573
TreeEntry &TE = *VectorizableTree[I];
@@ -14632,6 +14630,11 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
14632
14630
}
14633
14631
SmallDenseSet<std::pair<Value *, Value *>, 8> CheckedScalarUser;
14634
14632
for (ExternalUser &EU : ExternalUses) {
14633
+ LLVM_DEBUG(dbgs() << "SLP: Computing cost for external use of TreeEntry "
14634
+ << EU.E.Idx << " in lane " << EU.Lane << "\n");
14635
+ LLVM_DEBUG(dbgs() << " User:" << *EU.User << "\n");
14636
+ LLVM_DEBUG(dbgs() << " Use: " << EU.Scalar->getNameOrAsOperand() << "\n");
14637
+
14635
14638
// Uses by ephemeral values are free (because the ephemeral value will be
14636
14639
// removed prior to code generation, and so the extraction will be
14637
14640
// removed as well).
@@ -14739,6 +14742,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
14739
14742
// for the extract and the added cost of the sign extend if needed.
14740
14743
InstructionCost ExtraCost = TTI::TCC_Free;
14741
14744
auto *ScalarTy = EU.Scalar->getType();
14745
+ const unsigned BundleWidth = EU.E.getVectorFactor();
14746
+ assert(EU.Lane < BundleWidth && "Extracted lane out of bounds.");
14742
14747
auto *VecTy = getWidenedType(ScalarTy, BundleWidth);
14743
14748
const TreeEntry *Entry = &EU.E;
14744
14749
auto It = MinBWs.find(Entry);
@@ -14752,10 +14757,14 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
14752
14757
VecTy = getWidenedType(MinTy, BundleWidth);
14753
14758
ExtraCost =
14754
14759
getExtractWithExtendCost(*TTI, Extend, ScalarTy, VecTy, EU.Lane);
14760
+ LLVM_DEBUG(dbgs() << " ExtractExtend or ExtractSubvec cost: "
14761
+ << ExtraCost << "\n");
14755
14762
} else {
14756
14763
ExtraCost =
14757
14764
getVectorInstrCost(*TTI, ScalarTy, Instruction::ExtractElement, VecTy,
14758
14765
CostKind, EU.Lane, EU.Scalar, ScalarUserAndIdx);
14766
+ LLVM_DEBUG(dbgs() << " ExtractElement cost for " << *ScalarTy << " from "
14767
+ << *VecTy << ": " << ExtraCost << "\n");
14759
14768
}
14760
14769
// Leave the scalar instructions as is if they are cheaper than extracts.
14761
14770
if (Entry->Idx != 0 || Entry->getOpcode() == Instruction::GetElementPtr ||
0 commit comments