Skip to content

Commit a6a0610

Browse files
committed
[LV] Vectorize select min/max index.
Add support for vectorizing loops that select the index of the minimum or maximum element. The patch implements vectorizing those patterns by combining Min/Max and FindLastIV reductions. It extends matching Min/Max reductions to allow in-loop users that are FindLastIV reductions. It records a flag indicating that the Min/Max reduction is used by another reduction. When creating reduction recipes, we process any reduction that has other reduction users. The reduction using the min/max reduction needs adjusting to compute the correct result: 1. find the last IV for which the condition based on the min/max reduction is true; 2. compare the partial min/max reduction result to its final value; and 3. select the lanes of the partial FindLastIV reductions which correspond to the lanes matching the min/max reduction result.
1 parent 0c0aa56 commit a6a0610

File tree

11 files changed

+1434
-241
lines changed

11 files changed

+1434
-241
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -798,6 +798,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
798798
// For each block in the loop.
799799
for (BasicBlock *BB : TheLoop->blocks()) {
800800
// Scan the instructions in the block and look for hazards.
801+
PHINode *UnclassifiedPhi = nullptr;
801802
for (Instruction &I : *BB) {
802803
if (auto *Phi = dyn_cast<PHINode>(&I)) {
803804
Type *PhiTy = Phi->getType();
@@ -887,12 +888,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
887888
addInductionPhi(Phi, ID, AllowedExit);
888889
continue;
889890
}
890-
891-
reportVectorizationFailure("Found an unidentified PHI",
892-
"value that could not be identified as "
893-
"reduction is used outside the loop",
894-
"NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
895-
return false;
891+
UnclassifiedPhi = Phi;
896892
} // end of PHI handling
897893

898894
// We handle calls that:
@@ -1043,6 +1039,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
10431039
return false;
10441040
}
10451041
} // next instr.
1042+
if (UnclassifiedPhi && none_of(BB->phis(), [this](PHINode &P) {
1043+
auto I = Reductions.find(&P);
1044+
return I != Reductions.end() &&
1045+
RecurrenceDescriptor::isFindLastIVRecurrenceKind(
1046+
I->second.getRecurrenceKind());
1047+
})) {
1048+
reportVectorizationFailure("Found an unidentified PHI",
1049+
"value that could not be identified as "
1050+
"reduction is used outside the loop",
1051+
"NonReductionValueUsedOutsideLoop", ORE,
1052+
TheLoop, UnclassifiedPhi);
1053+
return false;
1054+
}
10461055
}
10471056

10481057
if (!PrimaryInduction) {

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7177,6 +7177,9 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
71777177
Value *StartV = getStartValueFromReductionResult(EpiRedResult);
71787178
Value *SentinelV = EpiRedResult->getOperand(2)->getLiveInIRValue();
71797179
using namespace llvm::PatternMatch;
7180+
MainResumeValue = cast<VPInstruction>(EpiRedHeaderPhi->getStartValue())
7181+
->getOperand(0)
7182+
->getUnderlyingValue();
71807183
Value *Cmp, *OrigResumeV, *CmpOp;
71817184
[[maybe_unused]] bool IsExpectedPattern =
71827185
match(MainResumeValue,
@@ -7187,7 +7190,11 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
71877190
((CmpOp == StartV && isGuaranteedNotToBeUndefOrPoison(CmpOp))));
71887191
assert(IsExpectedPattern && "Unexpected reduction resume pattern");
71897192
MainResumeValue = OrigResumeV;
7193+
} else {
7194+
if (auto *VPI = dyn_cast<VPInstruction>(EpiRedHeaderPhi->getStartValue()))
7195+
MainResumeValue = VPI->getOperand(0)->getUnderlyingValue();
71907196
}
7197+
71917198
PHINode *MainResumePhi = cast<PHINode>(MainResumeValue);
71927199

71937200
// When fixing reductions in the epilogue loop we should already have
@@ -8193,9 +8200,6 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
81938200
return Recipe;
81948201

81958202
VPHeaderPHIRecipe *PhiRecipe = nullptr;
8196-
assert((Legal->isReductionVariable(Phi) ||
8197-
Legal->isFixedOrderRecurrence(Phi)) &&
8198-
"can only widen reductions and fixed-order recurrences here");
81998203
VPValue *StartV = Operands[0];
82008204
if (Legal->isReductionVariable(Phi)) {
82018205
const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor(Phi);
@@ -8208,12 +8212,17 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
82088212
PhiRecipe = new VPReductionPHIRecipe(
82098213
Phi, RdxDesc.getRecurrenceKind(), *StartV, CM.isInLoopReduction(Phi),
82108214
CM.useOrderedReductions(RdxDesc), ScaleFactor);
8211-
} else {
8215+
} else if (Legal->isFixedOrderRecurrence(Phi)) {
82128216
// TODO: Currently fixed-order recurrences are modeled as chains of
82138217
// first-order recurrences. If there are no users of the intermediate
82148218
// recurrences in the chain, the fixed order recurrence should be modeled
82158219
// directly, enabling more efficient codegen.
82168220
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
8221+
} else {
8222+
// Failed to identify phi as reduction or fixed-order recurrence. Keep the
8223+
// original VPWidenPHIRecipe for now, to be legalized later if possible.
8224+
setRecipe(Phi, R);
8225+
return nullptr;
82178226
}
82188227
// Add backedge value.
82198228
PhiRecipe->addOperand(Operands[1]);
@@ -8398,7 +8407,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
83988407
// TODO: Extract final value from induction recipe initially, optimize to
83998408
// pre-computed end value together in optimizeInductionExitUsers.
84008409
auto *VectorPhiR =
8401-
cast<VPHeaderPHIRecipe>(Builder.getRecipe(&ScalarPhiIRI->getIRPhi()));
8410+
cast<VPSingleDefRecipe>(Builder.getRecipe(&ScalarPhiIRI->getIRPhi()));
84028411
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
84038412
if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
84048413
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
@@ -8420,7 +8429,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
84208429
// which for FORs is a vector whose last element needs to be extracted. The
84218430
// start value provides the value if the loop is bypassed.
84228431
bool IsFOR = isa<VPFirstOrderRecurrencePHIRecipe>(VectorPhiR);
8423-
auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
8432+
auto *ResumeFromVectorLoop = VectorPhiR->getOperand(1);
84248433
assert(VectorRegion->getSingleSuccessor() == Plan.getMiddleBlock() &&
84258434
"Cannot handle loops with uncountable early exits");
84268435
if (IsFOR)
@@ -8429,7 +8438,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
84298438
"vector.recur.extract");
84308439
StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
84318440
auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
8432-
{ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
8441+
{ResumeFromVectorLoop, VectorPhiR->getOperand(0)}, {}, Name);
84338442
ScalarPhiIRI->addOperand(ResumePhiR);
84348443
}
84358444
}
@@ -8744,6 +8753,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
87448753
VPRecipeBase *Recipe =
87458754
RecipeBuilder.tryToCreateWidenRecipe(SingleDef, Range);
87468755
if (!Recipe) {
8756+
if (isa<VPWidenPHIRecipe>(SingleDef))
8757+
continue;
87478758
SmallVector<VPValue *, 4> Operands(R.operands());
87488759
Recipe = RecipeBuilder.handleReplication(Instr, Operands, Range);
87498760
}
@@ -8809,6 +8820,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
88098820
// Adjust the recipes for any inloop reductions.
88108821
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
88118822

8823+
// Try to convert remaining VPWidenPHIRecipes to reduction recipes.
8824+
if (!VPlanTransforms::runPass(VPlanTransforms::legalizeUnclassifiedPhis,
8825+
*Plan))
8826+
return nullptr;
8827+
88128828
// Transform recipes to abstract recipes if it is legal and beneficial and
88138829
// clamp the range for better cost estimation.
88148830
// TODO: Enable following transform when the EVL-version of extended-reduction
@@ -9275,6 +9291,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
92759291
PhiR->setOperand(0, StartV);
92769292
}
92779293
}
9294+
92789295
for (VPRecipeBase *R : ToDelete)
92799296
R->eraseFromParent();
92809297

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1854,7 +1854,8 @@ class LLVM_ABI_FOR_TEST VPHeaderPHIRecipe : public VPSingleDefRecipe,
18541854
~VPHeaderPHIRecipe() override = default;
18551855

18561856
/// Method to support type inquiry through isa, cast, and dyn_cast.
1857-
static inline bool classof(const VPRecipeBase *B) {
1857+
static inline bool classof(const VPUser *U) {
1858+
auto *B = cast<VPRecipeBase>(U);
18581859
return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
18591860
B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
18601861
}
@@ -1863,6 +1864,10 @@ class LLVM_ABI_FOR_TEST VPHeaderPHIRecipe : public VPSingleDefRecipe,
18631864
return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
18641865
B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
18651866
}
1867+
/// Type-inquiry overload (isa, cast, dyn_cast) for callers that hold a
/// VPSingleDefRecipe. Returns true if the VPDef ID of \p B lies in the
/// inclusive range [VPFirstHeaderPHISC, VPLastHeaderPHISC].
static inline bool classof(const VPSingleDefRecipe *B) {
1868+
return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
1869+
B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
1870+
}
18661871

18671872
/// Generate the phi nodes.
18681873
void execute(VPTransformState &State) override = 0;
@@ -1924,7 +1929,7 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
19241929
return R && classof(R);
19251930
}
19261931

1927-
static inline bool classof(const VPHeaderPHIRecipe *R) {
1932+
static inline bool classof(const VPSingleDefRecipe *R) {
19281933
return classof(static_cast<const VPRecipeBase *>(R));
19291934
}
19301935

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -628,3 +628,148 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
628628
Term->addMetadata(LLVMContext::MD_prof, BranchWeights);
629629
}
630630
}
631+
632+
// Try to turn every VPWidenPHIRecipe left in the entry block of the vector
// loop region into a min/max VPReductionPHIRecipe. A phi is accepted only if
// its backedge value is a umin/umax/smin/smax intrinsic on the phi and the
// phi's single other user is an integer compare feeding the select of a
// FindLastIV reduction. On a match, the FindLastIV result computation is also
// rewired so only lanes matching the final min/max value are considered.
// Returns false as soon as one VPWidenPHIRecipe does not match the pattern;
// returns true if all of them were converted (or there were none).
bool VPlanTransforms::legalizeUnclassifiedPhis(VPlan &Plan) {
633+
using namespace VPlanPatternMatch;
634+
// make_early_inc_range: the current phi recipe is erased inside the loop body.
for (auto &PhiR : make_early_inc_range(
635+
Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis())) {
636+
if (!isa<VPWidenPHIRecipe>(&PhiR))
637+
continue;
638+
639+
// Check if PhiR is a min/max reduction that has a user inside the loop
640+
// outside the min/max reduction chain. The other user must be the compare
641+
// of a FindLastIV reduction chain.
642+
auto *MinMaxPhiR = cast<VPWidenPHIRecipe>(&PhiR);
643+
// Operand 1 is treated as the backedge value of the phi; it must be defined
// by a single-def recipe.
auto *MinMaxOp = dyn_cast_or_null<VPSingleDefRecipe>(
644+
MinMaxPhiR->getOperand(1)->getDefiningRecipe());
645+
if (!MinMaxOp)
646+
return false;
647+
648+
// The incoming value must be a min/max intrinsic.
649+
// TODO: Also handle the select variant.
650+
Intrinsic::ID ID = Intrinsic::not_intrinsic;
651+
if (auto *WideInt = dyn_cast<VPWidenIntrinsicRecipe>(MinMaxOp))
652+
ID = WideInt->getVectorIntrinsicID();
653+
else {
654+
// Otherwise only a replicate recipe wrapping an IR intrinsic call is
// accepted.
auto *RepR = dyn_cast<VPReplicateRecipe>(MinMaxOp);
655+
if (!RepR || !isa<IntrinsicInst>(RepR->getUnderlyingInstr()))
656+
return false;
657+
ID = cast<IntrinsicInst>(RepR->getUnderlyingInstr())->getIntrinsicID();
658+
}
659+
// Map the intrinsic to the corresponding recurrence kind; any other
// intrinsic is rejected.
RecurKind RdxKind = RecurKind::None;
660+
switch (ID) {
661+
case Intrinsic::umax:
662+
RdxKind = RecurKind::UMax;
663+
break;
664+
case Intrinsic::umin:
665+
RdxKind = RecurKind::UMin;
666+
break;
667+
case Intrinsic::smax:
668+
RdxKind = RecurKind::SMax;
669+
break;
670+
case Intrinsic::smin:
671+
RdxKind = RecurKind::SMin;
672+
break;
673+
default:
674+
return false;
675+
}
676+
677+
// The min/max intrinsic must use the phi and itself must only be used by
678+
// the phi and a resume-phi in the scalar preheader.
679+
if (MinMaxOp->getOperand(0) != MinMaxPhiR &&
680+
MinMaxOp->getOperand(1) != MinMaxPhiR)
681+
return false;
682+
if (MinMaxPhiR->getNumUsers() != 2 ||
683+
any_of(MinMaxOp->users(), [MinMaxPhiR, &Plan](VPUser *U) {
684+
auto *Phi = dyn_cast<VPPhi>(U);
685+
return MinMaxPhiR != U &&
686+
(!Phi || Phi->getParent() != Plan.getScalarPreheader());
687+
}))
688+
return false;
689+
690+
// One user of MinMaxPhiR is MinMaxOp, the other user must be a compare
691+
// that's part of a FindLastIV chain.
692+
auto MinMaxUsers = to_vector(MinMaxPhiR->users());
693+
auto *Cmp = dyn_cast<VPRecipeWithIRFlags>(
694+
MinMaxUsers[0] == MinMaxOp ? MinMaxUsers[1] : MinMaxUsers[0]);
695+
VPValue *CmpOpA;
696+
VPValue *CmpOpB;
697+
if (!Cmp || Cmp->getNumUsers() != 1 ||
698+
!match(Cmp, m_Binary<Instruction::ICmp>(m_VPValue(CmpOpA),
699+
m_VPValue(CmpOpB))))
700+
return false;
701+
702+
// Normalize the predicate so MinMaxPhiR is on the right side.
703+
CmpInst::Predicate Pred = Cmp->getPredicate();
704+
if (CmpOpA == MinMaxPhiR)
705+
Pred = CmpInst::getSwappedPredicate(Pred);
706+
707+
// Determine if the predicate is not strict.
708+
bool IsNonStrictPred = ICmpInst::isLE(Pred) || ICmpInst::isGE(Pred);
709+
// Account for a mismatch between RdxKind and the predicate.
710+
switch (RdxKind) {
711+
case RecurKind::UMin:
712+
case RecurKind::SMin:
713+
IsNonStrictPred |= ICmpInst::isGT(Pred);
714+
break;
715+
case RecurKind::UMax:
716+
case RecurKind::SMax:
717+
IsNonStrictPred |= ICmpInst::isLT(Pred);
718+
break;
719+
default:
720+
llvm_unreachable("unsupported kind");
721+
}
722+
723+
// TODO: Strict predicates need to find the first IV value for which the
724+
// predicate holds, not the last.
725+
if (Pred == CmpInst::ICMP_NE || !IsNonStrictPred)
726+
return false;
727+
728+
// Cmp must be used by the select of a FindLastIV chain.
729+
VPValue *Sel = dyn_cast<VPSingleDefRecipe>(*Cmp->user_begin());
730+
VPValue *IVOp, *FindIV;
731+
// The select must pick a widened int/fp induction when Cmp is true and must
// have exactly two users.
if (!Sel ||
732+
!match(Sel,
733+
m_Select(m_Specific(Cmp), m_VPValue(IVOp), m_VPValue(FindIV))) ||
734+
Sel->getNumUsers() != 2 || !isa<VPWidenIntOrFpInductionRecipe>(IVOp))
735+
return false;
736+
// The false operand of the select must be the FindLastIV reduction phi.
auto *FindIVPhiR = dyn_cast<VPReductionPHIRecipe>(FindIV);
737+
if (!FindIVPhiR || !RecurrenceDescriptor::isFindLastIVRecurrenceKind(
738+
FindIVPhiR->getRecurrenceKind()))
739+
return false;
740+
741+
assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() &&
742+
"cannot handle inloop/ordered reductions yet");
743+
744+
// All checks passed: replace the widened phi with a reduction phi of the
// matched kind, reusing its start value (operand 0) and backedge value
// (operand 1). The trailing constructor arguments follow the order used when
// tryToCreateWidenRecipe builds a VPReductionPHIRecipe: in-loop flag,
// ordered flag, scale factor.
auto NewPhiR = new VPReductionPHIRecipe(
745+
cast<PHINode>(MinMaxPhiR->getUnderlyingInstr()), RdxKind,
746+
*MinMaxPhiR->getOperand(0), false, false, 1);
747+
NewPhiR->insertBefore(MinMaxPhiR);
748+
MinMaxPhiR->replaceAllUsesWith(NewPhiR);
749+
NewPhiR->addOperand(MinMaxPhiR->getOperand(1));
750+
MinMaxPhiR->eraseFromParent();
751+
752+
// The reduction using MinMaxPhiR needs adjusting to compute the correct
753+
// result:
754+
// 1. We need to find the last IV for which the condition based on the
755+
// min/max recurrence is true,
756+
// 2. Compare the partial min/max reduction result to its final value and,
757+
// 3. Select the lanes of the partial FindLastIV reductions which
758+
// correspond to the lanes matching the min/max reduction result.
759+
// Sel has exactly two users (checked above); take the one that is not the
// FindLastIV phi. It is expected to be a VPInstruction (cast asserts).
VPInstruction *FindIVResult = cast<VPInstruction>(
760+
*(Sel->user_begin() + (*Sel->user_begin() == FindIVPhiR ? 1 : 0)));
761+
VPBuilder B(FindIVResult);
762+
VPInstruction *MinMaxResult =
763+
B.createNaryOp(VPInstruction::ComputeReductionResult,
764+
{NewPhiR, NewPhiR->getBackedgeValue()}, VPIRFlags(), {});
765+
// Users of the backedge value that are VPPhis now use the final reduced
// min/max value instead.
NewPhiR->getBackedgeValue()->replaceUsesWithIf(
766+
MinMaxResult, [](VPUser &U, unsigned) { return isa<VPPhi>(&U); });
767+
// Compare the partial min/max values (operand 1 of MinMaxResult, i.e. the
// backedge value) against the final reduced value.
auto *FinalMinMaxCmp = B.createICmp(
768+
CmpInst::ICMP_EQ, MinMaxResult->getOperand(1), MinMaxResult);
769+
// NOTE(review): operand 2 of FindIVResult appears to be the sentinel (cf.
// fixReductionScalarResumeWhenVectorizingEpilog) and operand 3 the partial
// FindLastIV value - confirm against the VPInstruction definition.
auto *FinalIVSelect =
770+
B.createSelect(FinalMinMaxCmp, FindIVResult->getOperand(3),
771+
FindIVResult->getOperand(2));
772+
FindIVResult->setOperand(3, FinalIVSelect);
773+
}
774+
return true;
775+
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,11 @@ struct VPlanTransforms {
8989
GetIntOrFpInductionDescriptor,
9090
ScalarEvolution &SE, const TargetLibraryInfo &TLI);
9191

92+
/// Try to legalize unclassified phis by converting VPWidenPHIRecipes to
93+
/// min-max reductions used by FindLastIV reductions if possible. Returns
94+
/// false if the VPlan contains VPWidenPHIRecipes that cannot be legalized.
95+
static bool legalizeUnclassifiedPhis(VPlan &Plan);
96+
9297
/// Try to have all users of fixed-order recurrences appear after the recipe
9398
/// defining their previous value, by either sinking users or hoisting recipes
9499
/// defining their previous value (and its operands). Then introduce

0 commit comments

Comments
 (0)