Skip to content

[LV] Vectorize selecting last IV of min/max element. #141431

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -798,6 +798,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// For each block in the loop.
for (BasicBlock *BB : TheLoop->blocks()) {
// Scan the instructions in the block and look for hazards.
PHINode *UnclassifiedPhi = nullptr;
for (Instruction &I : *BB) {
if (auto *Phi = dyn_cast<PHINode>(&I)) {
Type *PhiTy = Phi->getType();
Expand Down Expand Up @@ -887,12 +888,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
addInductionPhi(Phi, ID, AllowedExit);
continue;
}

reportVectorizationFailure("Found an unidentified PHI",
"value that could not be identified as "
"reduction is used outside the loop",
"NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
return false;
UnclassifiedPhi = Phi;
} // end of PHI handling

// We handle calls that:
Expand Down Expand Up @@ -1043,6 +1039,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
return false;
}
} // next instr.
if (UnclassifiedPhi && none_of(BB->phis(), [this](PHINode &P) {
auto I = Reductions.find(&P);
return I != Reductions.end() &&
RecurrenceDescriptor::isFindLastIVRecurrenceKind(
I->second.getRecurrenceKind());
})) {
reportVectorizationFailure("Found an unidentified PHI",
"value that could not be identified as "
"reduction is used outside the loop",
"NonReductionValueUsedOutsideLoop", ORE,
TheLoop, UnclassifiedPhi);
return false;
}
}

if (!PrimaryInduction) {
Expand Down
31 changes: 24 additions & 7 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7177,6 +7177,9 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
Value *StartV = getStartValueFromReductionResult(EpiRedResult);
Value *SentinelV = EpiRedResult->getOperand(2)->getLiveInIRValue();
using namespace llvm::PatternMatch;
MainResumeValue = cast<VPInstruction>(EpiRedHeaderPhi->getStartValue())
->getOperand(0)
->getUnderlyingValue();
Value *Cmp, *OrigResumeV, *CmpOp;
[[maybe_unused]] bool IsExpectedPattern =
match(MainResumeValue,
Expand All @@ -7187,7 +7190,11 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
((CmpOp == StartV && isGuaranteedNotToBeUndefOrPoison(CmpOp))));
assert(IsExpectedPattern && "Unexpected reduction resume pattern");
MainResumeValue = OrigResumeV;
} else {
if (auto *VPI = dyn_cast<VPInstruction>(EpiRedHeaderPhi->getStartValue()))
MainResumeValue = VPI->getOperand(0)->getUnderlyingValue();
}

PHINode *MainResumePhi = cast<PHINode>(MainResumeValue);

// When fixing reductions in the epilogue loop we should already have
Expand Down Expand Up @@ -8193,9 +8200,6 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
return Recipe;

VPHeaderPHIRecipe *PhiRecipe = nullptr;
assert((Legal->isReductionVariable(Phi) ||
Legal->isFixedOrderRecurrence(Phi)) &&
"can only widen reductions and fixed-order recurrences here");
VPValue *StartV = Operands[0];
if (Legal->isReductionVariable(Phi)) {
const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor(Phi);
Expand All @@ -8208,12 +8212,17 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
PhiRecipe = new VPReductionPHIRecipe(
Phi, RdxDesc.getRecurrenceKind(), *StartV, CM.isInLoopReduction(Phi),
CM.useOrderedReductions(RdxDesc), ScaleFactor);
} else {
} else if (Legal->isFixedOrderRecurrence(Phi)) {
// TODO: Currently fixed-order recurrences are modeled as chains of
// first-order recurrences. If there are no users of the intermediate
// recurrences in the chain, the fixed order recurrence should be modeled
// directly, enabling more efficient codegen.
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
} else {
// Failed to identify phi as reduction or fixed-order recurrence. Keep the
// original VPWidenPHIRecipe for now, to be legalized later if possible.
setRecipe(Phi, R);
return nullptr;
}
// Add backedge value.
PhiRecipe->addOperand(Operands[1]);
Expand Down Expand Up @@ -8398,7 +8407,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
// TODO: Extract final value from induction recipe initially, optimize to
// pre-computed end value together in optimizeInductionExitUsers.
auto *VectorPhiR =
cast<VPHeaderPHIRecipe>(Builder.getRecipe(&ScalarPhiIRI->getIRPhi()));
cast<VPSingleDefRecipe>(Builder.getRecipe(&ScalarPhiIRI->getIRPhi()));
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
Expand All @@ -8420,7 +8429,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
// which for FORs is a vector whose last element needs to be extracted. The
// start value provides the value if the loop is bypassed.
bool IsFOR = isa<VPFirstOrderRecurrencePHIRecipe>(VectorPhiR);
auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
auto *ResumeFromVectorLoop = VectorPhiR->getOperand(1);
assert(VectorRegion->getSingleSuccessor() == Plan.getMiddleBlock() &&
"Cannot handle loops with uncountable early exits");
if (IsFOR)
Expand All @@ -8429,7 +8438,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
"vector.recur.extract");
StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
{ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
{ResumeFromVectorLoop, VectorPhiR->getOperand(0)}, {}, Name);
ScalarPhiIRI->addOperand(ResumePhiR);
}
}
Expand Down Expand Up @@ -8744,6 +8753,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
VPRecipeBase *Recipe =
RecipeBuilder.tryToCreateWidenRecipe(SingleDef, Range);
if (!Recipe) {
if (isa<VPWidenPHIRecipe>(SingleDef))
continue;
SmallVector<VPValue *, 4> Operands(R.operands());
Recipe = RecipeBuilder.handleReplication(Instr, Operands, Range);
}
Expand Down Expand Up @@ -8809,6 +8820,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Adjust the recipes for any inloop reductions.
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);

// Try to convert remaining VPWidenPHIRecipes to reduction recipes.
if (!VPlanTransforms::runPass(VPlanTransforms::legalizeUnclassifiedPhis,
*Plan))
return nullptr;

// Transform recipes to abstract recipes if it is legal and beneficial and
// clamp the range for better cost estimation.
// TODO: Enable following transform when the EVL-version of extended-reduction
Expand Down Expand Up @@ -9275,6 +9291,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
PhiR->setOperand(0, StartV);
}
}

for (VPRecipeBase *R : ToDelete)
R->eraseFromParent();

Expand Down
9 changes: 7 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1854,7 +1854,8 @@ class LLVM_ABI_FOR_TEST VPHeaderPHIRecipe : public VPSingleDefRecipe,
~VPHeaderPHIRecipe() override = default;

/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPRecipeBase *B) {
static inline bool classof(const VPUser *U) {
auto *B = cast<VPRecipeBase>(U);
return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
}
Expand All @@ -1863,6 +1864,10 @@ class LLVM_ABI_FOR_TEST VPHeaderPHIRecipe : public VPSingleDefRecipe,
return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
}
/// Type inquiry for isa/cast/dyn_cast starting from a VPSingleDefRecipe: true
/// iff the recipe's VPDef ID lies within the closed header-phi ID range
/// [VPFirstHeaderPHISC, VPLastHeaderPHISC].
static inline bool classof(const VPSingleDefRecipe *B) {
  const auto DefID = B->getVPDefID();
  if (DefID < VPDef::VPFirstHeaderPHISC)
    return false;
  return DefID <= VPDef::VPLastHeaderPHISC;
}

/// Generate the phi nodes.
void execute(VPTransformState &State) override = 0;
Expand Down Expand Up @@ -1924,7 +1929,7 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
return R && classof(R);
}

static inline bool classof(const VPHeaderPHIRecipe *R) {
static inline bool classof(const VPSingleDefRecipe *R) {
return classof(static_cast<const VPRecipeBase *>(R));
}

Expand Down
145 changes: 145 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -628,3 +628,148 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
Term->addMetadata(LLVMContext::MD_prof, BranchWeights);
}
}

/// Walk the phis of the vector loop region's entry block and try to convert
/// every remaining VPWidenPHIRecipe into a min/max VPReductionPHIRecipe whose
/// only other in-loop user is the compare feeding a FindLastIV reduction.
///
/// For each converted phi the FindLastIV result computation is adjusted so
/// that only lanes whose partial min/max equals the final min/max value
/// contribute their IV.
///
/// \returns true if every VPWidenPHIRecipe was converted (or there was none),
/// false as soon as one does not match the expected pattern. Phis visited
/// before the failing one may already have been rewritten at that point.
bool VPlanTransforms::legalizeUnclassifiedPhis(VPlan &Plan) {
  using namespace VPlanPatternMatch;
  // Early-inc iteration: the current phi is erased when it is converted.
  for (auto &PhiR : make_early_inc_range(
           Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis())) {
    if (!isa<VPWidenPHIRecipe>(&PhiR))
      continue;

    // Check if PhiR is a min/max reduction that has a user inside the loop
    // outside the min/max reduction chain. The other user must be the compare
    // of a FindLastIV reduction chain.
    auto *MinMaxPhiR = cast<VPWidenPHIRecipe>(&PhiR);
    auto *MinMaxOp = dyn_cast_or_null<VPSingleDefRecipe>(
        MinMaxPhiR->getOperand(1)->getDefiningRecipe());
    if (!MinMaxOp)
      return false;

    // The incoming value must be a min/max intrinsic, either widened or
    // replicated.
    // TODO: Also handle the select variant.
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (auto *WideInt = dyn_cast<VPWidenIntrinsicRecipe>(MinMaxOp)) {
      ID = WideInt->getVectorIntrinsicID();
    } else {
      auto *RepR = dyn_cast<VPReplicateRecipe>(MinMaxOp);
      auto *II =
          RepR ? dyn_cast<IntrinsicInst>(RepR->getUnderlyingInstr()) : nullptr;
      if (!II)
        return false;
      ID = II->getIntrinsicID();
    }

    // Map the intrinsic to the matching reduction kind; anything else is not
    // a supported min/max.
    RecurKind RdxKind = RecurKind::None;
    switch (ID) {
    case Intrinsic::umax:
      RdxKind = RecurKind::UMax;
      break;
    case Intrinsic::umin:
      RdxKind = RecurKind::UMin;
      break;
    case Intrinsic::smax:
      RdxKind = RecurKind::SMax;
      break;
    case Intrinsic::smin:
      RdxKind = RecurKind::SMin;
      break;
    default:
      return false;
    }

    // The min/max intrinsic must use the phi and itself must only be used by
    // the phi and a resume-phi in the scalar preheader.
    if (MinMaxOp->getOperand(0) != MinMaxPhiR &&
        MinMaxOp->getOperand(1) != MinMaxPhiR)
      return false;
    if (MinMaxPhiR->getNumUsers() != 2 ||
        any_of(MinMaxOp->users(), [MinMaxPhiR, &Plan](VPUser *U) {
          auto *Phi = dyn_cast<VPPhi>(U);
          return MinMaxPhiR != U &&
                 (!Phi || Phi->getParent() != Plan.getScalarPreheader());
        }))
      return false;

    // One user of MinMaxPhiR is MinMaxOp, the other user must be a compare
    // that's part of a FindLastIV chain.
    auto MinMaxUsers = to_vector(MinMaxPhiR->users());
    auto *Cmp = dyn_cast<VPRecipeWithIRFlags>(
        MinMaxUsers[0] == MinMaxOp ? MinMaxUsers[1] : MinMaxUsers[0]);
    VPValue *CmpOpA;
    VPValue *CmpOpB;
    if (!Cmp || Cmp->getNumUsers() != 1 ||
        !match(Cmp, m_Binary<Instruction::ICmp>(m_VPValue(CmpOpA),
                                                m_VPValue(CmpOpB))))
      return false;

    // Normalize the predicate so MinMaxPhiR is on the right side.
    CmpInst::Predicate Pred = Cmp->getPredicate();
    if (CmpOpA == MinMaxPhiR)
      Pred = CmpInst::getSwappedPredicate(Pred);

    // Determine if the predicate is not strict.
    bool IsNonStrictPred = ICmpInst::isLE(Pred) || ICmpInst::isGE(Pred);
    // Account for a mis-match between RdxKind and the predicate.
    switch (RdxKind) {
    case RecurKind::UMin:
    case RecurKind::SMin:
      IsNonStrictPred |= ICmpInst::isGT(Pred);
      break;
    case RecurKind::UMax:
    case RecurKind::SMax:
      IsNonStrictPred |= ICmpInst::isLT(Pred);
      break;
    default:
      llvm_unreachable("unsupported kind");
    }

    // TODO: Strict predicates need to find the first IV value for which the
    // predicate holds, not the last.
    if (Pred == CmpInst::ICMP_NE || !IsNonStrictPred)
      return false;

    // Cmp must be used by the select of a FindLastIV chain.
    VPValue *Sel = dyn_cast<VPSingleDefRecipe>(*Cmp->user_begin());
    VPValue *IVOp, *FindIV;
    if (!Sel ||
        !match(Sel,
               m_Select(m_Specific(Cmp), m_VPValue(IVOp), m_VPValue(FindIV))) ||
        Sel->getNumUsers() != 2 || !isa<VPWidenIntOrFpInductionRecipe>(IVOp))
      return false;
    auto *FindIVPhiR = dyn_cast<VPReductionPHIRecipe>(FindIV);
    if (!FindIVPhiR || !RecurrenceDescriptor::isFindLastIVRecurrenceKind(
                           FindIVPhiR->getRecurrenceKind()))
      return false;

    assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() &&
           "cannot handle inloop/ordered reductions yet");

    // Locate the user of Sel that is not the FindLastIV phi; it is rewritten
    // below through its operands 2 and 3. Validate it *before* touching the
    // plan so that an unexpected user makes us bail out cleanly instead of
    // asserting (or indexing out of range) on a half-rewritten plan.
    auto *FindIVResult = dyn_cast<VPInstruction>(
        *(Sel->user_begin() + (*Sel->user_begin() == FindIVPhiR ? 1 : 0)));
    if (!FindIVResult || FindIVResult->getNumOperands() < 4)
      return false;

    // All checks passed: replace the widened phi with a min/max reduction phi
    // that has the same start value and backedge value.
    auto NewPhiR = new VPReductionPHIRecipe(
        cast<PHINode>(MinMaxPhiR->getUnderlyingInstr()), RdxKind,
        *MinMaxPhiR->getOperand(0), false, false, 1);
    NewPhiR->insertBefore(MinMaxPhiR);
    MinMaxPhiR->replaceAllUsesWith(NewPhiR);
    NewPhiR->addOperand(MinMaxPhiR->getOperand(1));
    MinMaxPhiR->eraseFromParent();

    // The reduction using MinMaxPhiR needs adjusting to compute the correct
    // result:
    //  1. We need to find the last IV for which the condition based on the
    //     min/max recurrence is true,
    //  2. Compare the partial min/max reduction result to its final value and,
    //  3. Select the lanes of the partial FindLastIV reductions which
    //     correspond to the lanes matching the min/max reduction result.
    VPBuilder B(FindIVResult);
    VPInstruction *MinMaxResult =
        B.createNaryOp(VPInstruction::ComputeReductionResult,
                       {NewPhiR, NewPhiR->getBackedgeValue()}, VPIRFlags(), {});
    // Phi users of the backedge value (the resume phi) now take the final
    // reduced min/max value instead.
    NewPhiR->getBackedgeValue()->replaceUsesWithIf(
        MinMaxResult, [](VPUser &U, unsigned) { return isa<VPPhi>(&U); });
    auto *FinalMinMaxCmp = B.createICmp(
        CmpInst::ICMP_EQ, MinMaxResult->getOperand(1), MinMaxResult);
    // Lanes whose partial min/max differs from the final value fall back to
    // operand 2 of the FindIV result.
    auto *FinalIVSelect =
        B.createSelect(FinalMinMaxCmp, FindIVResult->getOperand(3),
                       FindIVResult->getOperand(2));
    FindIVResult->setOperand(3, FinalIVSelect);
  }
  return true;
}
5 changes: 5 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ struct VPlanTransforms {
GetIntOrFpInductionDescriptor,
ScalarEvolution &SE, const TargetLibraryInfo &TLI);

/// Try to legalize unclassified phis by converting VPWidenPHIRecipes to
/// min-max reductions used by FindLastIV reductions if possible. Returns
/// false if the VPlan contains VPWidenPHIRecipes that cannot be legalized.
static bool legalizeUnclassifiedPhis(VPlan &Plan);

/// Try to have all users of fixed-order recurrences appear after the recipe
/// defining their previous value, by either sinking users or hoisting recipes
/// defining their previous value (and its operands). Then introduce
Expand Down
Loading
Loading