[LV] Vectorize maxnum/minnum w/o fast-math flags. #148239

Open · wants to merge 1 commit into base: main
2 changes: 2 additions & 0 deletions llvm/include/llvm/Analysis/IVDescriptors.h
@@ -47,6 +47,8 @@ enum class RecurKind {
FMul, ///< Product of floats.
FMin, ///< FP min implemented in terms of select(cmp()).
FMax, ///< FP max implemented in terms of select(cmp()).
FMinNumNoFMFs, ///< FP min with llvm.minnum semantics and no fast-math flags.
FMaxNumNoFMFs, ///< FP max with llvm.maxnum semantics and no fast-math flags.
FMinimum, ///< FP min with llvm.minimum semantics
FMaximum, ///< FP max with llvm.maximum semantics
FMinimumNum, ///< FP min with llvm.minimumnum semantics
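For orientation (not part of the diff): a minimal C++ sketch of the scalar pattern the two new kinds are meant to describe, a maxnum-style FP max reduction with a non-NaN start value and no fast-math flags. The function name and start value are illustrative only; Clang typically lowers std::fmax on floats to the llvm.maxnum.f32 intrinsic (e.g. with -fno-math-errno).

#include <cmath>
#include <cstddef>

// Illustrative only: an FP max reduction with llvm.maxnum semantics and no
// fast-math flags. std::fmax ignores a single NaN operand: fmax(NaN, x) == x.
float max_of(const float *src, size_t n) {
  float max = -1.0e7f;              // non-NaN start value, as in the test below
  for (size_t i = 0; i < n; ++i)
    max = std::fmax(max, src[i]);   // typically lowered to llvm.maxnum.f32
  return max;
}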
24 changes: 21 additions & 3 deletions llvm/lib/Analysis/IVDescriptors.cpp
@@ -941,10 +941,28 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
m_Intrinsic<Intrinsic::minimumnum>(m_Value(), m_Value())) ||
match(I, m_Intrinsic<Intrinsic::maximumnum>(m_Value(), m_Value()));
};
if (isIntMinMaxRecurrenceKind(Kind) ||
(HasRequiredFMF() && isFPMinMaxRecurrenceKind(Kind)))
if (isIntMinMaxRecurrenceKind(Kind))
return isMinMaxPattern(I, Kind, Prev);
else if (isFMulAddIntrinsic(I))
if (isFPMinMaxRecurrenceKind(Kind)) {
if (HasRequiredFMF())
return isMinMaxPattern(I, Kind, Prev);
// For FMax/FMin reductions using the maxnum/minnum intrinsics with a non-NaN
// start value, we may be able to vectorize with extra checks ensuring the
// inputs are not NaN.
auto *StartV = dyn_cast<ConstantFP>(
OrigPhi->getIncomingValueForBlock(L->getLoopPredecessor()));
if (StartV && !StartV->getValue().isNaN() &&
isMinMaxPattern(I, Kind, Prev).isRecurrence()) {
if (((Kind == RecurKind::FMax &&
match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value()))) ||
Kind == RecurKind::FMaxNumNoFMFs))
return InstDesc(I, RecurKind::FMaxNumNoFMFs);
if (((Kind == RecurKind::FMin &&
match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value()))) ||
Kind == RecurKind::FMinNumNoFMFs))
return InstDesc(I, RecurKind::FMinNumNoFMFs);
}
} else if (isFMulAddIntrinsic(I))
return InstDesc(Kind == RecurKind::FMulAdd, I,
I->hasAllowReassoc() ? nullptr : I);
return InstDesc(false, I);
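A small sketch (not from the patch) of why the non-NaN start value and the extra NaN checks matter: maxnum drops a single NaN operand, while the select(cmp) max that RecurKind::FMax models propagates a NaN appearing as the second operand, so the two only agree while no NaN has been seen. The selmax helper is hypothetical and only illustrates the semantic difference.

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  const float nan = std::numeric_limits<float>::quiet_NaN();

  // llvm.maxnum / std::fmax semantics: a single NaN operand is ignored.
  assert(std::fmax(nan, 2.0f) == 2.0f);
  assert(std::fmax(2.0f, nan) == 2.0f);

  // A select(cmp) max, as modeled by RecurKind::FMax.
  auto selmax = [](float a, float b) { return a > b ? a : b; };

  // Both agree when neither operand is NaN ...
  assert(selmax(1.0f, 2.0f) == std::fmax(1.0f, 2.0f));
  // ... but disagree once a NaN appears as the second operand:
  assert(std::isnan(selmax(2.0f, nan))); // 2.0f > NaN is false, so NaN is chosen
  assert(std::fmax(2.0f, nan) == 2.0f);  // maxnum drops the NaN
  return 0;
}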
2 changes: 2 additions & 0 deletions llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -938,8 +938,10 @@ constexpr Intrinsic::ID llvm::getReductionIntrinsicID(RecurKind RK) {
case RecurKind::UMin:
return Intrinsic::vector_reduce_umin;
case RecurKind::FMax:
case RecurKind::FMaxNumNoFMFs:
return Intrinsic::vector_reduce_fmax;
case RecurKind::FMin:
case RecurKind::FMinNumNoFMFs:
return Intrinsic::vector_reduce_fmin;
case RecurKind::FMaximum:
return Intrinsic::vector_reduce_fmaximum;
14 changes: 12 additions & 2 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4346,8 +4346,15 @@ bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
ElementCount VF) const {
// Cross iteration phis such as reductions need special handling and are
// currently unsupported.
if (any_of(OrigLoop->getHeader()->phis(),
[&](PHINode &Phi) { return Legal->isFixedOrderRecurrence(&Phi); }))
if (any_of(OrigLoop->getHeader()->phis(), [&](PHINode &Phi) {
if (Legal->isReductionVariable(&Phi)) {
RecurKind RK =
Legal->getRecurrenceDescriptor(&Phi).getRecurrenceKind();
return RK == RecurKind::FMinNumNoFMFs ||
RK == RecurKind::FMaxNumNoFMFs;
}
return Legal->isFixedOrderRecurrence(&Phi);
}))
return false;

// Phis with uses outside of the loop require special handling and are
@@ -8808,6 +8815,9 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(

// Adjust the recipes for any inloop reductions.
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
if (!VPlanTransforms::runPass(
VPlanTransforms::handleMaxMinNumReductionsWithoutFastMath, *Plan))
return nullptr;

// Transform recipes to abstract recipes if it is legal and beneficial and
// clamp the range for better cost estimation.
16 changes: 9 additions & 7 deletions llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1326,20 +1326,22 @@ class LLVM_ABI_FOR_TEST VPWidenRecipe : public VPRecipeWithIRFlags,
unsigned Opcode;

public:
VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
const VPIRFlags &Flags, DebugLoc DL)
: VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
Opcode(Opcode) {}

VPWidenRecipe(Instruction &I, ArrayRef<VPValue *> Operands)
: VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), VPIRMetadata(I),
Opcode(I.getOpcode()) {}

VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
const VPIRFlags &Flags, const VPIRMetadata &Metadata,
DebugLoc DL)
: VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
VPIRMetadata(Metadata), Opcode(Opcode) {}

~VPWidenRecipe() override = default;

VPWidenRecipe *clone() override {
auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());
R->transferFlags(*this);
auto *R =
new VPWidenRecipe(getOpcode(), operands(), *this, *this, getDebugLoc());
R->setUnderlyingValue(getUnderlyingValue());
return R;
}

1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -85,6 +85,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
return ResTy;
}
case Instruction::ICmp:
case Instruction::FCmp:
case VPInstruction::ActiveLaneMask:
assert(inferScalarType(R->getOperand(0)) ==
inferScalarType(R->getOperand(1)) &&
108 changes: 108 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -628,3 +628,111 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
Term->addMetadata(LLVMContext::MD_prof, BranchWeights);
}
}

static VPValue *getMinMaxCompareValue(VPSingleDefRecipe *MinMaxOp,
VPReductionPHIRecipe *RedPhi) {
auto *RepR = dyn_cast<VPReplicateRecipe>(MinMaxOp);
if (!isa<VPWidenIntrinsicRecipe>(MinMaxOp) &&
!(RepR && (isa<IntrinsicInst>(RepR->getUnderlyingInstr()))))
return nullptr;

if (MinMaxOp->getOperand(0) == RedPhi)
return MinMaxOp->getOperand(1);
return MinMaxOp->getOperand(0);
}

bool VPlanTransforms::handleMaxMinNumReductionsWithoutFastMath(VPlan &Plan) {
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
VPValue *AnyNaN = nullptr;
VPReductionPHIRecipe *RedPhiR = nullptr;
VPRecipeWithIRFlags *MinMaxOp = nullptr;
for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R);
if (!Cur)
continue;
if (RedPhiR)
return false;
if (Cur->getRecurrenceKind() != RecurKind::FMaxNumNoFMFs &&
Cur->getRecurrenceKind() != RecurKind::FMinNumNoFMFs)
continue;

RedPhiR = Cur;

MinMaxOp = dyn_cast<VPRecipeWithIRFlags>(
RedPhiR->getBackedgeValue()->getDefiningRecipe());
if (!MinMaxOp)
return false;
VPValue *In = getMinMaxCompareValue(MinMaxOp, RedPhiR);
if (!In)
return false;

auto *IsNaN =
new VPInstruction(Instruction::FCmp, {In, In}, {CmpInst::FCMP_UNO}, {});
IsNaN->insertBefore(MinMaxOp);
AnyNaN = new VPInstruction(VPInstruction::AnyOf, {IsNaN});
AnyNaN->getDefiningRecipe()->insertAfter(IsNaN);
}

if (!AnyNaN)
return true;

auto *MiddleVPBB = Plan.getMiddleBlock();
auto *RdxResult = cast<VPSingleDefRecipe>(&MiddleVPBB->front());
auto *ScalarPH = Plan.getScalarPreheader();
// Update the resume phis in the scalar preheader. They must all resume from
// either the reduction result or the canonical induction. Bail out if there
// are other resume phis.
for (auto &R : ScalarPH->phis()) {
auto *ResumeR = cast<VPPhi>(&R);
VPValue *VecV = ResumeR->getOperand(0);
VPValue *BypassV = ResumeR->getOperand(ResumeR->getNumOperands() - 1);
if (VecV != RdxResult && VecV != &Plan.getVectorTripCount())
return false;
ResumeR->setOperand(
1, VecV == &Plan.getVectorTripCount() ? Plan.getCanonicalIV() : VecV);
ResumeR->addOperand(BypassV);
}

// Create a new reduction phi recipe with FMin or FMax, replacing
// FMinNumNoFMFs or FMaxNumNoFMFs respectively.
RecurKind NewRK = RedPhiR->getRecurrenceKind() == RecurKind::FMinNumNoFMFs
? RecurKind::FMin
: RecurKind::FMax;
auto *NewRedPhiR = new VPReductionPHIRecipe(
cast<PHINode>(RedPhiR->getUnderlyingValue()), NewRK,
*RedPhiR->getStartValue(), RedPhiR->isInLoop(), RedPhiR->isOrdered());
NewRedPhiR->addOperand(RedPhiR->getOperand(1));
NewRedPhiR->insertBefore(RedPhiR);
RedPhiR->replaceAllUsesWith(NewRedPhiR);
RedPhiR->eraseFromParent();

// Update the loop exit condition to exit if any of the inputs is NaN or the
// vector trip count is reached.
VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
VPBuilder Builder(LatchVPBB->getTerminator());
auto *LatchExitingBranch = cast<VPInstruction>(LatchVPBB->getTerminator());
assert(LatchExitingBranch->getOpcode() == VPInstruction::BranchOnCount &&
"Unexpected terminator");
auto *IsLatchExitTaken =
Builder.createICmp(CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
LatchExitingBranch->getOperand(1));
auto *AnyExitTaken =
Builder.createNaryOp(Instruction::Or, {AnyNaN, IsLatchExitTaken});
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
LatchExitingBranch->eraseFromParent();

// Split the middle block and introduce a new block, branching to the scalar
// preheader to resume iteration in the scalar loop if any NaNs have been
// encountered.
MiddleVPBB->splitAt(std::prev(MiddleVPBB->end()));
Builder.setInsertPoint(MiddleVPBB, MiddleVPBB->begin());
auto *NewSel =
Builder.createSelect(AnyNaN, NewRedPhiR, RdxResult->getOperand(1));
RdxResult->setOperand(1, NewSel);
Builder.setInsertPoint(MiddleVPBB);
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyNaN);
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
MiddleVPBB->swapSuccessors();
std::swap(ScalarPH->getPredecessors()[1], ScalarPH->getPredecessors().back());
return true;
}
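To tie the pieces together, a rough scalar emulation of the control flow this transform builds (illustrative only; the block width of 8, the start value and all names are assumptions taken from the test below): process the input in vector-sized blocks, leave the vector loop as soon as a block contains a NaN, keep the partial result computed before that block, and finish from the start of that block in the scalar loop, where maxnum handles NaNs correctly.

#include <cmath>
#include <cstddef>

// Rough scalar emulation of the transformed loop structure (illustrative only).
float fmaxnum_with_nan_fallback(const float *src, size_t n) {
  const size_t VF = 8;              // VF * UF, as in the test below
  float red = -1.0e7f;              // non-NaN start value
  size_t i = 0;

  // "Vector" loop: exits early if the current block contains any NaN.
  for (; i + VF <= n; i += VF) {
    bool any_nan = false;
    float block_red = red;
    for (size_t j = 0; j < VF; ++j) {
      any_nan |= std::isnan(src[i + j]);
      block_red = std::fmax(block_red, src[i + j]);
    }
    if (any_nan)
      break;                        // middle block selects the previous phi value
    red = block_red;                // otherwise keep this block's result
  }

  // Scalar loop: resumes at the start of the NaN block (or at the remainder).
  for (; i < n; ++i)
    red = std::fmax(red, src[i]);
  return red;
}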
15 changes: 11 additions & 4 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -585,6 +585,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *Op = State.get(getOperand(0), vputils::onlyFirstLaneUsed(this));
return Builder.CreateFreeze(Op, Name);
}
case Instruction::FCmp:
case Instruction::ICmp: {
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
@@ -595,8 +596,11 @@ Value *VPInstruction::generate(VPTransformState &State) {
llvm_unreachable("should be handled by VPPhi::execute");
}
case Instruction::Select: {
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed);
bool OnlyFirstLaneUsed =
State.VF.isScalar() || vputils::onlyFirstLaneUsed(this);
Value *Cond =
State.get(getOperand(0),
OnlyFirstLaneUsed || vputils::isSingleScalar(getOperand(0)));
Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed);
Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed);
return Builder.CreateSelect(Cond, Op1, Op2, Name);
@@ -858,7 +862,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *Res = State.get(getOperand(0));
for (VPValue *Op : drop_begin(operands()))
Res = Builder.CreateOr(Res, State.get(Op));
return Builder.CreateOrReduce(Res);
return State.VF.isScalar() ? Res : Builder.CreateOrReduce(Res);
}
case VPInstruction::FirstActiveLane: {
if (getNumOperands() == 1) {
@@ -1031,6 +1035,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
switch (getOpcode()) {
case Instruction::ExtractElement:
case Instruction::Freeze:
case Instruction::FCmp:
case Instruction::ICmp:
case Instruction::Select:
case VPInstruction::AnyOf:
@@ -1066,6 +1071,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
return Op == getOperand(1);
case Instruction::PHI:
return true;
case Instruction::FCmp:
case Instruction::ICmp:
case Instruction::Select:
case Instruction::Or:
@@ -1098,6 +1104,7 @@ bool VPInstruction::onlyFirstPartUsed(const VPValue *Op) const {
switch (getOpcode()) {
default:
return false;
case Instruction::FCmp:
case Instruction::ICmp:
case Instruction::Select:
return vputils::onlyFirstPartUsed(this);
@@ -1782,7 +1789,7 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
return Opcode == Instruction::ZExt;
break;
case OperationType::Cmp:
return Opcode == Instruction::ICmp;
return Opcode == Instruction::FCmp || Opcode == Instruction::ICmp;
case OperationType::Other:
return true;
}
5 changes: 5 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -99,6 +99,11 @@ struct VPlanTransforms {
/// not valid.
static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);

/// Check if \p Plan contains any FMaxNumNoFMFs or FMinNumNoFMFs reductions.
/// If it does, try to update the vector loop to exit early if any input is
/// NaN, resuming execution in the scalar loop to handle the NaNs there.
static bool handleMaxMinNumReductionsWithoutFastMath(VPlan &Plan);

/// Clear NSW/NUW flags from reduction instructions if necessary.
static void clearReductionWrapFlags(VPlan &Plan);

@@ -42,18 +42,56 @@ define float @fmaxnum(ptr %src, i64 %n) {
; CHECK-LABEL: define float @fmaxnum(
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ <float -1.000000e+07, float 0xFFF8000000000000, float 0xFFF8000000000000, float 0xFFF8000000000000>, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float 0xFFF8000000000000), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 4
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]])
; CHECK-NEXT: [[TMP8]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI1]], <4 x float> [[WIDE_LOAD2]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP6]], [[TMP9]]
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP6]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP6]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP8]]
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp ogt <4 x float> [[TMP11]], [[TMP12]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP11]], <4 x float> [[TMP12]]
; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[RDX_MINMAX_SELECT]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP6]], label %[[SCALAR_PH]], label %[[MIDDLE_BLOCK_SPLIT:.*]]
; CHECK: [[MIDDLE_BLOCK_SPLIT]]:
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK_SPLIT]] ], [ [[IV]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP13]], %[[MIDDLE_BLOCK_SPLIT]] ], [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ -1.000000e+07, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[MAX:%.*]] = phi float [ -1.000000e+07, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[MAX:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_SRC1:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV1]]
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC1]], align 4
; CHECK-NEXT: [[MAX_NEXT]] = call float @llvm.maxnum.f32(float [[MAX]], float [[L]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ]
; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi float [ [[MAX_NEXT]], %[[LOOP]] ], [ [[TMP13]], %[[MIDDLE_BLOCK_SPLIT]] ]
; CHECK-NEXT: ret float [[MAX_NEXT_LCSSA]]
;
entry: