Skip to content

Commit

Permalink
[InstCombine] Replace an integer comparison of a phi node with mult…
Browse files Browse the repository at this point in the history
…iple `ucmp`/`scmp` operands and a constant with `phi` of individual comparisons of original intrinsic's arguments (#107769)

When we have a `phi` instruction with more than one of its incoming
values being a call to `ucmp` or `scmp`, which is then compared with an
integer constant, we can move the comparison through the `phi` into the
incoming basic blocks because we know that a comparison of `ucmp`/`scmp`
with a constant will be simplified by the next iteration of InstCombine.

There's a high chance that other similar patterns can be identified, in
which case they can be easily handled by the same code by moving the
check for "simplifiable" instructions into a lambda.
  • Loading branch information
Poseydon42 committed Sep 13, 2024
1 parent 29e5fe7 commit 21e3a21
Show file tree
Hide file tree
Showing 2 changed files with 185 additions and 38 deletions.
88 changes: 50 additions & 38 deletions llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1809,8 +1809,8 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
// Check to see whether the instruction can be folded into each phi operand.
// If there is one operand that does not fold, remember the BB it is in.
SmallVector<Value *> NewPhiValues;
BasicBlock *NonSimplifiedBB = nullptr;
Value *NonSimplifiedInVal = nullptr;
SmallVector<unsigned int> OpsToMoveUseToIncomingBB;
bool SeenNonSimplifiedInVal = false;
for (unsigned i = 0; i != NumPHIValues; ++i) {
Value *InVal = PN->getIncomingValue(i);
BasicBlock *InBB = PN->getIncomingBlock(i);
Expand All @@ -1820,35 +1820,64 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
continue;
}

if (NonSimplifiedBB) return nullptr; // More than one non-simplified value.
// If the only use of phi is comparing it with a constant then we can
// put this comparison in the incoming BB directly after a ucmp/scmp call
// because we know that it will simplify to a single icmp.
// NOTE: the single-use check here is not only to ensure that the
// optimization is profitable, but also to avoid creating a potentially
// invalid phi node when we have a multi-edge in the CFG.
const APInt *Ignored;
if (isa<CmpIntrinsic>(InVal) && InVal->hasOneUse() &&
match(&I, m_ICmp(m_Specific(PN), m_APInt(Ignored)))) {
OpsToMoveUseToIncomingBB.push_back(i);
NewPhiValues.push_back(nullptr);
continue;
}

if (SeenNonSimplifiedInVal)
return nullptr; // More than one non-simplified value.
SeenNonSimplifiedInVal = true;

// If there is exactly one non-simplified value, we can insert a copy of the
// operation in that block. However, if this is a critical edge, we would
// be inserting the computation on some other paths (e.g. inside a loop).
// Only do this if the pred block is unconditionally branching into the phi
// block. Also, make sure that the pred block is not dead code.
BranchInst *BI = dyn_cast<BranchInst>(InBB->getTerminator());
if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(InBB))
return nullptr;

NonSimplifiedBB = InBB;
NonSimplifiedInVal = InVal;
NewPhiValues.push_back(nullptr);
OpsToMoveUseToIncomingBB.push_back(i);

// If the InVal is an invoke at the end of the pred block, then we can't
// insert a computation after it without breaking the edge.
if (isa<InvokeInst>(InVal))
if (cast<Instruction>(InVal)->getParent() == NonSimplifiedBB)
if (cast<Instruction>(InVal)->getParent() == InBB)
return nullptr;

// Do not push the operation across a loop backedge. This could result in
// an infinite combine loop, and is generally non-profitable (especially
// if the operation was originally outside the loop).
if (isBackEdge(NonSimplifiedBB, PN->getParent()))
if (isBackEdge(InBB, PN->getParent()))
return nullptr;
}

// If there is exactly one non-simplified value, we can insert a copy of the
// operation in that block. However, if this is a critical edge, we would be
// inserting the computation on some other paths (e.g. inside a loop). Only
// do this if the pred block is unconditionally branching into the phi block.
// Also, make sure that the pred block is not dead code.
if (NonSimplifiedBB != nullptr) {
BranchInst *BI = dyn_cast<BranchInst>(NonSimplifiedBB->getTerminator());
if (!BI || !BI->isUnconditional() ||
!DT.isReachableFromEntry(NonSimplifiedBB))
return nullptr;
// Clone the instruction that uses the phi node and move it into the incoming
// BB because we know that the next iteration of InstCombine will simplify it.
for (auto OpIndex : OpsToMoveUseToIncomingBB) {
Value *Op = PN->getIncomingValue(OpIndex);
BasicBlock *OpBB = PN->getIncomingBlock(OpIndex);

Instruction *Clone = I.clone();
for (Use &U : Clone->operands()) {
if (U == PN)
U = Op;
else
U = U->DoPHITranslation(PN->getParent(), OpBB);
}
Clone = InsertNewInstBefore(Clone, OpBB->getTerminator()->getIterator());
NewPhiValues[OpIndex] = Clone;
}

// Okay, we can do the transformation: create the new PHI node.
Expand All @@ -1857,30 +1886,13 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
NewPN->takeName(PN);
NewPN->setDebugLoc(PN->getDebugLoc());

// If we are going to have to insert a new computation, do so right before the
// predecessor's terminator.
Instruction *Clone = nullptr;
if (NonSimplifiedBB) {
Clone = I.clone();
for (Use &U : Clone->operands()) {
if (U == PN)
U = NonSimplifiedInVal;
else
U = U->DoPHITranslation(PN->getParent(), NonSimplifiedBB);
}
InsertNewInstBefore(Clone, NonSimplifiedBB->getTerminator()->getIterator());
}

for (unsigned i = 0; i != NumPHIValues; ++i) {
if (NewPhiValues[i])
NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));
else
NewPN->addIncoming(Clone, PN->getIncomingBlock(i));
}
for (unsigned i = 0; i != NumPHIValues; ++i)
NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i));

for (User *U : make_early_inc_range(PN->users())) {
Instruction *User = cast<Instruction>(U);
if (User == &I) continue;
if (User == &I)
continue;
replaceInstUsesWith(*User, NewPN);
eraseInstFromFunction(*User);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

declare void @use(i8 %value);

; Since we know that any comparison of ucmp/scmp with a constant will result in
; a comparison of ucmp/scmp's operands, we can propagate such a comparison
; through the phi node and let the next iteration of instcombine simplify it.
define i1 @icmp_of_phi_of_scmp_with_constant(i1 %c, i16 %x, i16 %y)
; CHECK-LABEL: define i1 @icmp_of_phi_of_scmp_with_constant(
; CHECK-SAME: i1 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br i1 [[C]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; CHECK: [[TRUE]]:
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i16 [[X]], [[Y]]
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[FALSE]]:
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i16 [[Y]], [[X]]
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[R:%.*]] = phi i1 [ [[TMP0]], %[[TRUE]] ], [ [[TMP1]], %[[FALSE]] ]
; CHECK-NEXT: ret i1 [[R]]
;
{
entry:
br i1 %c, label %true, label %false
true:
%cmp1 = call i8 @llvm.scmp(i16 %x, i16 %y)
br label %exit
false:
%cmp2 = call i8 @llvm.scmp(i16 %y, i16 %x)
br label %exit
exit:
%phi = phi i8 [%cmp1, %true], [%cmp2, %false]
%r = icmp slt i8 %phi, 0
ret i1 %r
}

; When one of the incoming values is ucmp/scmp and the other is not we can still perform the transformation
define i1 @icmp_of_phi_of_one_scmp_with_constant(i1 %c, i16 %x, i16 %y, i8 %false_val)
; CHECK-LABEL: define i1 @icmp_of_phi_of_one_scmp_with_constant(
; CHECK-SAME: i1 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]], i8 [[FALSE_VAL:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br i1 [[C]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; CHECK: [[TRUE]]:
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i16 [[X]], [[Y]]
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[FALSE]]:
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[FALSE_VAL]], 0
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[PHI:%.*]] = phi i1 [ [[TMP0]], %[[TRUE]] ], [ [[TMP1]], %[[FALSE]] ]
; CHECK-NEXT: ret i1 [[PHI]]
;
{
entry:
br i1 %c, label %true, label %false
true:
%cmp1 = call i8 @llvm.scmp(i16 %x, i16 %y)
br label %exit
false:
br label %exit
exit:
%phi = phi i8 [%cmp1, %true], [%false_val, %false]
%r = icmp slt i8 %phi, 0
ret i1 %r
}

; Negative test: the RHS of comparison that uses the phi node is not constant
define i1 @icmp_of_phi_of_scmp_with_non_constant(i1 %c, i16 %x, i16 %y, i8 %cmp)
; CHECK-LABEL: define i1 @icmp_of_phi_of_scmp_with_non_constant(
; CHECK-SAME: i1 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]], i8 [[CMP:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br i1 [[C]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; CHECK: [[TRUE]]:
; CHECK-NEXT: [[CMP1:%.*]] = call i8 @llvm.scmp.i8.i16(i16 [[X]], i16 [[Y]])
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[FALSE]]:
; CHECK-NEXT: [[CMP2:%.*]] = call i8 @llvm.scmp.i8.i16(i16 [[Y]], i16 [[X]])
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[PHI:%.*]] = phi i8 [ [[CMP1]], %[[TRUE]] ], [ [[CMP2]], %[[FALSE]] ]
; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[PHI]], [[CMP]]
; CHECK-NEXT: ret i1 [[R]]
;
{
entry:
br i1 %c, label %true, label %false
true:
%cmp1 = call i8 @llvm.scmp(i16 %x, i16 %y)
br label %exit
false:
%cmp2 = call i8 @llvm.scmp(i16 %y, i16 %x)
br label %exit
exit:
%phi = phi i8 [%cmp1, %true], [%cmp2, %false]
%r = icmp slt i8 %phi, %cmp
ret i1 %r
}

; Negative test: more than one incoming value of the phi node is not one-use
define i1 @icmp_of_phi_of_scmp_with_constant_not_one_use(i1 %c, i16 %x, i16 %y)
; CHECK-LABEL: define i1 @icmp_of_phi_of_scmp_with_constant_not_one_use(
; CHECK-SAME: i1 [[C:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br i1 [[C]], label %[[TRUE:.*]], label %[[FALSE:.*]]
; CHECK: [[TRUE]]:
; CHECK-NEXT: [[CMP1:%.*]] = call i8 @llvm.scmp.i8.i16(i16 [[X]], i16 [[Y]])
; CHECK-NEXT: call void @use(i8 [[CMP1]])
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[FALSE]]:
; CHECK-NEXT: [[CMP2:%.*]] = call i8 @llvm.scmp.i8.i16(i16 [[Y]], i16 [[X]])
; CHECK-NEXT: call void @use(i8 [[CMP2]])
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[PHI:%.*]] = phi i8 [ [[CMP1]], %[[TRUE]] ], [ [[CMP2]], %[[FALSE]] ]
; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[PHI]], 0
; CHECK-NEXT: ret i1 [[R]]
;
{
entry:
br i1 %c, label %true, label %false
true:
%cmp1 = call i8 @llvm.scmp(i16 %x, i16 %y)
call void @use(i8 %cmp1)
br label %exit
false:
%cmp2 = call i8 @llvm.scmp(i16 %y, i16 %x)
call void @use(i8 %cmp2)
br label %exit
exit:
%phi = phi i8 [%cmp1, %true], [%cmp2, %false]
%r = icmp slt i8 %phi, 0
ret i1 %r
}

0 comments on commit 21e3a21

Please sign in to comment.