diff --git a/llvm/lib/Analysis/GuardUtils.cpp b/llvm/lib/Analysis/GuardUtils.cpp
--- a/llvm/lib/Analysis/GuardUtils.cpp
+++ b/llvm/lib/Analysis/GuardUtils.cpp
@@ -13,9 +13,9 @@
 #include "llvm/IR/PatternMatch.h"
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 bool llvm::isGuard(const User *U) {
-  using namespace llvm::PatternMatch;
   return match(U, m_Intrinsic<Intrinsic::experimental_guard>());
 }
 
@@ -32,7 +32,6 @@
   if (!parseWidenableBranch(U, Condition, WidenableCondition, GuardedBB,
                             DeoptBB))
     return false;
-  using namespace llvm::PatternMatch;
   for (auto &Insn : *DeoptBB) {
     if (match(&Insn, m_Intrinsic<Intrinsic::experimental_deoptimize>()))
       return true;
@@ -45,17 +44,32 @@
 bool llvm::parseWidenableBranch(const User *U, Value *&Condition,
                                 Value *&WidenableCondition,
                                 BasicBlock *&IfTrueBB, BasicBlock *&IfFalseBB) {
-  using namespace llvm::PatternMatch;
+  if (match(U, m_Br(m_Intrinsic<Intrinsic::experimental_widenable_condition>(),
+                    IfTrueBB, IfFalseBB)) &&
+      cast<BranchInst>(U)->getCondition()->hasOneUse()) {
+    WidenableCondition = cast<BranchInst>(U)->getCondition();
+    Condition = ConstantInt::getTrue(IfTrueBB->getContext());
+    return true;
+  }
+
+  // Check for two cases:
+  // 1) br (i1 (and A, WC())), label %IfTrue, label %IfFalse
+  // 2) br (i1 (and WC(), B)), label %IfTrue, label %IfFalse
+  // We do not check for more generalized and trees as we should canonicalize
+  // to the form above in instcombine. (TODO)
   if (!match(U, m_Br(m_And(m_Value(Condition), m_Value(WidenableCondition)),
                      IfTrueBB, IfFalseBB)))
     return false;
+  if (!match(WidenableCondition,
+             m_Intrinsic<Intrinsic::experimental_widenable_condition>())) {
+    if (!match(Condition,
+               m_Intrinsic<Intrinsic::experimental_widenable_condition>()))
+      return false;
+    std::swap(Condition, WidenableCondition);
+  }
+
   // For the branch to be (easily) widenable, it must not correlate with other
   // branches. Thus, the widenable condition must have a single use.
-  if (!WidenableCondition->hasOneUse() ||
-      !cast<BranchInst>(U)->getCondition()->hasOneUse())
-    return false;
-  // TODO: At the moment, we only recognize the branch if the WC call in this
-  // specific position. We should generalize!
-  return match(WidenableCondition,
-               m_Intrinsic<Intrinsic::experimental_widenable_condition>());
+  return (WidenableCondition->hasOneUse() &&
+          cast<BranchInst>(U)->getCondition()->hasOneUse());
 }
diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
--- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -84,15 +84,16 @@
            "Bad guard intrinsic?");
     return GI->getArgOperand(0);
   }
-  if (isGuardAsWidenableBranch(I)) {
-    auto *Cond = cast<BranchInst>(I)->getCondition();
-    return cast<Instruction>(Cond)->getOperand(0);
-  }
+  Value *Cond, *WC;
+  BasicBlock *IfTrueBB, *IfFalseBB;
+  if (parseWidenableBranch(I, Cond, WC, IfTrueBB, IfFalseBB))
+    return Cond;
+  return cast<BranchInst>(I)->getCondition();
 }
 
 // Set the condition for \p I to \p NewCond. \p I can either be a guard or a
-// conditional branch.
+// conditional branch.
 static void setCondition(Instruction *I, Value *NewCond) {
   if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
     assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
diff --git a/llvm/lib/Transforms/Utils/GuardUtils.cpp b/llvm/lib/Transforms/Utils/GuardUtils.cpp
--- a/llvm/lib/Transforms/Utils/GuardUtils.cpp
+++ b/llvm/lib/Transforms/Utils/GuardUtils.cpp
@@ -15,10 +15,12 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 static cl::opt<uint32_t> PredicatePassBranchWeight(
     "guards-predicate-pass-branch-weight", cl::Hidden, cl::init(1 << 20),
@@ -80,23 +82,49 @@
 void llvm::widenWidenableBranch(BranchInst *WidenableBR, Value *NewCond) {
   assert(isWidenableBranch(WidenableBR) && "precondition");
 
-  Instruction *WCAnd = cast<Instruction>(WidenableBR->getCondition());
-  // Condition is only guaranteed to dominate branch
-  WCAnd->moveBefore(WidenableBR);
-  Value *OldCond = WCAnd->getOperand(0);
-  IRBuilder<> B(WCAnd);
-  WCAnd->setOperand(0, B.CreateAnd(NewCond, OldCond));
+  // The tempting trivially option is to produce something like this:
+  // br (and oldcond, newcond) where oldcond is assumed to contain a widenable
+  // condition, but that doesn't match the pattern parseWidenableBranch expects
+  // so we have to be more sophisticated.
+  if (match(WidenableBR->getCondition(),
+            m_Intrinsic<Intrinsic::experimental_widenable_condition>())) {
+    IRBuilder<> B(WidenableBR);
+    WidenableBR->setCondition(B.CreateAnd(NewCond,
+                                          WidenableBR->getCondition()));
+  } else {
+    Instruction *WCAnd = cast<Instruction>(WidenableBR->getCondition());
+    // Condition is only guaranteed to dominate branch
+    WCAnd->moveBefore(WidenableBR);
+    IRBuilder<> B(WCAnd);
+    const bool Op0IsWC =
+        match(WCAnd->getOperand(0),
+              m_Intrinsic<Intrinsic::experimental_widenable_condition>());
+    const unsigned CondOpIdx = Op0IsWC ? 1 : 0;
+    Value *OldCond = WCAnd->getOperand(CondOpIdx);
+    NewCond = B.CreateAnd(NewCond, OldCond);
+    WCAnd->setOperand(CondOpIdx, NewCond);
+  }
 
   assert(isWidenableBranch(WidenableBR) && "preserve widenabiliy");
 }
 
 void llvm::setWidenableBranchCond(BranchInst *WidenableBR, Value *NewCond) {
   assert(isWidenableBranch(WidenableBR) && "precondition");
 
-  Instruction *WCAnd = cast<Instruction>(WidenableBR->getCondition());
-  // Condition is only guaranteed to dominate branch
-  WCAnd->moveBefore(WidenableBR);
-  WCAnd->setOperand(0, NewCond);
-
+  if (match(WidenableBR->getCondition(),
+            m_Intrinsic<Intrinsic::experimental_widenable_condition>())) {
+    IRBuilder<> B(WidenableBR);
+    WidenableBR->setCondition(B.CreateAnd(NewCond,
+                                          WidenableBR->getCondition()));
+  } else {
+    Instruction *WCAnd = cast<Instruction>(WidenableBR->getCondition());
+    // Condition is only guaranteed to dominate branch
+    WCAnd->moveBefore(WidenableBR);
+    const bool Op0IsWC =
+        match(WCAnd->getOperand(0),
+              m_Intrinsic<Intrinsic::experimental_widenable_condition>());
+    const unsigned CondOpIdx = Op0IsWC ? 
1 : 0; + WCAnd->setOperand(CondOpIdx, NewCond); + } assert(isWidenableBranch(WidenableBR) && "preserve widenabiliy"); } diff --git a/llvm/test/Transforms/GuardWidening/basic_widenable_condition_guards.ll b/llvm/test/Transforms/GuardWidening/basic_widenable_condition_guards.ll --- a/llvm/test/Transforms/GuardWidening/basic_widenable_condition_guards.ll +++ b/llvm/test/Transforms/GuardWidening/basic_widenable_condition_guards.ll @@ -1021,6 +1021,91 @@ ret void } + +define void @swapped_wb(i1 %cond_0, i1 %cond_1) { +; CHECK-LABEL: @swapped_wb( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[COND_1:%.*]] +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDENABLE_COND]], [[WIDE_CHK]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] +; CHECK-NEXT: ret void +; CHECK: guarded: +; CHECK-NEXT: [[WIDENABLE_COND3:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND4:%.*]] = and i1 [[COND_1]], [[WIDENABLE_COND3]] +; CHECK-NEXT: br i1 true, label [[GUARDED1:%.*]], label [[DEOPT2:%.*]], !prof !0 +; CHECK: deopt2: +; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] +; CHECK-NEXT: ret void +; CHECK: guarded1: +; CHECK-NEXT: ret void +; +entry: + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %widenable_cond, %cond_0 + br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %entry + call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] + ret void + +guarded: ; preds = %entry + %widenable_cond3 = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond4 = and i1 %cond_1, %widenable_cond3 + br i1 %exiplicit_guard_cond4, label %guarded1, label %deopt2, !prof !0 + +deopt2: ; preds = %guarded + call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] + ret void + +guarded1: ; preds = %guarded + ret void +} + +define void @trivial_wb(i1 %cond_0) { +; CHECK-LABEL: @trivial_wb( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 true, [[COND_0:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[TMP0]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] +; CHECK-NEXT: ret void +; CHECK: guarded: +; CHECK-NEXT: [[WIDENABLE_COND3:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND4:%.*]] = and i1 [[COND_0]], [[WIDENABLE_COND3]] +; CHECK-NEXT: br i1 true, label [[GUARDED1:%.*]], label [[DEOPT2:%.*]], !prof !0 +; CHECK: deopt2: +; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] +; CHECK-NEXT: ret void +; CHECK: guarded1: +; CHECK-NEXT: ret void +; +entry: + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + br i1 %widenable_cond, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %entry + call void (...) 
@llvm.experimental.deoptimize.isVoid() [ "deopt"() ] + ret void + +guarded: ; preds = %entry + %widenable_cond3 = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond4 = and i1 %cond_0, %widenable_cond3 + br i1 %exiplicit_guard_cond4, label %guarded1, label %deopt2, !prof !0 + +deopt2: ; preds = %guarded + call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] + ret void + +guarded1: ; preds = %guarded + ret void +} + + declare void @llvm.experimental.deoptimize.isVoid(...) ; Function Attrs: inaccessiblememonly nounwind diff --git a/llvm/test/Transforms/LoopPredication/basic_widenable_branch_guards.ll b/llvm/test/Transforms/LoopPredication/basic_widenable_branch_guards.ll --- a/llvm/test/Transforms/LoopPredication/basic_widenable_branch_guards.ll +++ b/llvm/test/Transforms/LoopPredication/basic_widenable_branch_guards.ll @@ -1924,6 +1924,72 @@ ret i32 0 } +define i32 @swapped_wb(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @swapped_wb( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i32 [[N]], [[LENGTH:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 0, [[LENGTH]] +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[WIDENABLE_COND]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: [[DEOPTCALL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32(i32 9) [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTCALL]] +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]], !prof !1 +; CHECK: exit.loopexit: +; CHECK-NEXT: [[LOOP_ACC_NEXT_LCSSA:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_ACC_NEXT_LCSSA]], [[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: ; preds = %entry + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + br label %loop + +loop: ; preds = %guarded, %loop.preheader + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + %within.bounds = icmp ult i32 %i, %length + %exiplicit_guard_cond = and i1 %widenable_cond, %within.bounds + br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0 + +deopt: ; preds = %loop + %deoptcall = call i32 (...) 
@llvm.experimental.deoptimize.i32(i32 9) [ "deopt"() ] + ret i32 %deoptcall + +guarded: ; preds = %loop + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit, !prof !2 + +exit: ; preds = %guarded, %entry + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %guarded ] + ret i32 %result +} declare i32 @llvm.experimental.deoptimize.i32(...) diff --git a/llvm/test/Transforms/LoopPredication/predicate-exits.ll b/llvm/test/Transforms/LoopPredication/predicate-exits.ll --- a/llvm/test/Transforms/LoopPredication/predicate-exits.ll +++ b/llvm/test/Transforms/LoopPredication/predicate-exits.ll @@ -762,6 +762,166 @@ ret i32 %result } +define i32 @swapped_wb(i32* %array, i32 %length, i32 %n, i1 %cond_0) { +; CHECK-LABEL: @swapped_wb( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[COND_0:%.*]] +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDENABLE_COND]], [[TMP5]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH]] +; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT2:%.*]], !prof !0 +; CHECK: deopt2: +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET2]] +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +entry: + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %widenable_cond, %cond_0 + br i1 %exiplicit_guard_cond, label %loop.preheader, label %deopt, !prof !0 + +deopt: + %deoptret = call i32 (...) 
@llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret + +loop.preheader: + br label %loop + +loop: + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + call void @unknown() + %within.bounds = icmp ult i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt2, !prof !0 + +deopt2: + call void @unknown() + %deoptret2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret2 + +guarded: + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + store i32 0, i32* %array.i.ptr + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + +define i32 @trivial_wb(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @trivial_wb( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[N:%.*]], 1 +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[LENGTH:%.*]], [[TMP1]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP2]], i32 [[LENGTH]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP3]] +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[TMP5]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH]] +; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT2:%.*]], !prof !0 +; CHECK: deopt2: +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET2]] +; CHECK: guarded: +; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] +; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 +; CHECK-NEXT: store i32 0, i32* [[ARRAY_I_PTR]] +; CHECK-NEXT: [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]] +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ] +; CHECK-NEXT: ret i32 [[RESULT]] +; +entry: + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + br i1 %widenable_cond, label %loop.preheader, label %deopt, !prof !0 + +deopt: + %deoptret = call i32 (...) 
@llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret + +loop.preheader: + br label %loop + +loop: + %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ] + call void @unknown() + %within.bounds = icmp ult i32 %i, %length + br i1 %within.bounds, label %guarded, label %deopt2, !prof !0 + +deopt2: + call void @unknown() + %deoptret2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret2 + +guarded: + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + store i32 0, i32* %array.i.ptr + %loop.acc.next = add i32 %loop.acc, %array.i + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ %loop.acc.next, %guarded ] + ret i32 %result +} + ; TODO: Non-latch exits can still be predicated ; This is currently prevented by an overly restrictive profitability check. define i32 @todo_unconditional_latch(i32* %array, i32 %length, i1 %cond_0) { diff --git a/llvm/test/Transforms/SimplifyCFG/wc-widen-block.ll b/llvm/test/Transforms/SimplifyCFG/wc-widen-block.ll --- a/llvm/test/Transforms/SimplifyCFG/wc-widen-block.ll +++ b/llvm/test/Transforms/SimplifyCFG/wc-widen-block.ll @@ -363,6 +363,81 @@ ret i32 0 } +define i32 @trivial_wb(i1 %cond_0, i32* %p) { +; CHECK-LABEL: @trivial_wb( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: br i1 [[WIDENABLE_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET]] +; CHECK: guarded: +; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]] +; CHECK-NEXT: [[COND_1:%.*]] = icmp eq i32 [[V]], 0 +; CHECK-NEXT: br i1 [[COND_1]], label [[RETURN:%.*]], label [[DEOPT]], !prof !0 +; CHECK: return: +; CHECK-NEXT: ret i32 0 +; +entry: + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + br i1 %widenable_cond, label %guarded, label %deopt, !prof !0 + +deopt: + %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret + +guarded: + %v = load i32, i32* %p + %cond_1 = icmp eq i32 %v, 0 + br i1 %cond_1, label %return, label %deopt2, !prof !0 + +deopt2: + %deoptret2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret2 + +return: + ret i32 0 +} + + +define i32 @swapped_wb(i1 %cond_0, i32* %p) { +; CHECK-LABEL: @swapped_wb( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDENABLE_COND]], [[COND_0:%.*]] +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK: deopt: +; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) 
@llvm.experimental.deoptimize.i32() [ "deopt"() ] +; CHECK-NEXT: ret i32 [[DEOPTRET]] +; CHECK: guarded: +; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]] +; CHECK-NEXT: [[COND_1:%.*]] = icmp eq i32 [[V]], 0 +; CHECK-NEXT: br i1 [[COND_1]], label [[RETURN:%.*]], label [[DEOPT]], !prof !0 +; CHECK: return: +; CHECK-NEXT: ret i32 0 +; +entry: + %widenable_cond = call i1 @llvm.experimental.widenable.condition() + %exiplicit_guard_cond = and i1 %widenable_cond, %cond_0 + br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0 + +deopt: + %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret + +guarded: + %v = load i32, i32* %p + %cond_1 = icmp eq i32 %v, 0 + br i1 %cond_1, label %return, label %deopt2, !prof !0 + +deopt2: + %deoptret2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] + ret i32 %deoptret2 + +return: + ret i32 0 +} +