Index: lib/Transforms/Scalar/GuardWidening.cpp =================================================================== --- lib/Transforms/Scalar/GuardWidening.cpp +++ lib/Transforms/Scalar/GuardWidening.cpp @@ -108,7 +108,7 @@ bool eliminateGuardViaWidening( Instruction *Guard, const df_iterator &DFSI, const DenseMap> & - GuardsPerBlock); + GuardsPerBlock, bool InvertCondition = false); // Get the condition from \p GuardInst. Value *getGuardCondition(Instruction *GuardInst); @@ -164,13 +164,14 @@ void makeAvailableAt(Value *V, Instruction *InsertPos); /// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try - /// to generate an expression computing the logical AND of \p Cond0 and \p - /// Cond1. Return true if the expression computing the AND is only as + /// to generate an expression computing the logical AND of \p Cond0 and (\p + /// Cond1 XOR \p InvertCondition). + /// Return true if the expression computing the AND is only as /// expensive as computing one of the two. If \p InsertPt is true then /// actually generate the resulting expression, make it available at \p /// InsertPt and return it in \p Result (else no change to the IR is made). bool widenCondCommon(Value *Cond0, Value *Cond1, Instruction *InsertPt, - Value *&Result); + Value *&Result, bool InvertCondition); /// Represents a range check of the form \c Base + \c Offset u< \c Length, /// with the constraint that \c Length is not negative. \c CheckInst is the @@ -233,14 +234,18 @@ /// computing only one of the two expressions? bool isWideningCondProfitable(Value *Cond0, Value *Cond1) { Value *ResultUnused; - return widenCondCommon(Cond0, Cond1, /*InsertPt=*/nullptr, ResultUnused); + return widenCondCommon(Cond0, Cond1, /*InsertPt=*/nullptr, ResultUnused, + /*InvertCond*/false); } - /// Widen \p ToWiden to fail if \p NewCondition is false (in addition to - /// whatever it is already checking). - void widenGuard(Instruction *ToWiden, Value *NewCondition) { + /// If \p InvertCondition is false, Widen \p ToWiden to fail if + /// \p NewCondition is false, otherwise make it fail if \p NewCondition is + /// true (in addition to whatever it is already checking). + void widenGuard(Instruction *ToWiden, Value *NewCondition, + bool InvertCondition) { Value *Result; - widenCondCommon(ToWiden->getOperand(0), NewCondition, ToWiden, Result); + widenCondCommon(ToWiden->getOperand(0), NewCondition, ToWiden, Result, + InvertCondition); setGuardCondition(ToWiden, Result); } @@ -283,9 +288,15 @@ Changed |= eliminateGuardViaWidening(II, DFI, GuardsInBlock); if (WidenFrequentBranches && BPI) if (auto *BI = dyn_cast(BB->getTerminator())) - if (BI->isConditional() && - BPI->getEdgeProbability(BB, 0U) >= *LikelyTaken) - Changed |= eliminateGuardViaWidening(BI, DFI, GuardsInBlock); + if (BI->isConditional()) { + // If one of branches of a conditional is likely taken, try to + // eliminate it. + if (BPI->getEdgeProbability(BB, 0U) >= *LikelyTaken) + Changed |= eliminateGuardViaWidening(BI, DFI, GuardsInBlock); + else if (BPI->getEdgeProbability(BB, 1U) >= *LikelyTaken) + Changed |= eliminateGuardViaWidening(BI, DFI, GuardsInBlock, + /*InvertCondition*/true); + } } assert(EliminatedGuards.empty() || Changed); @@ -299,7 +310,7 @@ bool GuardWideningImpl::eliminateGuardViaWidening( Instruction *GuardInst, const df_iterator &DFSI, const DenseMap> & - GuardsInBlock) { + GuardsInBlock, bool InvertCondition) { Instruction *BestSoFar = nullptr; auto BestScoreSoFar = WS_IllegalOrNegative; auto *GuardInstLoop = LI.getLoopFor(GuardInst->getParent()); @@ -365,8 +376,11 @@ LLVM_DEBUG(dbgs() << "Widening " << *GuardInst << " into " << *BestSoFar << " with score " << scoreTypeToString(BestScoreSoFar) << "\n"); - widenGuard(BestSoFar, getGuardCondition(GuardInst)); - setGuardCondition(GuardInst, ConstantInt::getTrue(GuardInst->getContext())); + widenGuard(BestSoFar, getGuardCondition(GuardInst), InvertCondition); + auto NewGuardCondition = InvertCondition + ? ConstantInt::getFalse(GuardInst->getContext()) + : ConstantInt::getTrue(GuardInst->getContext()); + setGuardCondition(GuardInst, NewGuardCondition); EliminatedGuards.push_back(GuardInst); WidenedGuards.insert(BestSoFar); return true; @@ -404,16 +418,22 @@ } void GuardWideningImpl::eliminateGuard(Instruction *GuardInst) { - // Make sure that the condition of the guard to eliminate is already true. - assert(getGuardCondition(GuardInst) == - ConstantInt::getTrue(GuardInst->getContext()) && - "Attempt eliminate a guard with not trivial true condition?"); // Only erase guard intrinsics. Do nothing about branches. if (IntrinsicInst *GI = dyn_cast(GuardInst)) { + // Make sure that the condition of the guard to eliminate is already true. + assert(getGuardCondition(GuardInst) == + ConstantInt::getTrue(GuardInst->getContext()) && + "Attempt eliminate a guard with not trivial true condition?"); GI->eraseFromParent(); ++GuardsEliminated; } else { assert(isa(GuardInst) && "Should be!"); + // Make sure that the condition of the guard to eliminate is trivial. + assert((getGuardCondition(GuardInst) == + ConstantInt::getTrue(GuardInst->getContext()) || + getGuardCondition(GuardInst) == + ConstantInt::getFalse(GuardInst->getContext())) && + "Attempt eliminate a guard with non-trivial condition?"); ++CondBranchEliminated; } } @@ -509,7 +529,8 @@ } bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1, - Instruction *InsertPt, Value *&Result) { + Instruction *InsertPt, Value *&Result, + bool InvertCondition) { using namespace llvm::PatternMatch; { @@ -517,7 +538,9 @@ ConstantInt *RHS0, *RHS1; Value *LHS; ICmpInst::Predicate Pred0, Pred1; - if (match(Cond0, m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) && + // TODO: Match InvertCondition && L >u C0 && L <=u C1 -> L >u max(C0, C1). + if (!InvertCondition && + match(Cond0, m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) && match(Cond1, m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) { ConstantRange CR0 = @@ -552,7 +575,9 @@ { SmallVector Checks, CombinedChecks; - if (parseRangeChecks(Cond0, Checks) && parseRangeChecks(Cond1, Checks) && + // TODO: Support InvertCondition case? + if (!InvertCondition && + parseRangeChecks(Cond0, Checks) && parseRangeChecks(Cond1, Checks) && combineRangeChecks(Checks, CombinedChecks)) { if (InsertPt) { Result = nullptr; @@ -576,7 +601,8 @@ if (InsertPt) { makeAvailableAt(Cond0, InsertPt); makeAvailableAt(Cond1, InsertPt); - + if (InvertCondition) + Cond1 = BinaryOperator::CreateNot(Cond1, "inverted", InsertPt); Result = BinaryOperator::CreateAnd(Cond0, Cond1, "wide.chk", InsertPt); } Index: test/Transforms/GuardWidening/widen-frequent-branches.ll =================================================================== --- test/Transforms/GuardWidening/widen-frequent-branches.ll +++ test/Transforms/GuardWidening/widen-frequent-branches.ll @@ -101,6 +101,39 @@ ret void } +; Similar to test_03, but the likely taken branch is the false branch. +define void @test_03_not_taken(i1 %cond_0, i1 %cond_1) { +; CHECK-LABEL: @test_03_not_taken( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVERTED:%.*]] = xor i1 [[COND_1:%.*]], true +; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[INVERTED]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK]]) [ "deopt"() ] +; CHECK-NEXT: br i1 false, label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]], !prof !2 +; CHECK: if.true: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK: if.false: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: ret void +; +entry: + call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ] + br i1 %cond_1, label %if.true, label %if.false, !prof !3 + +if.true: + call void @foo() + br label %merge + +if.false: + call void @bar() + br label %merge + +merge: + ret void +} + ; Widen loop-invariant condition into the guard in preheader. define void @test_04(i1 %cond_0, i1 %cond_1, i32 %n) { ; CHECK-LABEL: @test_04( @@ -149,6 +182,55 @@ ret void } +; Similar to test_04, but the likely taken branch is the false branch. +define void @test_04_not_taken(i1 %cond_0, i1 %cond_1, i32 %n) { +; CHECK-LABEL: @test_04_not_taken( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVERTED:%.*]] = xor i1 [[COND_1:%.*]], true +; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[INVERTED]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK]]) [ "deopt"() ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[MERGE:%.*]] ] +; CHECK-NEXT: br i1 false, label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]], !prof !2 +; CHECK: if.true: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[MERGE]] +; CHECK: if.false: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]] +; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ] + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %merge ] + br i1 %cond_1, label %if.true, label %if.false, !prof !3 + +if.true: + call void @foo() + br label %merge + +if.false: + call void @bar() + br label %merge + +merge: + %iv.next = add i32 %iv, 1 + %cond = icmp slt i32 %iv.next, %n + br i1 %cond, label %loop, label %exit + +exit: + ret void +} + ; Widen loop-invariant condition into the guard in the same loop. define void @test_05(i1 %cond_0, i1 %cond_1, i32 %n) { ; CHECK-LABEL: @test_05( @@ -197,6 +279,55 @@ ret void } +; Similar to test_05, but the likely taken branch is the false branch. +define void @test_05_not_taken(i1 %cond_0, i1 %cond_1, i32 %n) { +; CHECK-LABEL: @test_05_not_taken( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[MERGE:%.*]] ] +; CHECK-NEXT: [[INVERTED:%.*]] = xor i1 [[COND_1:%.*]], true +; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[INVERTED]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK]]) [ "deopt"() ] +; CHECK-NEXT: br i1 false, label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]], !prof !2 +; CHECK: if.true: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[MERGE]] +; CHECK: if.false: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]] +; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %merge ] + call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ] + br i1 %cond_1, label %if.true, label %if.false, !prof !3 + +if.true: + call void @foo() + br label %merge + +if.false: + call void @bar() + br label %merge + +merge: + %iv.next = add i32 %iv, 1 + %cond = icmp slt i32 %iv.next, %n + br i1 %cond, label %loop, label %exit + +exit: + ret void +} + ; Some of checks are frequently taken and some are not, make sure that we only ; widen frequent ones. define void @test_06(i1 %cond_0, i1 %cond_1, i1 %cond_2, i1 %cond_3, i1 %cond_4, i32 %n) { @@ -208,7 +339,7 @@ ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: br i1 [[COND_1:%.*]], label [[IF_TRUE_1:%.*]], label [[IF_FALSE_1:%.*]], !prof !2 +; CHECK-NEXT: br i1 [[COND_1:%.*]], label [[IF_TRUE_1:%.*]], label [[IF_FALSE_1:%.*]], !prof !3 ; CHECK: if.true_1: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[MERGE_1:%.*]] @@ -224,7 +355,7 @@ ; CHECK-NEXT: call void @bar() ; CHECK-NEXT: br label [[MERGE_2]] ; CHECK: merge_2: -; CHECK-NEXT: br i1 [[COND_3:%.*]], label [[IF_TRUE_3:%.*]], label [[IF_FALSE_3:%.*]], !prof !2 +; CHECK-NEXT: br i1 [[COND_3:%.*]], label [[IF_TRUE_3:%.*]], label [[IF_FALSE_3:%.*]], !prof !3 ; CHECK: if.true_3: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[MERGE_3:%.*]] @@ -304,6 +435,115 @@ ret void } +; Similar to test_06, but the likely taken branch is the false branch. +define void @test_06_not_taken(i1 %cond_0, i1 %cond_1, i1 %cond_2, i1 %cond_3, i1 %cond_4, i32 %n) { +; CHECK-LABEL: @test_06_not_taken( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INVERTED:%.*]] = xor i1 [[COND_2:%.*]], true +; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 [[COND_0:%.*]], [[INVERTED]] +; CHECK-NEXT: [[INVERTED1:%.*]] = xor i1 [[COND_4:%.*]], true +; CHECK-NEXT: [[WIDE_CHK2:%.*]] = and i1 [[WIDE_CHK]], [[INVERTED1]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK2]]) [ "deopt"() ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: br i1 [[COND_1:%.*]], label [[IF_TRUE_1:%.*]], label [[IF_FALSE_1:%.*]], !prof !3 +; CHECK: if.true_1: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[MERGE_1:%.*]] +; CHECK: if.false_1: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[MERGE_1]] +; CHECK: merge_1: +; CHECK-NEXT: br i1 false, label [[IF_TRUE_2:%.*]], label [[IF_FALSE_2:%.*]], !prof !2 +; CHECK: if.true_2: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[MERGE_2:%.*]] +; CHECK: if.false_2: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[MERGE_2]] +; CHECK: merge_2: +; CHECK-NEXT: br i1 [[COND_3:%.*]], label [[IF_TRUE_3:%.*]], label [[IF_FALSE_3:%.*]], !prof !3 +; CHECK: if.true_3: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[MERGE_3:%.*]] +; CHECK: if.false_3: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[MERGE_3]] +; CHECK: merge_3: +; CHECK-NEXT: br i1 false, label [[IF_TRUE_4:%.*]], label [[IF_FALSE_4:%.*]], !prof !2 +; CHECK: if.true_4: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: br label [[BACKEDGE]] +; CHECK: if.false_4: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]] +; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ] + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %backedge ] + br i1 %cond_1, label %if.true_1, label %if.false_1, !prof !2 + +if.true_1: + call void @foo() + br label %merge_1 + +if.false_1: + call void @bar() + br label %merge_1 + +merge_1: + br i1 %cond_2, label %if.true_2, label %if.false_2, !prof !3 + +if.true_2: + call void @foo() + br label %merge_2 + +if.false_2: + call void @bar() + br label %merge_2 + +merge_2: + br i1 %cond_3, label %if.true_3, label %if.false_3, !prof !2 + +if.true_3: + call void @foo() + br label %merge_3 + +if.false_3: + call void @bar() + br label %merge_3 + +merge_3: + br i1 %cond_4, label %if.true_4, label %if.false_4, !prof !3 + +if.true_4: + call void @foo() + br label %backedge + +if.false_4: + call void @bar() + br label %backedge + +backedge: + %iv.next = add i32 %iv, 1 + %cond = icmp slt i32 %iv.next, %n + br i1 %cond, label %loop, label %exit + +exit: + ret void +} + !0 = !{!"branch_weights", i32 998, i32 1} !1 = !{!"branch_weights", i32 999, i32 1} !2 = !{!"branch_weights", i32 500, i32 500} +!3 = !{!"branch_weights", i32 1, i32 999}