LoopPredication: have widenChecks() report the original checks it widened.

widenChecks() no longer returns a count. It appends the original value of
every check it widens to a caller-provided WidenedChecks vector, and callers
use WidenedChecks.size() / .empty() where they used NumWidened before.
For widenable-branch guards, the llvm.assume emitted in the guarded block is
now built from those original checks (Builder.CreateAnd(WidenedChecks))
instead of from the whole old condition Cond; the test expectations below
change accordingly.

NOTE(review): when IfTrueBB has several predecessors, the result of
CreateAnd(WidenedChecks) is used as the GuardBB incoming value of the phi
created right after it. Confirm the builder's insertion point makes that
value available on the GuardBB edge.

Index: llvm/lib/Transforms/Scalar/LoopPredication.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -307,8 +307,9 @@
   widenICmpRangeCheckDecrementingLoop(LoopICmp LatchCheck, LoopICmp RangeCheck,
                                       SCEVExpander &Expander,
                                       Instruction *Guard);
-  unsigned widenChecks(SmallVectorImpl<Value *> &Checks, SCEVExpander &Expander,
-                       Instruction *Guard);
+  void widenChecks(SmallVectorImpl<Value *> &Checks,
+                   SmallVectorImpl<Value *> &WidenedChecks,
+                   SCEVExpander &Expander, Instruction *Guard);
   bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
   bool widenWidenableBranchGuardConditions(BranchInst *Guard, SCEVExpander &Expander);
   // If the loop always exits through another block in the loop, we should not
@@ -754,17 +755,15 @@
   }
 }
 
-unsigned LoopPredication::widenChecks(SmallVectorImpl<Value *> &Checks,
-                                      SCEVExpander &Expander,
-                                      Instruction *Guard) {
-  unsigned NumWidened = 0;
+void LoopPredication::widenChecks(SmallVectorImpl<Value *> &Checks,
+                                  SmallVectorImpl<Value *> &WidenedChecks,
+                                  SCEVExpander &Expander, Instruction *Guard) {
   for (auto &Check : Checks)
     if (ICmpInst *ICI = dyn_cast<ICmpInst>(Check))
       if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander, Guard)) {
-        NumWidened++;
+        WidenedChecks.push_back(Check);
         Check = *NewRangeCheck;
       }
-  return NumWidened;
 }
 
 bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
@@ -774,12 +773,13 @@
 
   TotalConsidered++;
   SmallVector<Value *, 4> Checks;
+  SmallVector<Value *> WidenedChecks;
   parseWidenableGuard(Guard, Checks);
-  unsigned NumWidened = widenChecks(Checks, Expander, Guard);
-  if (NumWidened == 0)
+  widenChecks(Checks, WidenedChecks, Expander, Guard);
+  if (WidenedChecks.empty())
     return false;
 
-  TotalWidened += NumWidened;
+  TotalWidened += WidenedChecks.size();
 
   // Emit the new guard condition
   IRBuilder<> Builder(findInsertPt(Guard, Checks));
@@ -792,7 +792,7 @@
   }
   RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU);
 
-  LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
+  LLVM_DEBUG(dbgs() << "Widened checks = " << WidenedChecks.size() << "\n");
   return true;
 }
 
@@ -810,15 +810,16 @@
 
   TotalConsidered++;
   SmallVector<Value *, 4> Checks;
+  SmallVector<Value *> WidenedChecks;
   parseWidenableGuard(BI, Checks);
   // At the moment, our matching logic for wideable conditions implicitly
   // assumes we preserve the form: (br (and Cond, WC())). FIXME
   Checks.push_back(WC);
-  unsigned NumWidened = widenChecks(Checks, Expander, BI);
-  if (NumWidened == 0)
+  widenChecks(Checks, WidenedChecks, Expander, BI);
+  if (WidenedChecks.empty())
     return false;
 
-  TotalWidened += NumWidened;
+  TotalWidened += WidenedChecks.size();
 
   // Emit the new guard condition
   IRBuilder<> Builder(findInsertPt(BI, Checks));
@@ -830,13 +831,13 @@
     // If this block has other predecessors, we might not be able to use Cond.
     // In this case, create a Phi where every other input is `true` and input
     // from guard block is Cond.
-    Value *AssumeCond = Cond;
+    Value *AssumeCond = Builder.CreateAnd(WidenedChecks);
     if (!IfTrueBB->getUniquePredecessor()) {
       auto *GuardBB = BI->getParent();
-      auto *PN = Builder.CreatePHI(Cond->getType(), pred_size(IfTrueBB),
+      auto *PN = Builder.CreatePHI(AssumeCond->getType(), pred_size(IfTrueBB),
                                    "assume.cond");
       for (auto *Pred : predecessors(IfTrueBB))
-        PN->addIncoming(Pred == GuardBB ? Cond : Builder.getTrue(), Pred);
+        PN->addIncoming(Pred == GuardBB ? AssumeCond : Builder.getTrue(), Pred);
       AssumeCond = PN;
     }
     Builder.CreateAssumption(AssumeCond);
@@ -845,7 +846,7 @@
   assert(isGuardAsWidenableBranch(BI) &&
          "Stopped being a guard after transform?");
 
-  LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
+  LLVM_DEBUG(dbgs() << "Widened checks = " << WidenedChecks.size() << "\n");
   return true;
 }
 
Index: llvm/test/Transforms/LoopPredication/basic_widenable_branch_guards.ll
===================================================================
--- llvm/test/Transforms/LoopPredication/basic_widenable_branch_guards.ll
+++ llvm/test/Transforms/LoopPredication/basic_widenable_branch_guards.ll
@@ -1224,7 +1224,6 @@
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
 ; CHECK-NEXT:    [[WITHIN_BOUNDS_1:%.*]] = icmp ult i32 [[I]], [[LENGTH_1]]
 ; CHECK-NEXT:    [[WITHIN_BOUNDS_2:%.*]] = icmp ult i32 [[I]], [[LENGTH_2]]
-; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = and i1 [[WITHIN_BOUNDS_1]], [[WITHIN_BOUNDS_2]]
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
 ; CHECK-NEXT:    [[TMP8:%.*]] = and i1 [[TMP3]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = and i1 [[TMP8]], [[WIDENABLE_COND]]
@@ -1233,7 +1232,8 @@
 ; CHECK-NEXT:    [[DEOPTCALL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32(i32 9) [ "deopt"() ]
 ; CHECK-NEXT:    ret i32 [[DEOPTCALL]]
 ; CHECK:       guarded:
-; CHECK-NEXT:    call void @llvm.assume(i1 [[WITHIN_BOUNDS]])
+; CHECK-NEXT:    [[TMP10:%.*]] = and i1 [[WITHIN_BOUNDS_2]], [[WITHIN_BOUNDS_1]]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP10]])
 ; CHECK-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
 ; CHECK-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1:%.*]], i64 [[I_I64]]
 ; CHECK-NEXT:    [[ARRAY_1_I:%.*]] = load i32, ptr [[ARRAY_1_I_PTR]], align 4
@@ -1314,8 +1314,6 @@
 ; CHECK-NEXT:    [[WITHIN_BOUNDS_1:%.*]] = icmp ult i32 [[I]], [[LENGTH_1]]
 ; CHECK-NEXT:    [[WITHIN_BOUNDS_2:%.*]] = icmp ult i32 [[I]], [[LENGTH_2]]
 ; CHECK-NEXT:    [[WITHIN_BOUNDS_3:%.*]] = icmp ult i32 [[I]], [[LENGTH_3]]
-; CHECK-NEXT:    [[WITHIN_BOUNDS_1_AND_2:%.*]] = and i1 [[WITHIN_BOUNDS_1]], [[WITHIN_BOUNDS_2]]
-; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = and i1 [[WITHIN_BOUNDS_1_AND_2]], [[WITHIN_BOUNDS_3]]
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
 ; CHECK-NEXT:    [[TMP12:%.*]] = and i1 [[TMP3]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = and i1 [[TMP12]], [[TMP11]]
@@ -1325,7 +1323,9 @@
 ; CHECK-NEXT:    [[DEOPTCALL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32(i32 9) [ "deopt"() ]
 ; CHECK-NEXT:    ret i32 [[DEOPTCALL]]
 ; CHECK:       guarded:
-; CHECK-NEXT:    call void @llvm.assume(i1 [[WITHIN_BOUNDS]])
+; CHECK-NEXT:    [[TMP15:%.*]] = and i1 [[WITHIN_BOUNDS_3]], [[WITHIN_BOUNDS_2]]
+; CHECK-NEXT:    [[TMP16:%.*]] = and i1 [[TMP15]], [[WITHIN_BOUNDS_1]]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP16]])
 ; CHECK-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
 ; CHECK-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1:%.*]], i64 [[I_I64]]
 ; CHECK-NEXT:    [[ARRAY_1_I:%.*]] = load i32, ptr [[ARRAY_1_I_PTR]], align 4
@@ -1533,7 +1533,6 @@
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
 ; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH]]
 ; CHECK-NEXT:    [[UNRELATED_COND:%.*]] = icmp ult i32 [[X:%.*]], [[LENGTH]]
-; CHECK-NEXT:    [[GUARD_COND:%.*]] = and i1 [[WITHIN_BOUNDS]], [[UNRELATED_COND]]
 ; CHECK-NEXT:    [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
 ; CHECK-NEXT:    [[TMP4:%.*]] = and i1 [[UNRELATED_COND]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = and i1 [[TMP4]], [[WIDENABLE_COND]]
@@ -1542,7 +1541,7 @@
 ; CHECK-NEXT:    [[DEOPTCALL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32(i32 9) [ "deopt"() ]
 ; CHECK-NEXT:    ret i32 [[DEOPTCALL]]
 ; CHECK:       guarded:
-; CHECK-NEXT:    call void @llvm.assume(i1 [[GUARD_COND]])
+; CHECK-NEXT:    call void @llvm.assume(i1 [[WITHIN_BOUNDS]])
 ; CHECK-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
 ; CHECK-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i64 [[I_I64]]
 ; CHECK-NEXT:    [[ARRAY_I:%.*]] = load i32, ptr [[ARRAY_I_PTR]], align 4