diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp --- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -178,6 +178,7 @@ #include "llvm/Transforms/Scalar/LoopPredication.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/LoopInfo.h" @@ -246,6 +247,7 @@ } }; + AliasAnalysis *AA; ScalarEvolution *SE; BranchProbabilityInfo *BPI; @@ -275,7 +277,11 @@ /// passed to SCEVExpander! Instruction *findInsertPt(Instruction *User, ArrayRef Ops); - bool CanExpand(const SCEV* S); + /// Return true if the value is known to produce a single fixed value across + /// all iterations on which it executes. Note that this does not imply + /// speculation safety. That must be established seperately. + bool isLoopInvariantValue(const SCEV* S); + Value *expandCheck(SCEVExpander &Expander, Instruction *Guard, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); @@ -318,8 +324,9 @@ Optional generateLoopLatchCheck(Type *RangeCheckType); public: - LoopPredication(ScalarEvolution *SE, BranchProbabilityInfo *BPI) - : SE(SE), BPI(BPI){}; + LoopPredication(AliasAnalysis *AA, ScalarEvolution *SE, + BranchProbabilityInfo *BPI) + : AA(AA), SE(SE), BPI(BPI){}; bool runOnLoop(Loop *L); }; @@ -341,7 +348,8 @@ auto *SE = &getAnalysis().getSE(); BranchProbabilityInfo &BPI = getAnalysis().getBPI(); - LoopPredication LP(SE, &BPI); + auto *AA = &getAnalysis().getAAResults(); + LoopPredication LP(AA, SE, &BPI); return LP.runOnLoop(L); } }; @@ -367,7 +375,7 @@ AM.getResult(L, AR).getManager(); Function *F = L.getHeader()->getParent(); auto *BPI = FAM.getCachedResult(*F); - LoopPredication LP(&AR.SE, BPI); + LoopPredication LP(&AR.AA, &AR.SE, BPI); if (!LP.runOnLoop(&L)) return PreservedAnalyses::all(); @@ -462,15 +470,51 @@ Instruction *LoopPredication::findInsertPt(Instruction *Use, ArrayRef Ops) { + // Subtlety: SCEV considers things to be invariant if the value produced is + // the same across iterations. This is not the same as being able to + // evaluate outside the loop, which is what we actually need here. for (const SCEV *Op : Ops) - if (!SE->isLoopInvariant(Op, L)) + if (!SE->isLoopInvariant(Op, L) || + !isSafeToExpandAt(Op, Preheader->getTerminator(), *SE)) return Use; return Preheader->getTerminator(); } +bool LoopPredication::isLoopInvariantValue(const SCEV* S) { + // Handling expressions which produce invariant results, but *haven't* yet + // been removed from the loop serves two important purposes. + // 1) Most importantly, it resolves a pass ordering cycle which would + // otherwise need us to iteration licm, loop-predication, and either + // loop-unswitch or loop-peeling to make progress on examples with lots of + // predicable range checks in a row. (Since, in the general case, we can't + // hoist the length checks until the dominating checks have been discharged + // as we can't prove doing so is safe.) + // 2) As a nice side effect, this exposes the value of peeling or unswitching + // much more obviously in the IR. Otherwise, the cost modeling for other + // transforms would end up needing to duplicate all of this logic to model a + // check which becomes predictable based on a modeled peel or unswitch. + // + // The cost of doing so in the worst case is an extra fill from the stack in + // the loop to materialize the loop invariant test value instead of checking + // against the original IV which is presumable in a register inside the loop. + // Such cases are presumably rare, and hint at missing oppurtunities for + // other passes. + + if (SE->isLoopInvariant(S, L)) + // Note: This the SCEV variant, so the original Value* may be within the + // loop even though SCEV has proven it is loop invariant. + return true; -bool LoopPredication::CanExpand(const SCEV* S) { - return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE); + // Handle a particular important case which SCEV doesn't yet know about which + // shows up in range checks on arrays with immutable lengths. + // TODO: This should be sunk inside SCEV. + if (const SCEVUnknown *U = dyn_cast(S)) + if (const auto *LI = dyn_cast(U->getValue())) + if (LI->isUnordered()) + if (AA->pointsToConstantMemory(LI->getOperand(0)) || + LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr) + return true; + return false; } Optional LoopPredication::widenICmpRangeCheckIncrementingLoop( @@ -487,16 +531,26 @@ const SCEV *GuardLimit = RangeCheck.Limit; const SCEV *LatchStart = LatchCheck.IV->getStart(); const SCEV *LatchLimit = LatchCheck.Limit; + // Subtlety: We need all the values to be *invariant* across all iterations, + // but we only need to check expansion safety for those which *aren't* + // already guaranteed to dominate the guard. + if (!isLoopInvariantValue(GuardStart) || + !isLoopInvariantValue(GuardLimit) || + !isLoopInvariantValue(LatchStart) || + !isLoopInvariantValue(LatchLimit)) { + LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); + return None; + } + if (!isSafeToExpandAt(LatchStart, Guard, *SE) || + !isSafeToExpandAt(LatchLimit, Guard, *SE)) { + LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); + return None; + } // guardLimit - guardStart + latchStart - 1 const SCEV *RHS = SE->getAddExpr(SE->getMinusSCEV(GuardLimit, GuardStart), SE->getMinusSCEV(LatchStart, SE->getOne(Ty))); - if (!CanExpand(GuardStart) || !CanExpand(GuardLimit) || - !CanExpand(LatchLimit) || !CanExpand(RHS)) { - LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); - return None; - } auto LimitCheckPred = ICmpInst::getFlippedStrictnessPredicate(LatchCheck.Pred); @@ -518,9 +572,20 @@ auto *Ty = RangeCheck.IV->getType(); const SCEV *GuardStart = RangeCheck.IV->getStart(); const SCEV *GuardLimit = RangeCheck.Limit; + const SCEV *LatchStart = LatchCheck.IV->getStart(); const SCEV *LatchLimit = LatchCheck.Limit; - if (!CanExpand(GuardStart) || !CanExpand(GuardLimit) || - !CanExpand(LatchLimit)) { + // Subtlety: We need all the values to be *invariant* across all iterations, + // but we only need to check expansion safety for those which *aren't* + // already guaranteed to dominate the guard. + if (!isLoopInvariantValue(GuardStart) || + !isLoopInvariantValue(GuardLimit) || + !isLoopInvariantValue(LatchStart) || + !isLoopInvariantValue(LatchLimit)) { + LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); + return None; + } + if (!isSafeToExpandAt(LatchStart, Guard, *SE) || + !isSafeToExpandAt(LatchLimit, Guard, *SE)) { LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); return None; } diff --git a/llvm/test/Transforms/LoopPredication/basic.ll b/llvm/test/Transforms/LoopPredication/basic.ll --- a/llvm/test/Transforms/LoopPredication/basic.ll +++ b/llvm/test/Transforms/LoopPredication/basic.ll @@ -1501,8 +1501,10 @@ ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[LENGTH_UDIV:%.*]] = udiv i32 [[LENGTH:%.*]], [[DIVIDER:%.*]] -; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH_UDIV]] -; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i32 [[N]], [[LENGTH_UDIV]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 0, [[LENGTH_UDIV]] +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP2]], i32 9) [ "deopt"() ] ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 diff --git a/llvm/test/Transforms/LoopPredication/basic_widenable_branch_guards.ll b/llvm/test/Transforms/LoopPredication/basic_widenable_branch_guards.ll --- a/llvm/test/Transforms/LoopPredication/basic_widenable_branch_guards.ll +++ b/llvm/test/Transforms/LoopPredication/basic_widenable_branch_guards.ll @@ -1808,10 +1808,12 @@ ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[LENGTH_UDIV:%.*]] = udiv i32 [[LENGTH:%.*]], [[DIVIDER:%.*]] -; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH_UDIV]] ; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() -; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WITHIN_BOUNDS]], [[WIDENABLE_COND]] -; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i32 [[N]], [[LENGTH_UDIV]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 0, [[LENGTH_UDIV]] +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTCALL:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32(i32 9) [ "deopt"() ] ; CHECK-NEXT: ret i32 [[DEOPTCALL]] diff --git a/llvm/test/Transforms/LoopPredication/invariant_load.ll b/llvm/test/Transforms/LoopPredication/invariant_load.ll --- a/llvm/test/Transforms/LoopPredication/invariant_load.ll +++ b/llvm/test/Transforms/LoopPredication/invariant_load.ll @@ -78,8 +78,10 @@ ; CHECK-NEXT: [[UNKNOWN:%.*]] = load volatile i1, i1* @UNKNOWN ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[UNKNOWN]]) [ "deopt"() ] ; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* [[LENGTH:%.*]], align 4, !invariant.load !0 -; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LEN]] -; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i32 [[N]], [[LEN]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 0, [[LEN]] +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP2]], i32 9) [ "deopt"() ] ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 @@ -264,8 +266,10 @@ ; CHECK-NEXT: [[UNKNOWN:%.*]] = load volatile i1, i1* @UNKNOWN ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[UNKNOWN]]) [ "deopt"() ] ; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* @Length, align 4 -; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LEN]] -; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i32 [[N]], [[LEN]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 0, [[LEN]] +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP2]], i32 9) [ "deopt"() ] ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4