Index: lib/Transforms/Scalar/LoopPredication.cpp
===================================================================
--- lib/Transforms/Scalar/LoopPredication.cpp
+++ lib/Transforms/Scalar/LoopPredication.cpp
@@ -177,7 +177,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Scalar/LoopPredication.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/GuardUtils.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -246,6 +248,7 @@
     }
   };
 
+  AliasAnalysis *AA;
   ScalarEvolution *SE;
   BranchProbabilityInfo *BPI;
 
@@ -254,6 +257,12 @@
   BasicBlock *Preheader;
   LoopICmp LatchCheck;
 
+  /// The set of all instructions within the loop which have been proven to
+  /// produce the same result across all iterations of the given loop. Note
+  /// that this does not say anything about whether the instruction is safe to
+  /// move. For instance, "udiv i32 %arg, 0" could be in this set.
+  SmallPtrSet<Value *, 8> InvariantExpressions;
+
   bool isSupportedStep(const SCEV* Step);
   Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI) {
     return parseLoopICmp(ICI->getPredicate(), ICI->getOperand(0),
@@ -264,7 +273,7 @@
 
   Optional<LoopICmp> parseLoopLatchICmp();
 
-  bool CanExpand(const SCEV* S);
+  bool CanExpand(const SCEV* S, const Instruction *At);
   Value *expandCheck(SCEVExpander &Expander, IRBuilder<> &Builder,
                      ICmpInst::Predicate Pred, const SCEV *LHS,
                      const SCEV *RHS);
@@ -307,8 +316,9 @@
   Optional<LoopICmp> generateLoopLatchCheck(Type *RangeCheckType);
 
 public:
-  LoopPredication(ScalarEvolution *SE, BranchProbabilityInfo *BPI)
-      : SE(SE), BPI(BPI){};
+  LoopPredication(AliasAnalysis *AA, ScalarEvolution *SE,
+                  BranchProbabilityInfo *BPI)
+      : AA(AA), SE(SE), BPI(BPI){};
   bool runOnLoop(Loop *L);
 };
 
@@ -329,8 +339,9 @@
       return false;
     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
     BranchProbabilityInfo &BPI =
-        getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
-    LoopPredication LP(SE, &BPI);
+        getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+    auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+    LoopPredication LP(AA, SE, &BPI);
     return LP.runOnLoop(L);
   }
 };
@@ -356,7 +367,7 @@
       AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
   Function *F = L.getHeader()->getParent();
   auto *BPI = FAM.getCachedResult<BranchProbabilityAnalysis>(*F);
-  LoopPredication LP(&AR.SE, BPI);
+  LoopPredication LP(&AR.AA, &AR.SE, BPI);
   if (!LP.runOnLoop(&L))
     return PreservedAnalyses::all();
 
@@ -394,15 +405,21 @@
   Type *Ty = LHS->getType();
   assert(Ty == RHS->getType() && "expandCheck operands have different types?");
 
-  if (SE->isLoopEntryGuardedByCond(L, Pred, LHS, RHS))
-    return Builder.getTrue();
-  if (SE->isLoopEntryGuardedByCond(L, ICmpInst::getInversePredicate(Pred),
-                                   LHS, RHS))
-    return Builder.getFalse();
+  if (SE->isLoopInvariant(LHS, L) && SE->isLoopInvariant(RHS, L)) {
+    if (SE->isLoopEntryGuardedByCond(L, Pred, LHS, RHS))
+      return Builder.getTrue();
+    if (SE->isLoopEntryGuardedByCond(L, ICmpInst::getInversePredicate(Pred),
+                                     LHS, RHS))
+      return Builder.getFalse();
+  }
 
   Instruction *InsertAt = &*Builder.GetInsertPoint();
+  if (SE->isLoopInvariant(LHS, L) && SE->isLoopInvariant(RHS, L))
+    InsertAt = L->getLoopPreheader()->getTerminator();
   Value *LHSV = Expander.expandCodeFor(LHS, Ty, InsertAt);
   Value *RHSV = Expander.expandCodeFor(RHS, Ty, InsertAt);
+  IRBuilder<>::InsertPointGuard G(Builder);
+  Builder.SetInsertPoint(InsertAt);
   return Builder.CreateICmp(Pred, LHSV, RHSV);
 }
 
@@ -438,8 +455,39 @@
   return Step->isOne() || (Step->isAllOnesValue() && EnableCountDownLoop);
 }
 
-bool LoopPredication::CanExpand(const SCEV* S) {
-  return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE);
+bool LoopPredication::CanExpand(const SCEV* S, const Instruction *At) {
+  if (SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE))
+    return true;
+
+  // Handling expressions which produce invariant results, but *haven't* yet
+  // been removed from the loop serves two important purposes.
+  // 1) Most importantly, it resolves a pass ordering cycle which would
+  // otherwise need us to iterate licm, loop-predication, and either
+  // loop-unswitch or loop-peeling to make progress on examples with lots of
+  // predicable range checks in a row. (Since, in the general case, we can't
+  // hoist the length checks until the dominating checks have been discharged
+  // as we can't prove doing so is safe.)
+  // 2) As a nice side effect, this exposes the value of peeling or unswitching
+  // much more obviously in the IR. Otherwise, the cost modeling for other
+  // transforms would end up needing to duplicate all of this logic to model a
+  // check which becomes predictable based on a modeled peel or unswitch.
+  //
+  // The cost of doing so in the worst case is an extra load in the loop to
+  // materialize the loop invariant test value instead of checking against the
+  // original IV which is presumably in a register inside the loop. Such cases
+  // are presumably rare, and hint at missing opportunities for other passes.
+  LLVM_DEBUG(S->dump());
+  LLVM_DEBUG(dbgs() << isSafeToExpandAt(S, At, *SE) << "\n");
+  if (SE->getLoopDisposition(S, L) == ScalarEvolution::LoopInvariant) {
+    LLVM_DEBUG(dbgs() << "IV1\n");
+    return isSafeToExpandAt(S, At, *SE);
+  }
+  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
+    if (InvariantExpressions.count(U->getValue())) {
+      LLVM_DEBUG(dbgs() << "IV2\n");
+      return isSafeToExpandAt(S, At, *SE);
+    }
+  return false;
 }
 
 Optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop(
@@ -461,8 +509,10 @@
   const SCEV *RHS =
       SE->getAddExpr(SE->getMinusSCEV(GuardLimit, GuardStart),
                      SE->getMinusSCEV(LatchStart, SE->getOne(Ty)));
-  if (!CanExpand(GuardStart) || !CanExpand(GuardLimit) ||
-      !CanExpand(LatchLimit) || !CanExpand(RHS)) {
+  Instruction *InsertAt = &*Builder.GetInsertPoint();
+  Instruction *PHTerm = &*L->getLoopPreheader()->getTerminator();
+  if (!CanExpand(GuardStart, PHTerm) || !CanExpand(GuardLimit, InsertAt) ||
+      !CanExpand(LatchLimit, PHTerm) || !CanExpand(RHS, InsertAt)) {
     LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
     return None;
   }
@@ -477,6 +527,10 @@
       expandCheck(Expander, Builder, LimitCheckPred, LatchLimit, RHS);
   auto *FirstIterationCheck = expandCheck(Expander, Builder, RangeCheck.Pred,
                                           GuardStart, GuardLimit);
+  if (L->isLoopInvariant(LimitCheck) && L->isLoopInvariant(FirstIterationCheck))
+    InsertAt = L->getLoopPreheader()->getTerminator();
+  IRBuilder<>::InsertPointGuard G(Builder);
+  Builder.SetInsertPoint(InsertAt);
   return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
 }
 
@@ -487,8 +541,9 @@
   const SCEV *GuardStart = RangeCheck.IV->getStart();
   const SCEV *GuardLimit = RangeCheck.Limit;
   const SCEV *LatchLimit = LatchCheck.Limit;
-  if (!CanExpand(GuardStart) || !CanExpand(GuardLimit) ||
-      !CanExpand(LatchLimit)) {
+  Instruction *InsertAt = &*Builder.GetInsertPoint();
+  if (!CanExpand(GuardStart, InsertAt) || !CanExpand(GuardLimit, InsertAt) ||
+      !CanExpand(LatchLimit, InsertAt)) {
     LLVM_DEBUG(dbgs() << "Can't expand limit check!\n");
     return None;
   }
@@ -512,6 +567,10 @@
                                           GuardStart, GuardLimit);
   auto *LimitCheck = expandCheck(Expander, Builder, LimitCheckPred,
                                  LatchLimit, SE->getOne(Ty));
+  if (L->isLoopInvariant(LimitCheck) && L->isLoopInvariant(FirstIterationCheck))
+    InsertAt = L->getLoopPreheader()->getTerminator();
+  IRBuilder<>::InsertPointGuard G(Builder);
+  Builder.SetInsertPoint(InsertAt);
   return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
 }
 
@@ -586,6 +645,7 @@
                                         Value *Condition,
                                         SCEVExpander &Expander,
                                         IRBuilder<> &Builder) {
+  IRBuilder<>::InsertPointGuard G(Builder);
   unsigned NumWidened = 0;
   // The guard condition is expected to be in form of:
   // cond1 && cond2 && cond3 ...
@@ -608,6 +668,8 @@
     }
 
     if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) {
+      IRBuilder<>::InsertPointGuard G(Builder);
+      Builder.SetInsertPoint(ICI);
       if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander,
                                                    Builder)) {
         Checks.push_back(NewRangeCheck.getValue());
@@ -838,6 +900,32 @@
   return true;
 }
 
+/// Return true for instructions which can cheaply be proven to produce a
+/// single fixed result across all iterations on which they might execute.
+/// IMPORTANT: This does not prove that the instruction *does* execute or is
+/// safe to move in any way. It only proves that all executions of the given
+/// instruction must return the same value across all loop iterations.
+static bool IsInvariantExpression(Loop *L, Instruction *I,
+                                  AliasAnalysis *AA,
+                                  SmallPtrSetImpl<Value *> &InvExprs) {
+  if (L->isLoopInvariant(I))
+    return true;
+
+  for (Value *Op : I->operands())
+    if (!L->isLoopInvariant(Op) &&
+        !InvExprs.count(Op))
+      return false;
+
+  if (const auto *LI = dyn_cast<LoadInst>(I)) {
+    if (!LI->isUnordered())
+      return false;
+    if (AA->pointsToConstantMemory(LI->getOperand(0)) ||
+        LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
+      return true;
+  }
+  return !I->mayHaveSideEffects() && !I->mayReadOrWriteMemory();
+}
+
 bool LoopPredication::runOnLoop(Loop *Loop) {
   L = Loop;
 
@@ -892,6 +980,28 @@
   if (Guards.empty() && GuardsAsWidenableBranches.empty())
     return false;
 
+  // Populate the set of invariant expressions for the given loop. This is in
+  // the worst case O(Instructions) in the function (since we have to consider
+  // uses outside the loop to reject them).
+  assert(InvariantExpressions.empty());
+  SmallVector<Value *, 8> Worklist;
+  for (auto *BB : L->blocks())
+    for (auto &I : *BB)
+      if (IsInvariantExpression(L, &I, AA, InvariantExpressions))
+        Worklist.push_back(&I);
+  while (!Worklist.empty()) {
+    Value *Item = Worklist.pop_back_val();
+    InvariantExpressions.insert(Item);
+    for (User *U : Item->users()) {
+      Instruction *User = cast<Instruction>(U);
+      if (!L->contains(User->getParent()))
+        continue;
+      if (!IsInvariantExpression(L, User, AA, InvariantExpressions))
+        continue;
+      Worklist.push_back(User);
+    }
+  }
+
   SCEVExpander Expander(*SE, *DL, "loop-predication");
 
   bool Changed = false;
@@ -900,5 +1010,6 @@
   for (auto *Guard : GuardsAsWidenableBranches)
     Changed |= widenWidenableBranchGuardConditions(Guard, Expander);
 
+  InvariantExpressions.clear();
   return Changed;
 }
Index: test/Transforms/LoopPredication/invariant_load.ll
===================================================================
--- test/Transforms/LoopPredication/invariant_load.ll
+++ test/Transforms/LoopPredication/invariant_load.ll
@@ -78,8 +78,10 @@
 ; CHECK-NEXT:    [[UNKNOWN:%.*]] = load volatile i1, i1* @UNKNOWN
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[UNKNOWN]]) [ "deopt"() ]
 ; CHECK-NEXT:    [[LEN:%.*]] = load i32, i32* [[LENGTH:%.*]], align 4, !invariant.load !0
-; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LEN]]
-; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule i32 [[N]], [[LEN]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 0, [[LEN]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[TMP2]], i32 9) [ "deopt"() ]
 ; CHECK-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
 ; CHECK-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]]
 ; CHECK-NEXT:    [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4
@@ -139,8 +141,10 @@
 ; CHECK-NEXT:    [[UNKNOWN:%.*]] = load volatile i1, i1* @UNKNOWN
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[UNKNOWN]]) [ "deopt"() ]
 ; CHECK-NEXT:    [[LEN:%.*]] = load i32, i32* @Length, align 4
-; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LEN]]
-; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule i32 [[N]], [[LEN]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 0, [[LEN]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[TMP2]], i32 9) [ "deopt"() ]
 ; CHECK-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
 ; CHECK-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]]
 ; CHECK-NEXT:    [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4
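For reference, the InvariantExpressions population added to runOnLoop above is a seed-and-propagate fixed point. The standalone sketch below restates that idea over toy types; "Inst", "isCandidate", and the boolean flags are illustrative stand-ins for llvm::Instruction and the checks in IsInvariantExpression (the invariant-load and aliasing cases are collapsed into a single TouchesMemory flag), not real LLVM APIs.

// Sketch only: not part of the patch.
#include <set>
#include <vector>

struct Inst {
  std::vector<Inst *> Operands;   // values this instruction reads
  std::vector<Inst *> Users;      // in-loop instructions reading this value
  bool DefinedOutsideLoop = false;
  bool HasSideEffects = false;
  bool TouchesMemory = false;
};

// An instruction qualifies once every operand is defined outside the loop or
// already known to be invariant, and it neither reads nor writes memory.
static bool isCandidate(const Inst *I, const std::set<const Inst *> &Inv) {
  if (I->DefinedOutsideLoop)
    return true;
  for (const Inst *Op : I->Operands)
    if (!Op->DefinedOutsideLoop && !Inv.count(Op))
      return false;
  return !I->HasSideEffects && !I->TouchesMemory;
}

// Seed with instructions that qualify on their own, then propagate to their
// users until no further instruction can be added (a fixed point).
std::set<const Inst *> computeInvariantExpressions(const std::vector<Inst *> &LoopInsts) {
  std::set<const Inst *> Inv;
  std::vector<const Inst *> Worklist;
  for (const Inst *I : LoopInsts)
    if (isCandidate(I, Inv))
      Worklist.push_back(I);
  while (!Worklist.empty()) {
    const Inst *I = Worklist.back();
    Worklist.pop_back();
    if (!Inv.insert(I).second)
      continue; // already classified; avoids revisiting its users
    for (const Inst *U : I->Users)
      if (isCandidate(U, Inv))
        Worklist.push_back(U);
  }
  return Inv;
}

The worst case is a full pass over the loop body per propagation wave, which matches the O(Instructions) bound noted in the comment in runOnLoop.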