Index: llvm/trunk/include/llvm/InitializePasses.h =================================================================== --- llvm/trunk/include/llvm/InitializePasses.h +++ llvm/trunk/include/llvm/InitializePasses.h @@ -199,6 +199,7 @@ void initializeLoopInterchangePass(PassRegistry &); void initializeLoopLoadEliminationPass(PassRegistry&); void initializeLoopPassPass(PassRegistry&); +void initializeLoopPredicationLegacyPassPass(PassRegistry&); void initializeLoopRerollPass(PassRegistry&); void initializeLoopRotateLegacyPassPass(PassRegistry&); void initializeLoopSimplifyCFGLegacyPassPass(PassRegistry&); Index: llvm/trunk/include/llvm/LinkAllPasses.h =================================================================== --- llvm/trunk/include/llvm/LinkAllPasses.h +++ llvm/trunk/include/llvm/LinkAllPasses.h @@ -116,6 +116,7 @@ (void) llvm::createLazyValueInfoPass(); (void) llvm::createLoopExtractorPass(); (void) llvm::createLoopInterchangePass(); + (void) llvm::createLoopPredicationPass(); (void) llvm::createLoopSimplifyPass(); (void) llvm::createLoopSimplifyCFGPass(); (void) llvm::createLoopStrengthReducePass(); Index: llvm/trunk/include/llvm/Transforms/Scalar.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar.h +++ llvm/trunk/include/llvm/Transforms/Scalar.h @@ -147,6 +147,12 @@ //===----------------------------------------------------------------------===// // +// LoopPredication - This pass does loop predication on guards. +// +Pass *createLoopPredicationPass(); + +//===----------------------------------------------------------------------===// +// // LoopInterchange - This pass interchanges loops to provide a more // cache-friendly memory access patterns. // Index: llvm/trunk/include/llvm/Transforms/Scalar/LoopPredication.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar/LoopPredication.h +++ llvm/trunk/include/llvm/Transforms/Scalar/LoopPredication.h @@ -0,0 +1,32 @@ +//===- LoopPredication.h - Guard based loop predication pass ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass tries to convert loop variant range checks to loop invariant by +// widening checks across loop iterations. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_LOOPPREDICATION_H +#define LLVM_TRANSFORMS_SCALAR_LOOPPREDICATION_H + +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" + +namespace llvm { + +/// Performs Loop Predication Pass. +class LoopPredicationPass : public PassInfoMixin { +public: + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); +}; +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_LOOPPREDICATION_H Index: llvm/trunk/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/trunk/lib/Passes/PassBuilder.cpp +++ llvm/trunk/lib/Passes/PassBuilder.cpp @@ -105,6 +105,7 @@ #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" #include "llvm/Transforms/Scalar/LoopInstSimplify.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Transforms/Scalar/LoopPredication.h" #include "llvm/Transforms/Scalar/LoopRotation.h" #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h" #include "llvm/Transforms/Scalar/LoopSink.h" Index: llvm/trunk/lib/Passes/PassRegistry.def =================================================================== --- llvm/trunk/lib/Passes/PassRegistry.def +++ llvm/trunk/lib/Passes/PassRegistry.def @@ -227,4 +227,5 @@ LOOP_PASS("unroll", LoopUnrollPass()) LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs())) LOOP_PASS("print", IVUsersPrinterPass(dbgs())) +LOOP_PASS("loop-predication", LoopPredicationPass()) #undef LOOP_PASS Index: llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt +++ llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt @@ -29,6 +29,7 @@ LoopInterchange.cpp LoopLoadElimination.cpp LoopPassManager.cpp + LoopPredication.cpp LoopRerollPass.cpp LoopRotation.cpp LoopSimplifyCFG.cpp Index: llvm/trunk/lib/Transforms/Scalar/LoopPredication.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopPredication.cpp +++ llvm/trunk/lib/Transforms/Scalar/LoopPredication.cpp @@ -0,0 +1,280 @@ +//===-- LoopPredication.cpp - Guard based loop predication pass -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The LoopPredication pass tries to convert loop variant range checks to loop +// invariant by widening checks across loop iterations. For example, it will +// convert +// +// for (i = 0; i < n; i++) { +// guard(i < len); +// ... +// } +// +// to +// +// for (i = 0; i < n; i++) { +// guard(n - 1 < len); +// ... +// } +// +// After this transformation the condition of the guard is loop invariant, so +// loop-unswitch can later unswitch the loop by this condition which basically +// predicates the loop by the widened condition: +// +// if (n - 1 < len) +// for (i = 0; i < n; i++) { +// ... +// } +// else +// deoptimize +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/LoopPredication.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/LoopUtils.h" + +#define DEBUG_TYPE "loop-predication" + +using namespace llvm; + +namespace { +class LoopPredication { + ScalarEvolution *SE; + + Loop *L; + const DataLayout *DL; + BasicBlock *Preheader; + + Optional widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander, + IRBuilder<> &Builder); + bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander); + +public: + LoopPredication(ScalarEvolution *SE) : SE(SE){}; + bool runOnLoop(Loop *L); +}; + +class LoopPredicationLegacyPass : public LoopPass { +public: + static char ID; + LoopPredicationLegacyPass() : LoopPass(ID) { + initializeLoopPredicationLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + getLoopAnalysisUsage(AU); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM) override { + if (skipLoop(L)) + return false; + auto *SE = &getAnalysis().getSE(); + LoopPredication LP(SE); + return LP.runOnLoop(L); + } +}; + +char LoopPredicationLegacyPass::ID = 0; +} // end namespace llvm + +INITIALIZE_PASS_BEGIN(LoopPredicationLegacyPass, "loop-predication", + "Loop predication", false, false) +INITIALIZE_PASS_DEPENDENCY(LoopPass) +INITIALIZE_PASS_END(LoopPredicationLegacyPass, "loop-predication", + "Loop predication", false, false) + +Pass *llvm::createLoopPredicationPass() { + return new LoopPredicationLegacyPass(); +} + +PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + LoopPredication LP(&AR.SE); + if (!LP.runOnLoop(&L)) + return PreservedAnalyses::all(); + + return getLoopPassPreservedAnalyses(); +} + +/// If ICI can be widened to a loop invariant condition emits the loop +/// invariant condition in the loop preheader and return it, otherwise +/// returns None. +Optional LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, + SCEVExpander &Expander, + IRBuilder<> &Builder) { + DEBUG(dbgs() << "Analyzing ICmpInst condition:\n"); + DEBUG(ICI->dump()); + + ICmpInst::Predicate Pred = ICI->getPredicate(); + Value *LHS = ICI->getOperand(0); + Value *RHS = ICI->getOperand(1); + const SCEV *LHSS = SE->getSCEV(LHS); + if (isa(LHSS)) + return None; + const SCEV *RHSS = SE->getSCEV(RHS); + if (isa(RHSS)) + return None; + + // Canonicalize RHS to be loop invariant bound, LHS - a loop computable index + if (SE->isLoopInvariant(LHSS, L)) { + std::swap(LHS, RHS); + std::swap(LHSS, RHSS); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + if (!SE->isLoopInvariant(RHSS, L)) + return None; + + Value *Bound = RHS; + const SCEVAddRecExpr *IndexAR = dyn_cast(LHSS); + if (!IndexAR || IndexAR->getLoop() != L) + return None; + + DEBUG(dbgs() << "IndexAR: "); + DEBUG(IndexAR->dump()); + + bool IsIncreasing = false; + if (!SE->isMonotonicPredicate(IndexAR, Pred, IsIncreasing)) + return None; + + // If the predicate is increasing the condition can change from false to true + // as the loop progresses, in this case take the value on the first iteration + // for the widened check. Otherwise the condition can change from true to + // false as the loop progresses, so take the value on the last iteration. + const SCEV *NewLHSS = IsIncreasing + ? IndexAR->getStart() + : SE->getSCEVAtScope(IndexAR, L->getParentLoop()); + if (NewLHSS == IndexAR) { + DEBUG(dbgs() << "Can't compute NewLHSS!"); + return None; + } + + DEBUG(dbgs() << "NewLHSS: "); + DEBUG(NewLHSS->dump()); + + if (!SE->isLoopInvariant(NewLHSS, L) || !isSafeToExpand(NewLHSS, *SE)) + return None; + + DEBUG(dbgs() << "NewLHSS is loop invariant and safe to expand. Expand!\n"); + + Value *NewLHS = Expander.expandCodeFor(NewLHSS, Bound->getType(), + Preheader->getTerminator()); + return Builder.CreateICmp(Pred, NewLHS, Bound); +} + +bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard, + SCEVExpander &Expander) { + DEBUG(dbgs() << "Processing guard:\n"); + DEBUG(Guard->dump()); + + IRBuilder<> Builder(cast(Preheader->getTerminator())); + + // The guard condition is expected to be in form of: + // cond1 && cond2 && cond3 ... + // Iterate over subconditions looking for for icmp conditions which can be + // widened across loop iterations. Widening these conditions remember the + // resulting list of subconditions in Checks vector. + SmallVector Worklist(1, Guard->getOperand(0)); + SmallPtrSet Visited; + + SmallVector Checks; + + unsigned NumWidened = 0; + do { + Value *Condition = Worklist.pop_back_val(); + if (!Visited.insert(Condition).second) + continue; + + Value *LHS, *RHS; + using namespace llvm::PatternMatch; + if (match(Condition, m_And(m_Value(LHS), m_Value(RHS)))) { + Worklist.push_back(LHS); + Worklist.push_back(RHS); + continue; + } + + if (ICmpInst *ICI = dyn_cast(Condition)) { + if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander, Builder)) { + Checks.push_back(NewRangeCheck.getValue()); + NumWidened++; + continue; + } + } + + // Save the condition as is if we can't widen it + Checks.push_back(Condition); + } while (Worklist.size() != 0); + + if (NumWidened == 0) + return false; + + // Emit the new guard condition + Builder.SetInsertPoint(Guard); + Value *LastCheck = nullptr; + for (auto *Check : Checks) + if (!LastCheck) + LastCheck = Check; + else + LastCheck = Builder.CreateAnd(LastCheck, Check); + Guard->setOperand(0, LastCheck); + + DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n"); + return true; +} + +bool LoopPredication::runOnLoop(Loop *Loop) { + L = Loop; + + DEBUG(dbgs() << "Analyzing "); + DEBUG(L->dump()); + + Module *M = L->getHeader()->getModule(); + + // There is nothing to do if the module doesn't use guards + auto *GuardDecl = + M->getFunction(Intrinsic::getName(Intrinsic::experimental_guard)); + if (!GuardDecl || GuardDecl->use_empty()) + return false; + + DL = &M->getDataLayout(); + + Preheader = L->getLoopPreheader(); + if (!Preheader) + return false; + + // Collect all the guards into a vector and process later, so as not + // to invalidate the instruction iterator. + SmallVector Guards; + for (const auto BB : L->blocks()) + for (auto &I : *BB) + if (auto *II = dyn_cast(&I)) + if (II->getIntrinsicID() == Intrinsic::experimental_guard) + Guards.push_back(II); + + SCEVExpander Expander(*SE, *DL, "loop-predication"); + + bool Changed = false; + for (auto *Guard : Guards) + Changed |= widenGuardConditions(Guard, Expander); + + return Changed; +} Index: llvm/trunk/lib/Transforms/Scalar/Scalar.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/Scalar.cpp +++ llvm/trunk/lib/Transforms/Scalar/Scalar.cpp @@ -58,6 +58,7 @@ initializeLoopAccessLegacyAnalysisPass(Registry); initializeLoopInstSimplifyLegacyPassPass(Registry); initializeLoopInterchangePass(Registry); + initializeLoopPredicationLegacyPassPass(Registry); initializeLoopRotateLegacyPassPass(Registry); initializeLoopStrengthReducePass(Registry); initializeLoopRerollPass(Registry); Index: llvm/trunk/test/Transforms/LoopPredication/basic.ll =================================================================== --- llvm/trunk/test/Transforms/LoopPredication/basic.ll +++ llvm/trunk/test/Transforms/LoopPredication/basic.ll @@ -0,0 +1,496 @@ +; RUN: opt -S -loop-predication < %s 2>&1 | FileCheck %s +; RUN: opt -S -passes='require,loop(loop-predication)' < %s 2>&1 | FileCheck %s + +declare void @llvm.experimental.guard(i1, ...) + +define i32 @unsigned_loop_0_to_n_ult_check(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @unsigned_loop_0_to_n_ult_check +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 +; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %within.bounds = icmp ult i32 %i, %length + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + +define i32 @unsigned_loop_0_to_n_ugt_check(i32* %array, i32 %length, i32 %n) { +; CHECK-LABEL: @unsigned_loop_0_to_n_ugt_check +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 +; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %within.bounds = icmp ugt i32 %length, %i + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + + +define i32 @two_range_checks(i32* %array.1, i32 %length.1, + i32* %array.2, i32 %length.2, i32 %n) { +; CHECK-LABEL: @two_range_checks +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 +; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2}} +; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2}} +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]] +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %within.bounds.1 = icmp ult i32 %i, %length.1 + %within.bounds.2 = icmp ult i32 %i, %length.2 + %within.bounds = and i1 %within.bounds.1, %within.bounds.2 + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i + + %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 + %array.2.i = load i32, i32* %array.2.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.1, %array.2.i + + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + +define i32 @three_range_checks(i32* %array.1, i32 %length.1, + i32* %array.2, i32 %length.2, + i32* %array.3, i32 %length.3, i32 %n) { +; CHECK-LABEL: @three_range_checks +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 +; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}} +; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}} +; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}} +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: [[wide_cond_and:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]] +; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_and]], [[wide_cond_3]] +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %within.bounds.1 = icmp ult i32 %i, %length.1 + %within.bounds.2 = icmp ult i32 %i, %length.2 + %within.bounds.3 = icmp ult i32 %i, %length.3 + %within.bounds.1.and.2 = and i1 %within.bounds.1, %within.bounds.2 + %within.bounds = and i1 %within.bounds.1.and.2, %within.bounds.3 + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i + + %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 + %array.2.i = load i32, i32* %array.2.i.ptr, align 4 + %loop.acc.2 = add i32 %loop.acc.1, %array.2.i + + %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 + %array.3.i = load i32, i32* %array.3.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.2, %array.3.i + + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + +define i32 @three_guards(i32* %array.1, i32 %length.1, + i32* %array.2, i32 %length.2, + i32* %array.3, i32 %length.3, i32 %n) { +; CHECK-LABEL: @three_guards +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 +; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.1 +; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.2 +; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = icmp ult i32 [[max_index]], %length.3 +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_1]], i32 9) [ "deopt"() ] +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_2]], i32 9) [ "deopt"() ] +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_3]], i32 9) [ "deopt"() ] + + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + + %within.bounds.1 = icmp ult i32 %i, %length.1 + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.1, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64 + %array.1.i = load i32, i32* %array.1.i.ptr, align 4 + %loop.acc.1 = add i32 %loop.acc, %array.1.i + + %within.bounds.2 = icmp ult i32 %i, %length.2 + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.2, i32 9) [ "deopt"() ] + + %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64 + %array.2.i = load i32, i32* %array.2.i.ptr, align 4 + %loop.acc.2 = add i32 %loop.acc.1, %array.2.i + + %within.bounds.3 = icmp ult i32 %i, %length.3 + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.3, i32 9) [ "deopt"() ] + + %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64 + %array.3.i = load i32, i32* %array.3.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc.2, %array.3.i + + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + +define i32 @signed_loop_start_to_n_sge_0_check(i32* %array, i32 %length, i32 %start, i32 %n) { +; CHECK-LABEL: @signed_loop_start_to_n_sge_0_check +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp sge i32 %start, 0 +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ] + %within.bounds = icmp sge i32 %i, 0 + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + + %i.next = add nsw i32 %i, 1 + %continue = icmp slt i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + +define i32 @signed_loop_start_to_n_upper_slt_length_check(i32* %array, i32 %length, i32 %start, i32 %n) { +; CHECK-LABEL: @signed_loop_start_to_n_upper_slt_length_check +entry: + %tmp5 = icmp sle i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK: [[start_1:[^ ]+]] = add i32 %start, 1 +; CHECK-NEXT: [[n_sgt_start_1:[^ ]+]] = icmp sgt i32 %n, [[start_1]] +; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[n_sgt_start_1]], i32 %n, i32 [[start_1]] +; CHECK-NEXT: [[max_index:[^ ]+]] = add i32 [[smax]], -1 +; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp slt i32 [[max_index]], %length +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ] + %within.bounds = icmp slt i32 %i, %length + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + + %i.next = add nsw i32 %i, 1 + %continue = icmp slt i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + +define i32 @signed_loop_start_to_n_both_checks(i32* %array, i32 %length, i32 %start, i32 %n) { +; CHECK-LABEL: @signed_loop_start_to_n_both_checks +entry: + %tmp5 = icmp sle i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK: [[lower_check:[^ ]+]] = icmp sge i32 %start, 0 +; CHECK-NEXT: [[start_1:[^ ]+]] = add i32 %start, 1 +; CHECK-NEXT: [[n_sgt_start_1:[^ ]+]] = icmp sgt i32 %n, [[start_1]] +; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[n_sgt_start_1]], i32 %n, i32 [[start_1]] +; CHECK-NEXT: [[max_index:[^ ]+]] = add i32 [[smax]], -1 +; CHECK-NEXT: [[upper_check:[^ ]+]] = icmp slt i32 [[max_index]], %length +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: [[wide_cond:[^ ]+]] = and i1 [[lower_check]], [[upper_check]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ] + %within.bounds.1 = icmp slt i32 %i, %length + %within.bounds.2 = icmp sge i32 %i, 0 + %within.bounds = and i1 %within.bounds.1, %within.bounds.2 + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + + %i.next = add nsw i32 %i, 1 + %continue = icmp slt i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + +define i32 @unsigned_loop_0_to_n_unrelated_condition(i32* %array, i32 %length, i32 %n, i32 %x) { +; CHECK-LABEL: @unsigned_loop_0_to_n_unrelated_condition +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1 +; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: %unrelated.cond = icmp ult i32 %x, %length +; CHECK: [[guard_cond:[^ ]+]] = and i1 %unrelated.cond, [[wide_cond]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[guard_cond]], i32 9) [ "deopt"() ] + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %within.bounds = icmp ult i32 %i, %length + %unrelated.cond = icmp ult i32 %x, %length + %guard.cond = and i1 %within.bounds, %unrelated.cond + call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + +; Don't change the guard condition if there were no widened subconditions +define i32 @test_no_widened_conditions(i32* %array, i32 %length, i32 %n, i32 %x1, i32 %x2, i32 %x3) { +; CHECK-LABEL: @test_no_widened_conditions +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: %unrelated.cond.1 = icmp eq i32 %x1, %i +; CHECK-NEXT: %unrelated.cond.2 = icmp eq i32 %x2, %i +; CHECK-NEXT: %unrelated.cond.3 = icmp eq i32 %x3, %i +; CHECK-NEXT: %unrelated.cond.and.1 = and i1 %unrelated.cond.1, %unrelated.cond.2 +; CHECK-NEXT: %guard.cond = and i1 %unrelated.cond.and.1, %unrelated.cond.3 +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond, i32 9) [ "deopt"() ] + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %unrelated.cond.1 = icmp eq i32 %x1, %i + %unrelated.cond.2 = icmp eq i32 %x2, %i + %unrelated.cond.3 = icmp eq i32 %x3, %i + %unrelated.cond.and.1 = and i1 %unrelated.cond.1, %unrelated.cond.2 + %guard.cond = and i1 %unrelated.cond.and.1, %unrelated.cond.3 + + call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + +define i32 @signed_loop_start_to_n_loop_variant_bound(i32* %array, i32 %x, i32 %start, i32 %n) { +; CHECK-LABEL: @signed_loop_start_to_n_loop_variant_bound +entry: + %tmp5 = icmp sle i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: %bound = add i32 %i, %x +; CHECK-NEXT: %within.bounds = icmp slt i32 %i, %bound +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ] + %bound = add i32 %i, %x + %within.bounds = icmp slt i32 %i, %bound + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + + %i.next = add nsw i32 %i, 1 + %continue = icmp slt i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} + +define i32 @signed_loop_start_to_n_non_monotonic_predicate(i32* %array, i32 %x, i32 %start, i32 %n) { +; CHECK-LABEL: @signed_loop_start_to_n_non_monotonic_predicate +entry: + %tmp5 = icmp sle i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: %guard.cond = icmp eq i32 %i, %x +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond, i32 9) [ "deopt"() ] + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ] + %guard.cond = icmp eq i32 %i, %x + call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + + %i.next = add nsw i32 %i, 1 + %continue = icmp slt i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} \ No newline at end of file Index: llvm/trunk/test/Transforms/LoopPredication/nested.ll =================================================================== --- llvm/trunk/test/Transforms/LoopPredication/nested.ll +++ llvm/trunk/test/Transforms/LoopPredication/nested.ll @@ -0,0 +1,160 @@ +; RUN: opt -S -loop-predication < %s 2>&1 | FileCheck %s +; RUN: opt -S -passes='require,loop(loop-predication)' < %s 2>&1 | FileCheck %s + +declare void @llvm.experimental.guard(i1, ...) + +define i32 @signed_loop_0_to_n_nested_0_to_l_inner_index_check(i32* %array, i32 %length, i32 %n, i32 %l) { +; CHECK-LABEL: @signed_loop_0_to_n_nested_0_to_l_inner_index_check +entry: + %tmp5 = icmp sle i32 %n, 0 + br i1 %tmp5, label %exit, label %outer.loop.preheader + +outer.loop.preheader: +; CHECK: outer.loop.preheader: +; CHECK: [[iteration_count:[^ ]+]] = add i32 %l, -1 + br label %outer.loop + +outer.loop: + %outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] + %i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] + %tmp6 = icmp sle i32 %l, 0 + br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader + +inner.loop.preheader: +; CHECK: inner.loop.preheader: +; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[iteration_count]], %length + br label %inner.loop + +inner.loop: +; CHECK: inner.loop: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] + %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ] + %j = phi i32 [ %j.next, %inner.loop ], [ 0, %inner.loop.preheader ] + + %within.bounds = icmp slt i32 %j, %length + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + + %j.i64 = zext i32 %j to i64 + %array.j.ptr = getelementptr inbounds i32, i32* %array, i64 %j.i64 + %array.j = load i32, i32* %array.j.ptr, align 4 + %inner.loop.acc.next = add i32 %inner.loop.acc, %array.j + + %j.next = add nsw i32 %j, 1 + %inner.continue = icmp slt i32 %j.next, %l + br i1 %inner.continue, label %inner.loop, label %outer.loop.inc + +outer.loop.inc: + %outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ] + %i.next = add nsw i32 %i, 1 + %outer.continue = icmp slt i32 %i.next, %n + br i1 %outer.continue, label %outer.loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ] + ret i32 %result +} + +define i32 @signed_loop_0_to_n_nested_0_to_l_outer_index_check(i32* %array, i32 %length, i32 %n, i32 %l) { +; CHECK-LABEL: @signed_loop_0_to_n_nested_0_to_l_outer_index_check +entry: + %tmp5 = icmp sle i32 %n, 0 + br i1 %tmp5, label %exit, label %outer.loop.preheader + +outer.loop.preheader: +; CHECK: outer.loop.preheader: +; CHECK: [[iteration_count:[^ ]+]] = add i32 %n, -1 +; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[iteration_count]], %length + br label %outer.loop + +outer.loop: + %outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] + %i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] + %tmp6 = icmp sle i32 %l, 0 + br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader + +inner.loop.preheader: + br label %inner.loop + +inner.loop: +; CHECK: inner.loop: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] + + %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ] + %j = phi i32 [ %j.next, %inner.loop ], [ 0, %inner.loop.preheader ] + + %within.bounds = icmp slt i32 %i, %length + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %inner.loop.acc.next = add i32 %inner.loop.acc, %array.i + + %j.next = add nsw i32 %j, 1 + %inner.continue = icmp slt i32 %j.next, %l + br i1 %inner.continue, label %inner.loop, label %outer.loop.inc + +outer.loop.inc: + %outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ] + %i.next = add nsw i32 %i, 1 + %outer.continue = icmp slt i32 %i.next, %n + br i1 %outer.continue, label %outer.loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ] + ret i32 %result +} + +define i32 @signed_loop_0_to_n_nested_i_to_l_inner_index_check(i32* %array, i32 %length, i32 %n, i32 %l) { +; CHECK-LABEL: @signed_loop_0_to_n_nested_i_to_l_inner_index_check +entry: + %tmp5 = icmp sle i32 %n, 0 + br i1 %tmp5, label %exit, label %outer.loop.preheader + +outer.loop.preheader: + br label %outer.loop + +outer.loop: +; CHECK: outer.loop: +; CHECK: [[i_1:[^ ]+]] = add i32 %i, 1 +; CHECK-NEXT: [[l_sgt_i_1:[^ ]+]] = icmp sgt i32 %l, [[i_1]] +; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[l_sgt_i_1]], i32 %l, i32 [[i_1]] +; CHECK-NEXT: [[max_j:[^ ]+]] = add i32 [[smax]], -1 + %outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] + %i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ] + %tmp6 = icmp sle i32 %l, 0 + br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader + +inner.loop.preheader: +; CHECK: inner.loop.preheader: +; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[max_j]], %length + br label %inner.loop + +inner.loop: +; CHECK: inner.loop: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ] + %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ] + %j = phi i32 [ %j.next, %inner.loop ], [ %i, %inner.loop.preheader ] + + %within.bounds = icmp slt i32 %j, %length + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + + %j.i64 = zext i32 %j to i64 + %array.j.ptr = getelementptr inbounds i32, i32* %array, i64 %j.i64 + %array.j = load i32, i32* %array.j.ptr, align 4 + %inner.loop.acc.next = add i32 %inner.loop.acc, %array.j + + %j.next = add nsw i32 %j, 1 + %inner.continue = icmp slt i32 %j.next, %l + br i1 %inner.continue, label %inner.loop, label %outer.loop.inc + +outer.loop.inc: + %outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ] + %i.next = add nsw i32 %i, 1 + %outer.continue = icmp slt i32 %i.next, %n + br i1 %outer.continue, label %outer.loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ] + ret i32 %result +} \ No newline at end of file Index: llvm/trunk/test/Transforms/LoopPredication/visited.ll =================================================================== --- llvm/trunk/test/Transforms/LoopPredication/visited.ll +++ llvm/trunk/test/Transforms/LoopPredication/visited.ll @@ -0,0 +1,140 @@ +; RUN: opt -S -loop-predication < %s 2>&1 | FileCheck %s +; RUN: opt -S -passes='require,loop(loop-predication)' < %s 2>&1 | FileCheck %s + +declare void @llvm.experimental.guard(i1, ...) + +define i32 @test_visited(i32* %array, i32 %length, i32 %n, i32 %x) { +; CHECK-LABEL: @test_visited +entry: + %tmp5 = icmp eq i32 %n, 0 + br i1 %tmp5, label %exit, label %loop.preheader + +loop.preheader: +; CHECK: loop.preheader: +; CHECK: [[iteration_count:[^ ]+]] = add i32 %n, -1 +; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[iteration_count]], %length +; CHECK-NEXT: br label %loop + br label %loop + +loop: +; CHECK: loop: +; CHECK: %unrelated.cond = icmp eq i32 %x, %i +; CHECK: [[guard_cond:[^ ]+]] = and i1 %unrelated.cond, [[wide_cond]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[guard_cond]], i32 9) [ "deopt"() ] + %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ] + %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ] + %within.bounds = icmp ult i32 %i, %length + %unrelated.cond = icmp eq i32 %x, %i + %guard.cond.2 = and i1 %within.bounds, %unrelated.cond + %guard.cond.3 = and i1 %guard.cond.2, %unrelated.cond + %guard.cond.4 = and i1 %guard.cond.3, %guard.cond.2 + %guard.cond.5 = and i1 %guard.cond.4, %guard.cond.3 + %guard.cond.6 = and i1 %guard.cond.5, %guard.cond.4 + %guard.cond.7 = and i1 %guard.cond.6, %guard.cond.5 + %guard.cond.8 = and i1 %guard.cond.7, %guard.cond.6 + %guard.cond.9 = and i1 %guard.cond.8, %guard.cond.7 + %guard.cond.10 = and i1 %guard.cond.9, %guard.cond.8 + %guard.cond.11 = and i1 %guard.cond.10, %guard.cond.9 + %guard.cond.12 = and i1 %guard.cond.11, %guard.cond.10 + %guard.cond.13 = and i1 %guard.cond.12, %guard.cond.11 + %guard.cond.14 = and i1 %guard.cond.13, %guard.cond.12 + %guard.cond.15 = and i1 %guard.cond.14, %guard.cond.13 + %guard.cond.16 = and i1 %guard.cond.15, %guard.cond.14 + %guard.cond.17 = and i1 %guard.cond.16, %guard.cond.15 + %guard.cond.18 = and i1 %guard.cond.17, %guard.cond.16 + %guard.cond.19 = and i1 %guard.cond.18, %guard.cond.17 + %guard.cond.20 = and i1 %guard.cond.19, %guard.cond.18 + %guard.cond.21 = and i1 %guard.cond.20, %guard.cond.19 + %guard.cond.22 = and i1 %guard.cond.21, %guard.cond.20 + %guard.cond.23 = and i1 %guard.cond.22, %guard.cond.21 + %guard.cond.24 = and i1 %guard.cond.23, %guard.cond.22 + %guard.cond.25 = and i1 %guard.cond.24, %guard.cond.23 + %guard.cond.26 = and i1 %guard.cond.25, %guard.cond.24 + %guard.cond.27 = and i1 %guard.cond.26, %guard.cond.25 + %guard.cond.28 = and i1 %guard.cond.27, %guard.cond.26 + %guard.cond.29 = and i1 %guard.cond.28, %guard.cond.27 + %guard.cond.30 = and i1 %guard.cond.29, %guard.cond.28 + %guard.cond.31 = and i1 %guard.cond.30, %guard.cond.29 + %guard.cond.32 = and i1 %guard.cond.31, %guard.cond.30 + %guard.cond.33 = and i1 %guard.cond.32, %guard.cond.31 + %guard.cond.34 = and i1 %guard.cond.33, %guard.cond.32 + %guard.cond.35 = and i1 %guard.cond.34, %guard.cond.33 + %guard.cond.36 = and i1 %guard.cond.35, %guard.cond.34 + %guard.cond.37 = and i1 %guard.cond.36, %guard.cond.35 + %guard.cond.38 = and i1 %guard.cond.37, %guard.cond.36 + %guard.cond.39 = and i1 %guard.cond.38, %guard.cond.37 + %guard.cond.40 = and i1 %guard.cond.39, %guard.cond.38 + %guard.cond.41 = and i1 %guard.cond.40, %guard.cond.39 + %guard.cond.42 = and i1 %guard.cond.41, %guard.cond.40 + %guard.cond.43 = and i1 %guard.cond.42, %guard.cond.41 + %guard.cond.44 = and i1 %guard.cond.43, %guard.cond.42 + %guard.cond.45 = and i1 %guard.cond.44, %guard.cond.43 + %guard.cond.46 = and i1 %guard.cond.45, %guard.cond.44 + %guard.cond.47 = and i1 %guard.cond.46, %guard.cond.45 + %guard.cond.48 = and i1 %guard.cond.47, %guard.cond.46 + %guard.cond.49 = and i1 %guard.cond.48, %guard.cond.47 + %guard.cond.50 = and i1 %guard.cond.49, %guard.cond.48 + %guard.cond.51 = and i1 %guard.cond.50, %guard.cond.49 + %guard.cond.52 = and i1 %guard.cond.51, %guard.cond.50 + %guard.cond.53 = and i1 %guard.cond.52, %guard.cond.51 + %guard.cond.54 = and i1 %guard.cond.53, %guard.cond.52 + %guard.cond.55 = and i1 %guard.cond.54, %guard.cond.53 + %guard.cond.56 = and i1 %guard.cond.55, %guard.cond.54 + %guard.cond.57 = and i1 %guard.cond.56, %guard.cond.55 + %guard.cond.58 = and i1 %guard.cond.57, %guard.cond.56 + %guard.cond.59 = and i1 %guard.cond.58, %guard.cond.57 + %guard.cond.60 = and i1 %guard.cond.59, %guard.cond.58 + %guard.cond.61 = and i1 %guard.cond.60, %guard.cond.59 + %guard.cond.62 = and i1 %guard.cond.61, %guard.cond.60 + %guard.cond.63 = and i1 %guard.cond.62, %guard.cond.61 + %guard.cond.64 = and i1 %guard.cond.63, %guard.cond.62 + %guard.cond.65 = and i1 %guard.cond.64, %guard.cond.63 + %guard.cond.66 = and i1 %guard.cond.65, %guard.cond.64 + %guard.cond.67 = and i1 %guard.cond.66, %guard.cond.65 + %guard.cond.68 = and i1 %guard.cond.67, %guard.cond.66 + %guard.cond.69 = and i1 %guard.cond.68, %guard.cond.67 + %guard.cond.70 = and i1 %guard.cond.69, %guard.cond.68 + %guard.cond.71 = and i1 %guard.cond.70, %guard.cond.69 + %guard.cond.72 = and i1 %guard.cond.71, %guard.cond.70 + %guard.cond.73 = and i1 %guard.cond.72, %guard.cond.71 + %guard.cond.74 = and i1 %guard.cond.73, %guard.cond.72 + %guard.cond.75 = and i1 %guard.cond.74, %guard.cond.73 + %guard.cond.76 = and i1 %guard.cond.75, %guard.cond.74 + %guard.cond.77 = and i1 %guard.cond.76, %guard.cond.75 + %guard.cond.78 = and i1 %guard.cond.77, %guard.cond.76 + %guard.cond.79 = and i1 %guard.cond.78, %guard.cond.77 + %guard.cond.80 = and i1 %guard.cond.79, %guard.cond.78 + %guard.cond.81 = and i1 %guard.cond.80, %guard.cond.79 + %guard.cond.82 = and i1 %guard.cond.81, %guard.cond.80 + %guard.cond.83 = and i1 %guard.cond.82, %guard.cond.81 + %guard.cond.84 = and i1 %guard.cond.83, %guard.cond.82 + %guard.cond.85 = and i1 %guard.cond.84, %guard.cond.83 + %guard.cond.86 = and i1 %guard.cond.85, %guard.cond.84 + %guard.cond.87 = and i1 %guard.cond.86, %guard.cond.85 + %guard.cond.88 = and i1 %guard.cond.87, %guard.cond.86 + %guard.cond.89 = and i1 %guard.cond.88, %guard.cond.87 + %guard.cond.90 = and i1 %guard.cond.89, %guard.cond.88 + %guard.cond.91 = and i1 %guard.cond.90, %guard.cond.89 + %guard.cond.92 = and i1 %guard.cond.91, %guard.cond.90 + %guard.cond.93 = and i1 %guard.cond.92, %guard.cond.91 + %guard.cond.94 = and i1 %guard.cond.93, %guard.cond.92 + %guard.cond.95 = and i1 %guard.cond.94, %guard.cond.93 + %guard.cond.96 = and i1 %guard.cond.95, %guard.cond.94 + %guard.cond.97 = and i1 %guard.cond.96, %guard.cond.95 + %guard.cond.98 = and i1 %guard.cond.97, %guard.cond.96 + %guard.cond.99 = and i1 %guard.cond.98, %guard.cond.97 + call void (i1, ...) @llvm.experimental.guard(i1 %guard.cond.99, i32 9) [ "deopt"() ] + + %i.i64 = zext i32 %i to i64 + %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64 + %array.i = load i32, i32* %array.i.ptr, align 4 + %loop.acc.next = add i32 %loop.acc, %array.i + + %i.next = add nuw i32 %i, 1 + %continue = icmp ult i32 %i.next, %n + br i1 %continue, label %loop, label %exit + +exit: + %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] + ret i32 %result +} \ No newline at end of file