Index: llvm/trunk/include/llvm/InitializePasses.h =================================================================== --- llvm/trunk/include/llvm/InitializePasses.h +++ llvm/trunk/include/llvm/InitializePasses.h @@ -142,6 +142,7 @@ void initializeIPSCCPPass(PassRegistry&); void initializeIVUsersPass(PassRegistry&); void initializeIfConverterPass(PassRegistry&); +void initializeInductiveRangeCheckEliminationPass(PassRegistry&); void initializeIndVarSimplifyPass(PassRegistry&); void initializeInlineCostAnalysisPass(PassRegistry&); void initializeInstCombinerPass(PassRegistry&); Index: llvm/trunk/include/llvm/LinkAllPasses.h =================================================================== --- llvm/trunk/include/llvm/LinkAllPasses.h +++ llvm/trunk/include/llvm/LinkAllPasses.h @@ -86,6 +86,7 @@ (void) llvm::createGlobalsModRefPass(); (void) llvm::createIPConstantPropagationPass(); (void) llvm::createIPSCCPPass(); + (void) llvm::createInductiveRangeCheckEliminationPass(); (void) llvm::createIndVarSimplifyPass(); (void) llvm::createInstructionCombiningPass(); (void) llvm::createInternalizePass(); Index: llvm/trunk/include/llvm/Transforms/Scalar.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar.h +++ llvm/trunk/include/llvm/Transforms/Scalar.h @@ -98,6 +98,13 @@ //===----------------------------------------------------------------------===// // +// InductiveRangeCheckElimination - Transform loops to elide range checks on +// linear functions of the induction variable. +// +Pass *createInductiveRangeCheckEliminationPass(); + +//===----------------------------------------------------------------------===// +// // InductionVariableSimplify - Transform induction variables in a program to all // use a single canonical induction variable per loop. 
// Index: llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt +++ llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt @@ -9,6 +9,7 @@ EarlyCSE.cpp FlattenCFGPass.cpp GVN.cpp + InductiveRangeCheckElimination.cpp IndVarSimplify.cpp JumpThreading.cpp LICM.cpp Index: llvm/trunk/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ llvm/trunk/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -0,0 +1,1189 @@ +//===-- InductiveRangeCheckElimination.cpp - ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// The InductiveRangeCheckElimination pass splits a loop's iteration space into +// three disjoint ranges. It does that in a way such that the loop running in +// the middle range provably does not need range checks. 
As an example, it will +// convert +// +// len = < known positive > +// for (i = 0; i < n; i++) { +// if (0 <= i && i < len) { +// do_something(); +// } else { +// throw_out_of_bounds(); +// } +// } +// +// to +// +// len = < known positive > +// limit = smin(n, len) +// // no first segment +// for (i = 0; i < limit; i++) { +// if (0 <= i && i < len) { // this check is fully redundant +// do_something(); +// } else { +// throw_out_of_bounds(); +// } +// } +// for (i = limit; i < n; i++) { +// if (0 <= i && i < len) { +// do_something(); +// } else { +// throw_out_of_bounds(); +// } +// } +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Optional.h" + +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ValueTracking.h" + +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/IR/Verifier.h" + +#include "llvm/Support/Debug.h" + +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" + +#include "llvm/Pass.h" + +#include + +using namespace llvm; + +cl::opt LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden, + cl::init(64)); + +cl::opt PrintChangedLoops("irce-print-changed-loops", cl::Hidden, + cl::init(false)); + +#define DEBUG_TYPE "irce" + +namespace { + +/// An inductive range check is conditional branch in a loop with +/// +/// 1. a very cold successor (i.e. 
the branch jumps to that successor very +/// rarely) +/// +/// and +/// +/// 2. a condition that is provably true for some range of values taken by the +/// containing loop's induction variable. +/// +/// Currently all inductive range checks are branches conditional on an +/// expression of the form +/// +/// 0 <= (Offset + Scale * I) < Length +/// +/// where `I' is the canonical induction variable of a loop to which Offset and +/// Scale are loop invariant, and Length is >= 0. Currently the 'false' branch +/// is considered cold, looking at profiling data to verify that is a TODO. + +class InductiveRangeCheck { + const SCEV *Offset = nullptr; + const SCEV *Scale = nullptr; + Value *Length = nullptr; + BranchInst *Branch = nullptr; + + InductiveRangeCheck() {} + +public: + const SCEV *getOffset() const { return Offset; } + const SCEV *getScale() const { return Scale; } + Value *getLength() const { return Length; } + + void print(raw_ostream &OS) const { + OS << "InductiveRangeCheck:\n"; + OS << " Offset: "; + Offset->print(OS); + OS << " Scale: "; + Scale->print(OS); + OS << " Length: "; + Length->print(OS); + OS << " Branch: "; + getBranch()->print(OS); + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + void dump() { + print(dbgs()); + } +#endif + + BranchInst *getBranch() const { return Branch; } + + /// Represents an integer range [Range.first, Range.second). If Range.second + /// < Range.first, then the value denotes the empty range. + typedef std::pair Range; + typedef SpecificBumpPtrAllocator AllocatorTy; + + /// This is the value the condition of the branch needs to evaluate to for the + /// branch to take the hot successor (see (1) above). + bool getPassingDirection() { return true; } + + /// Computes a range for the induction variable in which the range check is + /// redundant and can be constant-folded away. 
+ Optional computeSafeIterationSpace(ScalarEvolution &SE, + IRBuilder<> &B) const; + + /// Create an inductive range check out of BI if possible, else return + /// nullptr. + static InductiveRangeCheck *create(AllocatorTy &Alloc, BranchInst *BI, + Loop *L, ScalarEvolution &SE); +}; + +class InductiveRangeCheckElimination : public LoopPass { + InductiveRangeCheck::AllocatorTy Allocator; + +public: + static char ID; + InductiveRangeCheckElimination() : LoopPass(ID) { + initializeInductiveRangeCheckEliminationPass( + *PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequiredID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addRequired(); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM) override; +}; + +char InductiveRangeCheckElimination::ID = 0; +} + +INITIALIZE_PASS(InductiveRangeCheckElimination, "irce", + "Inductive range check elimination", false, false) + +static bool IsLowerBoundCheck(Value *Check, Value *&IndexV) { + using namespace llvm::PatternMatch; + + ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; + Value *LHS = nullptr, *RHS = nullptr; + + if (!match(Check, m_ICmp(Pred, m_Value(LHS), m_Value(RHS)))) + return false; + + switch (Pred) { + default: + return false; + + case ICmpInst::ICMP_SLE: + std::swap(LHS, RHS); + // fallthrough + case ICmpInst::ICMP_SGE: + if (!match(RHS, m_ConstantInt<0>())) + return false; + IndexV = LHS; + return true; + + case ICmpInst::ICMP_SLT: + std::swap(LHS, RHS); + // fallthrough + case ICmpInst::ICMP_SGT: + if (!match(RHS, m_ConstantInt<-1>())) + return false; + IndexV = LHS; + return true; + } +} + +static bool IsUpperBoundCheck(Value *Check, Value *Index, Value *&UpperLimit) { + using namespace llvm::PatternMatch; + + ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; + Value *LHS = nullptr, *RHS = nullptr; + + if (!match(Check, m_ICmp(Pred, m_Value(LHS), m_Value(RHS)))) + return false; + + switch (Pred) { + default: + 
return false; + + case ICmpInst::ICMP_SGT: + std::swap(LHS, RHS); + // fallthrough + case ICmpInst::ICMP_SLT: + if (LHS != Index) + return false; + UpperLimit = RHS; + return true; + + case ICmpInst::ICMP_UGT: + std::swap(LHS, RHS); + // fallthrough + case ICmpInst::ICMP_ULT: + if (LHS != Index) + return false; + UpperLimit = RHS; + return true; + } +} + +/// Split a condition into something semantically equivalent to (0 <= I < +/// Limit), both comparisons signed and Limit loop invariant on L and non-negative. +/// On success, return true and set Index to I and UpperLimit to Limit. Return +/// false on failure (we may still write to UpperLimit and Index on failure). +/// It does not try to interpret I as a loop index. +/// +static bool SplitRangeCheckCondition(Loop *L, ScalarEvolution &SE, + Value *Condition, const SCEV *&Index, + Value *&UpperLimit) { + + // TODO: currently this catches some silly cases like comparing "%idx slt 1". + // Our transformations are still correct, but less likely to be profitable in + // those cases. We have to come up with some heuristics that pick out the + // range checks that are more profitable to clone a loop for. This function + // in general can be made more robust. + + using namespace llvm::PatternMatch; + + Value *A = nullptr; + Value *B = nullptr; + ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; + + // In these early checks we assume that the matched UpperLimit is non-negative. + // We'll verify that fact later, before returning true. 
+ + if (match(Condition, m_And(m_Value(A), m_Value(B)))) { + Value *IndexV = nullptr; + Value *ExpectedUpperBoundCheck = nullptr; + + if (IsLowerBoundCheck(A, IndexV)) + ExpectedUpperBoundCheck = B; + else if (IsLowerBoundCheck(B, IndexV)) + ExpectedUpperBoundCheck = A; + else + return false; + + if (!IsUpperBoundCheck(ExpectedUpperBoundCheck, IndexV, UpperLimit)) + return false; + + Index = SE.getSCEV(IndexV); + + if (isa(Index)) + return false; + + } else if (match(Condition, m_ICmp(Pred, m_Value(A), m_Value(B)))) { + switch (Pred) { + default: + return false; + + case ICmpInst::ICMP_SGT: + std::swap(A, B); + // fall through + case ICmpInst::ICMP_SLT: + UpperLimit = B; + Index = SE.getSCEV(A); + if (isa(Index) || !SE.isKnownNonNegative(Index)) + return false; + break; + + case ICmpInst::ICMP_UGT: + std::swap(A, B); + // fall through + case ICmpInst::ICMP_ULT: + UpperLimit = B; + Index = SE.getSCEV(A); + if (isa(Index)) + return false; + break; + } + } else { + return false; + } + + const SCEV *UpperLimitSCEV = SE.getSCEV(UpperLimit); + if (isa(UpperLimitSCEV) || + !SE.isKnownNonNegative(UpperLimitSCEV)) + return false; + + if (SE.getLoopDisposition(UpperLimitSCEV, L) != + ScalarEvolution::LoopInvariant) { + DEBUG(dbgs() << " in function: " << L->getHeader()->getParent()->getName() + << " "; + dbgs() << " UpperLimit is not loop invariant: " + << UpperLimit->getName() << "\n";); + return false; + } + + return true; +} + +InductiveRangeCheck * +InductiveRangeCheck::create(InductiveRangeCheck::AllocatorTy &A, BranchInst *BI, + Loop *L, ScalarEvolution &SE) { + + if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch()) + return nullptr; + + Value *Length = nullptr; + const SCEV *IndexSCEV = nullptr; + + if (!SplitRangeCheckCondition(L, SE, BI->getCondition(), IndexSCEV, Length)) + return nullptr; + + assert(IndexSCEV && Length && "contract with SplitRangeCheckCondition!"); + + const SCEVAddRecExpr *IndexAddRec = dyn_cast(IndexSCEV); + bool IsAffineIndex = + 
IndexAddRec && (IndexAddRec->getLoop() == L) && IndexAddRec->isAffine(); + + if (!IsAffineIndex) + return nullptr; + + InductiveRangeCheck *IRC = new (A.Allocate()) InductiveRangeCheck; + IRC->Length = Length; + IRC->Offset = IndexAddRec->getStart(); + IRC->Scale = IndexAddRec->getStepRecurrence(SE); + IRC->Branch = BI; + return IRC; +} + +static Value *MaybeSimplify(Value *V) { + if (Instruction *I = dyn_cast(V)) + if (Value *Simplified = SimplifyInstruction(I)) + return Simplified; + return V; +} + +static Value *ConstructSMinOf(Value *X, Value *Y, IRBuilder<> &B) { + return MaybeSimplify(B.CreateSelect(B.CreateICmpSLT(X, Y), X, Y)); +}; + +static Value *ConstructSMaxOf(Value *X, Value *Y, IRBuilder<> &B) { + return MaybeSimplify(B.CreateSelect(B.CreateICmpSGT(X, Y), X, Y)); +}; + +namespace { + +/// This class is used to constrain loops to run within a given iteration space. +/// The algorithm this class implements is given a Loop and a range [Begin, +/// End). The algorithm then tries to break out a "main loop" out of the loop +/// it is given in a way that the "main loop" runs with the induction variable +/// in a subset of [Begin, End). The algorithm emits appropriate pre and post +/// loops to run any remaining iterations. The pre loop runs any iterations in +/// which the induction variable is < Begin, and the post loop runs any +/// iterations in which the induction variable is >= End. +/// +class LoopConstrainer { + + // Keeps track of the structure of a loop. This is similar to llvm::Loop, + // except that it is more lightweight and can track the state of a loop + // through changing and potentially invalid IR. This structure also + // formalizes the kinds of loops we can deal with -- ones that have a single + // latch that is also an exiting block *and* have a canonical induction + // variable. 
+ struct LoopStructure { + const char *Tag = ""; + + BasicBlock *Header = nullptr; + BasicBlock *Latch = nullptr; + + // `Latch's terminator instruction is `LatchBr', and its `LatchBrExitIdx'th + // successor is `LatchExit', the exit block of the loop. + BranchInst *LatchBr = nullptr; + BasicBlock *LatchExit = nullptr; + unsigned LatchBrExitIdx = -1; + + // The canonical induction variable. Its value is `CIVStart` on the 0th + // iteration and `CIVNext` for all iterations after that. + PHINode *CIV = nullptr; + Value *CIVStart = nullptr; + Value *CIVNext = nullptr; + + template LoopStructure map(M Map) const { + LoopStructure Result; + Result.Tag = Tag; + Result.Header = cast(Map(Header)); + Result.Latch = cast(Map(Latch)); + Result.LatchBr = cast(Map(LatchBr)); + Result.LatchExit = cast(Map(LatchExit)); + Result.LatchBrExitIdx = LatchBrExitIdx; + Result.CIV = cast(Map(CIV)); + Result.CIVNext = Map(CIVNext); + Result.CIVStart = Map(CIVStart); + return Result; + } + }; + + // The representation of a clone of the original loop we started out with. + struct ClonedLoop { + // The cloned blocks + std::vector Blocks; + + // `Map` maps values in the clonee into values in the cloned version + ValueToValueMapTy Map; + + // An instance of `LoopStructure` for the cloned loop + LoopStructure Structure; + }; + + // Result of rewriting the range of a loop. See changeIterationSpaceEnd for + // more details on what these fields mean. + struct RewrittenRangeInfo { + BasicBlock *PseudoExit = nullptr; + BasicBlock *ExitSelector = nullptr; + std::vector PHIValuesAtPseudoExit; + }; + + // Calculated subranges we restrict the iteration space of the main loop to. + // See the implementation of `calculateSubRanges' for more details on how + // these fields are computed. `ExitPreLoopAt' is `None' if we don't need a + // pre loop. `ExitMainLoopAt' is `None' if we don't need a post loop. + struct SubRanges { + Optional ExitPreLoopAt; + Optional ExitMainLoopAt; + }; + + // Some global state. 
+ Function *F = nullptr; + LLVMContext &Ctx; + ScalarEvolution &SE; + + // Information about the original loop we started out with. + Loop *OriginalLoop = nullptr; + LoopInfo *OriginalLoopInfo = nullptr; + const SCEV *LatchTakenCount = nullptr; + BasicBlock *OriginalPreheader = nullptr; + Value *OriginalHeaderCount = nullptr; + + // The range we need to run the main loop in. + InductiveRangeCheck::Range Range; + + // The structure of the main loop (see comment at the beginning of this class + // for a definition) + LoopStructure MainLoopStructure; + + // The preheader of the main loop. This may or may not be different from + // `OriginalPreheader'. + BasicBlock *MainLoopPreheader = nullptr; + + // A utility function that does a `replaceUsesOfWith' on the incoming block + // set of a `PHINode' -- replaces instances of `Block' in the `PHINode's + // incoming block list with `ReplaceBy'. + static void replacePHIBlock(PHINode *PN, BasicBlock *Block, + BasicBlock *ReplaceBy); + + // Try to "parse" `OriginalLoop' and populate the various out parameters. + // Returns true on success, false on failure. + // + bool recognizeLoop(LoopStructure &LoopStructureOut, + const SCEV *&LatchCountOut, BasicBlock *&PreHeaderOut, + const char *&FailureReasonOut) const; + + // Compute a safe set of limits for the main loop to run in -- effectively the + // intersection of `Range' and the iteration space of the original loop. + // Return the header count (1 + the latch taken count) in `HeaderCount'. + // + SubRanges calculateSubRanges(Value *&HeaderCount) const; + + // Clone `OriginalLoop' and return the result in CLResult. The IR after + // running `cloneLoop' is well formed except for the PHI nodes in CLResult -- + // the PHI nodes say that there is an incoming edge from `OriginalPreheader` + // but there is no such edge. + // + void cloneLoop(ClonedLoop &CLResult, const char *Tag) const; + + // Rewrite the iteration space of the loop denoted by (LS, Preheader). 
The + // iteration space of the rewritten loop ends at ExitLoopAt. The start of the + // iteration space is not changed. `ExitLoopAt' is assumed to be slt + // `OriginalHeaderCount'. + // + // If there are iterations left to execute, control is made to jump to + // `ContinuationBlock', otherwise they take the normal loop exit. The + // returned `RewrittenRangeInfo' object is populated as follows: + // + // .PseudoExit is a basic block that unconditionally branches to + // `ContinuationBlock'. + // + // .ExitSelector is a basic block that decides, on exit from the loop, + // whether to branch to the "true" exit or to `PseudoExit'. + // + // .PHIValuesAtPseudoExit are PHINodes in `PseudoExit' that compute the value + // for each PHINode in the loop header on taking the pseudo exit. + // + // After changeIterationSpaceEnd, `Preheader' is no longer a legitimate + // preheader because it is made to branch to the loop header only + // conditionally. + // + RewrittenRangeInfo + changeIterationSpaceEnd(const LoopStructure &LS, BasicBlock *Preheader, + Value *ExitLoopAt, + BasicBlock *ContinuationBlock) const; + + // The loop denoted by `LS' has `OldPreheader' as its preheader. This + // function creates a new preheader for `LS' and returns it. + // + BasicBlock *createPreheader(const LoopConstrainer::LoopStructure &LS, + BasicBlock *OldPreheader, const char *Tag) const; + + // `ContinuationBlockAndPreheader' was the continuation block for some call to + // `changeIterationSpaceEnd' and is the preheader to the loop denoted by `LS'. + // This function rewrites the PHI nodes in `LS.Header' to start with the + // correct value. + void rewriteIncomingValuesForPHIs( + LoopConstrainer::LoopStructure &LS, + BasicBlock *ContinuationBlockAndPreheader, + const LoopConstrainer::RewrittenRangeInfo &RRI) const; + + // Even though we do not preserve any passes at this time, we at least need to + // keep the parent loop structure consistent. 
The `LPPassManager' seems to + // verify this after running a loop pass. This function adds the list of + // blocks denoted by the iterator range [BlocksBegin, BlocksEnd) to this loops + // parent loop if required. + template + void addToParentLoopIfNeeded(IteratorTy BlocksBegin, IteratorTy BlocksEnd); + +public: + LoopConstrainer(Loop *L, LoopInfo *LI, ScalarEvolution &SE, + InductiveRangeCheck::Range R) + : F(L->getHeader()->getParent()), Ctx(F->getContext()), SE(SE), + OriginalLoop(L), OriginalLoopInfo(LI), Range(R) {} + + // Entry point for the algorithm. Returns true on success. + bool run(); +}; +}; + +void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block, + BasicBlock *ReplaceBy) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingBlock(i) == Block) + PN->setIncomingBlock(i, ReplaceBy); +} + +bool LoopConstrainer::recognizeLoop(LoopStructure &LoopStructureOut, + const SCEV *&LatchCountOut, + BasicBlock *&PreheaderOut, + const char *&FailureReason) const { + using namespace llvm::PatternMatch; + + assert(OriginalLoop->isLoopSimplifyForm() && + "should follow from addRequired<>"); + + BasicBlock *Latch = OriginalLoop->getLoopLatch(); + if (!OriginalLoop->isLoopExiting(Latch)) { + FailureReason = "no loop latch"; + return false; + } + + PHINode *CIV = OriginalLoop->getCanonicalInductionVariable(); + if (!CIV) { + FailureReason = "no CIV"; + return false; + } + + BasicBlock *Header = OriginalLoop->getHeader(); + BasicBlock *Preheader = OriginalLoop->getLoopPreheader(); + if (!Preheader) { + FailureReason = "no preheader"; + return false; + } + + Value *CIVNext = CIV->getIncomingValueForBlock(Latch); + Value *CIVStart = CIV->getIncomingValueForBlock(Preheader); + + const SCEV *LatchCount = SE.getExitCount(OriginalLoop, Latch); + if (isa(LatchCount)) { + FailureReason = "could not compute latch count"; + return false; + } + + // While SCEV does most of the analysis for us, we still have to + // modify the 
latch; and currently we can only deal with certain + // kinds of latches. This can be made more sophisticated as needed. + + BranchInst *LatchBr = dyn_cast(&*Latch->rbegin()); + + if (!LatchBr || LatchBr->isUnconditional()) { + FailureReason = "latch terminator not conditional branch"; + return false; + } + + // Currently we only support a latch condition of the form: + // + // %condition = icmp slt %civNext, %limit + // br i1 %condition, label %header, label %exit + + if (LatchBr->getSuccessor(0) != Header) { + FailureReason = "unknown latch form (header not first successor)"; + return false; + } + + Value *CIVComparedTo = nullptr; + ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; + if (!(match(LatchBr->getCondition(), + m_ICmp(Pred, m_Specific(CIVNext), m_Value(CIVComparedTo))) && + Pred == ICmpInst::ICMP_SLT)) { + FailureReason = "unknown latch form (not slt)"; + return false; + } + + const SCEV *CIVComparedToSCEV = SE.getSCEV(CIVComparedTo); + if (isa(CIVComparedToSCEV)) { + FailureReason = "could not relate CIV to latch expression"; + return false; + } + + const SCEV *ShouldBeOne = SE.getMinusSCEV(CIVComparedToSCEV, LatchCount); + const SCEVConstant *SCEVOne = dyn_cast(ShouldBeOne); + if (!SCEVOne || SCEVOne->getValue()->getValue() != 1) { + FailureReason = "unexpected header count in latch"; + return false; + } + + unsigned LatchBrExitIdx = 1; + BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx); + + assert(SE.getLoopDisposition(LatchCount, OriginalLoop) == + ScalarEvolution::LoopInvariant && + "loop variant exit count doesn't make sense!"); + + assert(!OriginalLoop->contains(LatchExit) && "expected an exit block!"); + + LoopStructureOut.Tag = "main"; + LoopStructureOut.Header = Header; + LoopStructureOut.Latch = Latch; + LoopStructureOut.LatchBr = LatchBr; + LoopStructureOut.LatchExit = LatchExit; + LoopStructureOut.LatchBrExitIdx = LatchBrExitIdx; + LoopStructureOut.CIV = CIV; + LoopStructureOut.CIVNext = CIVNext; + 
LoopStructureOut.CIVStart = CIVStart; + + LatchCountOut = LatchCount; + PreheaderOut = Preheader; + FailureReason = nullptr; + + return true; +} + +LoopConstrainer::SubRanges +LoopConstrainer::calculateSubRanges(Value *&HeaderCountOut) const { + IntegerType *Ty = cast(LatchTakenCount->getType()); + + SCEVExpander Expander(SE, "irce"); + Instruction *InsertPt = OriginalPreheader->getTerminator(); + + Value *LatchCountV = + MaybeSimplify(Expander.expandCodeFor(LatchTakenCount, Ty, InsertPt)); + + IRBuilder<> B(InsertPt); + + LoopConstrainer::SubRanges Result; + + // I think we can be more aggressive here and make this nuw / nsw if the + // addition that feeds into the icmp for the latch's terminating branch is nuw + // / nsw. In any case, a wrapping 2's complement addition is safe. + ConstantInt *One = ConstantInt::get(Ty, 1); + HeaderCountOut = MaybeSimplify(B.CreateAdd(LatchCountV, One, "header.count")); + + const SCEV *RangeBegin = SE.getSCEV(Range.first); + const SCEV *RangeEnd = SE.getSCEV(Range.second); + const SCEV *HeaderCountSCEV = SE.getSCEV(HeaderCountOut); + const SCEV *Zero = SE.getConstant(Ty, 0); + + // In some cases we can prove that we don't need a pre or post loop + + bool ProvablyNoPreloop = + SE.isKnownPredicate(ICmpInst::ICMP_SLE, RangeBegin, Zero); + if (!ProvablyNoPreloop) + Result.ExitPreLoopAt = ConstructSMinOf(HeaderCountOut, Range.first, B); + + bool ProvablyNoPostLoop = + SE.isKnownPredicate(ICmpInst::ICMP_SLE, HeaderCountSCEV, RangeEnd); + if (!ProvablyNoPostLoop) + Result.ExitMainLoopAt = ConstructSMinOf(HeaderCountOut, Range.second, B); + + return Result; +} + +void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result, + const char *Tag) const { + for (BasicBlock *BB : OriginalLoop->getBlocks()) { + BasicBlock *Clone = CloneBasicBlock(BB, Result.Map, Twine(".") + Tag, F); + Result.Blocks.push_back(Clone); + Result.Map[BB] = Clone; + } + + auto GetClonedValue = [&Result](Value *V) { + assert(V && "null values not in domain!"); 
+ auto It = Result.Map.find(V); + if (It == Result.Map.end()) + return V; + return static_cast(It->second); + }; + + Result.Structure = MainLoopStructure.map(GetClonedValue); + Result.Structure.Tag = Tag; + + for (unsigned i = 0, e = Result.Blocks.size(); i != e; ++i) { + BasicBlock *ClonedBB = Result.Blocks[i]; + BasicBlock *OriginalBB = OriginalLoop->getBlocks()[i]; + + assert(Result.Map[OriginalBB] == ClonedBB && "invariant!"); + + for (Instruction &I : *ClonedBB) + RemapInstruction(&I, Result.Map, + RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); + + // Exit blocks will now have one more predecessor and their PHI nodes need + // to be edited to reflect that. No phi nodes need to be introduced because + // the loop is in LCSSA. + + for (auto SBBI = succ_begin(OriginalBB), SBBE = succ_end(OriginalBB); + SBBI != SBBE; ++SBBI) { + + if (OriginalLoop->contains(*SBBI)) + continue; // not an exit block + + for (Instruction &I : **SBBI) { + if (!isa(&I)) + break; + + PHINode *PN = cast(&I); + Value *OldIncoming = PN->getIncomingValueForBlock(OriginalBB); + PN->addIncoming(GetClonedValue(OldIncoming), ClonedBB); + } + } + } +} + +LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd( + const LoopStructure &LS, BasicBlock *Preheader, Value *ExitLoopAt, + BasicBlock *ContinuationBlock) const { + + // We start with a loop with a single latch: + // + // +--------------------+ + // | | + // | preheader | + // | | + // +--------+-----------+ + // | ----------------\ + // | / | + // +--------v----v------+ | + // | | | + // | header | | + // | | | + // +--------------------+ | + // | + // ..... 
| + // | + // +--------------------+ | + // | | | + // | latch >----------/ + // | | + // +-------v------------+ + // | + // | + // | +--------------------+ + // | | | + // +---> original exit | + // | | + // +--------------------+ + // + // We change the control flow to look like + // + // + // +--------------------+ + // | | + // | preheader >-------------------------+ + // | | | + // +--------v-----------+ | + // | /-------------+ | + // | / | | + // +--------v--v--------+ | | + // | | | | + // | header | | +--------+ | + // | | | | | | + // +--------------------+ | | +-----v-----v-----------+ + // | | | | + // | | | .pseudo.exit | + // | | | | + // | | +-----------v-----------+ + // | | | + // ..... | | | + // | | +--------v-------------+ + // +--------------------+ | | | | + // | | | | | ContinuationBlock | + // | latch >------+ | | | + // | | | +----------------------+ + // +---------v----------+ | + // | | + // | | + // | +---------------^-----+ + // | | | + // +-----> .exit.selector | + // | | + // +----------v----------+ + // | + // +--------------------+ | + // | | | + // | original exit <----+ + // | | + // +--------------------+ + // + + RewrittenRangeInfo RRI; + + auto BBInsertLocation = std::next(Function::iterator(LS.Latch)); + RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector", + F, BBInsertLocation); + RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", F, + BBInsertLocation); + + BranchInst *PreheaderJump = cast(&*Preheader->rbegin()); + + IRBuilder<> B(PreheaderJump); + + // EnterLoopCond - is it okay to start executing this `LS'? + Value *EnterLoopCond = B.CreateICmpSLT(LS.CIVStart, ExitLoopAt); + B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit); + PreheaderJump->eraseFromParent(); + + assert(LS.LatchBrExitIdx == 1 && "generalize this as needed!"); + + B.SetInsertPoint(LS.LatchBr); + + // ContinueCond - is it okay to execute the next iteration in `LS'? 
+ Value *ContinueCond = B.CreateICmpSLT(LS.CIVNext, ExitLoopAt); + + LS.LatchBr->setCondition(ContinueCond); + assert(LS.LatchBr->getSuccessor(LS.LatchBrExitIdx) == LS.LatchExit && + "invariant!"); + LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector); + + B.SetInsertPoint(RRI.ExitSelector); + + // IterationsLeft - are there any more iterations left, given the original + // upper bound on the induction variable? If not, we branch to the "real" + // exit. + Value *IterationsLeft = B.CreateICmpSLT(LS.CIVNext, OriginalHeaderCount); + B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit); + + BranchInst *BranchToContinuation = + BranchInst::Create(ContinuationBlock, RRI.PseudoExit); + + // We emit PHI nodes into `RRI.PseudoExit' that compute the "latest" value of + // each of the PHI nodes in the loop header. This feeds into the initial + // value of the same PHI nodes if/when we continue execution. + for (Instruction &I : *LS.Header) { + if (!isa(&I)) + break; + + PHINode *PN = cast(&I); + + PHINode *NewPHI = PHINode::Create(PN->getType(), 2, PN->getName() + ".copy", + BranchToContinuation); + + NewPHI->addIncoming(PN->getIncomingValueForBlock(Preheader), Preheader); + NewPHI->addIncoming(PN->getIncomingValueForBlock(LS.Latch), + RRI.ExitSelector); + RRI.PHIValuesAtPseudoExit.push_back(NewPHI); + } + + // The latch exit now has a branch from `RRI.ExitSelector' instead of + // `LS.Latch'. The PHI nodes need to be updated to reflect that. 
+ for (Instruction &I : *LS.LatchExit) { + if (PHINode *PN = dyn_cast(&I)) + replacePHIBlock(PN, LS.Latch, RRI.ExitSelector); + else + break; + } + + return RRI; +} + +void LoopConstrainer::rewriteIncomingValuesForPHIs( + LoopConstrainer::LoopStructure &LS, BasicBlock *ContinuationBlock, + const LoopConstrainer::RewrittenRangeInfo &RRI) const { + + unsigned PHIIndex = 0; + for (Instruction &I : *LS.Header) { + if (!isa(&I)) + break; + + PHINode *PN = cast(&I); + + for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) + if (PN->getIncomingBlock(i) == ContinuationBlock) + PN->setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]); + } + + LS.CIVStart = LS.CIV->getIncomingValueForBlock(ContinuationBlock); +} + +BasicBlock * +LoopConstrainer::createPreheader(const LoopConstrainer::LoopStructure &LS, + BasicBlock *OldPreheader, + const char *Tag) const { + + BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, F, LS.Header); + BranchInst::Create(LS.Header, Preheader); + + for (Instruction &I : *LS.Header) { + if (!isa(&I)) + break; + + PHINode *PN = cast(&I); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) + replacePHIBlock(PN, OldPreheader, Preheader); + } + + return Preheader; +} + +template +void LoopConstrainer::addToParentLoopIfNeeded(IteratorTy Begin, + IteratorTy End) { + Loop *ParentLoop = OriginalLoop->getParentLoop(); + if (!ParentLoop) + return; + + auto &LoopInfoBase = OriginalLoopInfo->getBase(); + for (; Begin != End; Begin++) + ParentLoop->addBasicBlockToLoop(*Begin, LoopInfoBase); +} + +bool LoopConstrainer::run() { + BasicBlock *Preheader = nullptr; + const char *CouldNotProceedBecause = nullptr; + if (!recognizeLoop(MainLoopStructure, LatchTakenCount, Preheader, + CouldNotProceedBecause)) { + DEBUG(dbgs() << "irce: could not recognize loop, " << CouldNotProceedBecause + << "\n";); + return false; + } + + OriginalPreheader = Preheader; + MainLoopPreheader = Preheader; + + SubRanges SR = 
calculateSubRanges(OriginalHeaderCount); + + // It would have been better to make `PreLoop' and `PostLoop' + // `Optional's, but `ValueToValueMapTy' does not have a copy + // constructor. + ClonedLoop PreLoop, PostLoop; + bool NeedsPreLoop = SR.ExitPreLoopAt.hasValue(); + bool NeedsPostLoop = SR.ExitMainLoopAt.hasValue(); + + // We clone these ahead of time so that we don't have to deal with changing + // and temporarily invalid IR as we transform the loops. + if (NeedsPreLoop) + cloneLoop(PreLoop, "preloop"); + if (NeedsPostLoop) + cloneLoop(PostLoop, "postloop"); + + RewrittenRangeInfo PreLoopRRI; + + if (NeedsPreLoop) { + Preheader->getTerminator()->replaceUsesOfWith(MainLoopStructure.Header, + PreLoop.Structure.Header); + + MainLoopPreheader = + createPreheader(MainLoopStructure, Preheader, "mainloop"); + PreLoopRRI = + changeIterationSpaceEnd(PreLoop.Structure, Preheader, + SR.ExitPreLoopAt.getValue(), MainLoopPreheader); + rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader, + PreLoopRRI); + } + + BasicBlock *PostLoopPreheader = nullptr; + RewrittenRangeInfo PostLoopRRI; + + if (NeedsPostLoop) { + PostLoopPreheader = + createPreheader(PostLoop.Structure, Preheader, "postloop"); + PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader, + SR.ExitMainLoopAt.getValue(), + PostLoopPreheader); + rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader, + PostLoopRRI); + } + + std::array NewBlocks { {PostLoopPreheader, + PreLoopRRI.PseudoExit, PreLoopRRI.ExitSelector, PostLoopRRI.PseudoExit, + PostLoopRRI.ExitSelector, + MainLoopPreheader == Preheader ? nullptr : MainLoopPreheader } }; + // Some of the above may be nullptr, filter them out before passing to + // addToParentLoopIfNeeded. 
+ auto NewBlocksEnd = std::remove(NewBlocks.begin(), NewBlocks.end(), nullptr); + + addToParentLoopIfNeeded(NewBlocks.begin(), NewBlocksEnd); + addToParentLoopIfNeeded(PreLoop.Blocks.begin(), PreLoop.Blocks.end()); + addToParentLoopIfNeeded(PostLoop.Blocks.begin(), PostLoop.Blocks.end()); + + return true; +} + +/// Computes and returns a range of values for the induction variable in which +/// the range check can be safely elided. If it cannot compute such a range, +/// returns None. +Optional +InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE, + IRBuilder<> &B) const { + + // Currently we support inequalities of the form: + // + // 0 <= Offset + 1 * CIV < L given L >= 0 + // + // The inequality is satisfied by -Offset <= CIV < (L - Offset) [^1]. All + // additions and subtractions are twos-complement wrapping and comparisons are + // signed. + // + // Proof: + // + // If there exists CIV such that -Offset <= CIV < (L - Offset) then it + // follows that -Offset <= (-Offset + L) [== Eq. 1]. Since L >= 0, if + // (-Offset + L) sign-overflows then (-Offset + L) < (-Offset). Hence by + // [Eq. 1], (-Offset + L) could not have overflown. + // + // This means CIV = t + (-Offset) for t in [0, L). Hence (CIV + Offset) = + // t. Hence 0 <= (CIV + Offset) < L + + // [^1]: Note that the solution does _not_ apply if L < 0; consider values + // Offset = 127, CIV = 126 and L = -2 in an i8 world. 
+ + const SCEVConstant *ScaleC = dyn_cast(getScale()); + if (!(ScaleC && ScaleC->getValue()->getValue() == 1)) { + DEBUG(dbgs() << "irce: could not compute safe iteration space for:\n"; + print(dbgs())); + return None; + } + + Value *OffsetV = SCEVExpander(SE, "safe.itr.space").expandCodeFor( + getOffset(), getOffset()->getType(), B.GetInsertPoint()); + OffsetV = MaybeSimplify(OffsetV); + + Value *Begin = MaybeSimplify(B.CreateNeg(OffsetV)); + Value *End = MaybeSimplify(B.CreateSub(getLength(), OffsetV)); + + return std::make_pair(Begin, End); +} + +static InductiveRangeCheck::Range +IntersectRange(const Optional &R1, + const InductiveRangeCheck::Range &R2, IRBuilder<> &B) { + if (!R1.hasValue()) + return R2; + auto &R1Value = R1.getValue(); + + Value *NewMin = ConstructSMaxOf(R1Value.first, R2.first, B); + Value *NewMax = ConstructSMinOf(R1Value.second, R2.second, B); + return std::make_pair(NewMin, NewMax); +} + +bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) { + if (L->getBlocks().size() >= LoopSizeCutoff) { + DEBUG(dbgs() << "irce: giving up constraining loop, too large\n";); + return false; + } + + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) { + DEBUG(dbgs() << "irce: loop has no preheader, leaving\n"); + return false; + } + + LLVMContext &Context = Preheader->getContext(); + InductiveRangeCheck::AllocatorTy IRCAlloc; + SmallVector RangeChecks; + ScalarEvolution &SE = getAnalysis(); + + for (auto BBI : L->getBlocks()) + if (BranchInst *TBI = dyn_cast(BBI->getTerminator())) + if (InductiveRangeCheck *IRC = + InductiveRangeCheck::create(IRCAlloc, TBI, L, SE)) + RangeChecks.push_back(IRC); + + if (RangeChecks.empty()) + return false; + + DEBUG(dbgs() << "irce: looking at loop "; L->print(dbgs()); + dbgs() << "irce: loop has " << RangeChecks.size() + << " inductive range checks: \n"; + for (InductiveRangeCheck *IRC : RangeChecks) + IRC->print(dbgs()); + ); + + Optional SafeIterRange; + Instruction *ExprInsertPt = 
Preheader->getTerminator(); + + SmallVector RangeChecksToEliminate; + + IRBuilder<> B(ExprInsertPt); + for (InductiveRangeCheck *IRC : RangeChecks) { + auto Result = IRC->computeSafeIterationSpace(SE, B); + if (Result.hasValue()) { + SafeIterRange = IntersectRange(SafeIterRange, Result.getValue(), B); + RangeChecksToEliminate.push_back(IRC); + } + } + + if (!SafeIterRange.hasValue()) + return false; + + LoopConstrainer LC(L, &getAnalysis(), SE, SafeIterRange.getValue()); + bool Changed = LC.run(); + + if (Changed) { + auto PrintConstrainedLoopInfo = [L]() { + dbgs() << "irce: in function "; + dbgs() << L->getHeader()->getParent()->getName() << ": "; + dbgs() << "constrained "; + L->print(dbgs()); + }; + + DEBUG(PrintConstrainedLoopInfo()); + + if (PrintChangedLoops) + PrintConstrainedLoopInfo(); + + // Optimize away the now-redundant range checks. + + for (InductiveRangeCheck *IRC : RangeChecksToEliminate) { + ConstantInt *FoldedRangeCheck = IRC->getPassingDirection() + ? ConstantInt::getTrue(Context) + : ConstantInt::getFalse(Context); + IRC->getBranch()->setCondition(FoldedRangeCheck); + } + } + + return Changed; +} + +Pass *llvm::createInductiveRangeCheckEliminationPass() { + return new InductiveRangeCheckElimination; +} Index: llvm/trunk/lib/Transforms/Scalar/Scalar.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/Scalar.cpp +++ llvm/trunk/lib/Transforms/Scalar/Scalar.cpp @@ -40,6 +40,7 @@ initializeGVNPass(Registry); initializeEarlyCSEPass(Registry); initializeFlattenCFGPassPass(Registry); + initializeInductiveRangeCheckEliminationPass(Registry); initializeIndVarSimplifyPass(Registry); initializeJumpThreadingPass(Registry); initializeLICMPass(Registry); Index: llvm/trunk/test/Transforms/IRCE/multiple-access-no-preloop.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/multiple-access-no-preloop.ll +++ 
llvm/trunk/test/Transforms/IRCE/multiple-access-no-preloop.ll
@@ -0,0 +1,62 @@
+; RUN: opt -irce -S < %s | FileCheck %s
+
+; Two range checks on the same induction variable, one against %len.a and one
+; against %len.b.  The CHECK lines expect both checks in the main loop to be
+; folded to `true', the main loop to run up to smin(%n, smin(%len.a, %len.b)),
+; and a post-loop that still compares against %n.
+define void @multiple_access_no_preloop(
+    i32* %arr_a, i32* %a_len_ptr, i32* %arr_b, i32* %b_len_ptr, i32 %n) {
+
+ entry:
+  %len.a = load i32* %a_len_ptr, !range !0
+  %len.b = load i32* %b_len_ptr, !range !0
+  %first.itr.check = icmp sgt i32 %n, 0
+  br i1 %first.itr.check, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds.b ]
+  %idx.next = add i32 %idx, 1
+  %abc.a = icmp slt i32 %idx, %len.a
+  br i1 %abc.a, label %in.bounds.a, label %out.of.bounds
+
+ in.bounds.a:
+  %addr.a = getelementptr i32* %arr_a, i32 %idx
+  store i32 0, i32* %addr.a
+  %abc.b = icmp slt i32 %idx, %len.b
+  br i1 %abc.b, label %in.bounds.b, label %out.of.bounds
+
+ in.bounds.b:
+  %addr.b = getelementptr i32* %arr_b, i32 %idx
+  store i32 -1, i32* %addr.b
+  %next = icmp slt i32 %idx.next, %n
+  br i1 %next, label %loop, label %exit
+
+ out.of.bounds:
+  ret void
+
+ exit:
+  ret void
+}
+
+; CHECK-LABEL: multiple_access_no_preloop
+
+; CHECK-LABEL: loop.preheader:
+; CHECK: [[smaller_len_cmp:[^ ]+]] = icmp slt i32 %len.a, %len.b
+; CHECK: [[smaller_len:[^ ]+]] = select i1 [[smaller_len_cmp]], i32 %len.a, i32 %len.b
+; CHECK: [[upper_bound_cmp:[^ ]+]] = icmp slt i32 %n, [[smaller_len]]
+; CHECK: [[upper_bound:[^ ]+]] = select i1 [[upper_bound_cmp]], i32 %n, i32 [[smaller_len]]
+
+; CHECK-LABEL: loop:
+; CHECK: br i1 true, label %in.bounds.a, label %out.of.bounds
+
+; CHECK-LABEL: in.bounds.a:
+; CHECK: br i1 true, label %in.bounds.b, label %out.of.bounds
+
+; CHECK-LABEL: in.bounds.b:
+; CHECK: [[main_loop_cond:[^ ]+]] = icmp slt i32 %idx.next, [[upper_bound]]
+; CHECK: br i1 [[main_loop_cond]], label %loop, label %main.exit.selector
+
+; CHECK-LABEL: in.bounds.b.postloop:
+; CHECK: %next.postloop = icmp slt i32 %idx.next.postloop, %n
+; CHECK: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit
+
+!0 = !{i32 0, i32 2147483647}
Index: 
llvm/trunk/test/Transforms/IRCE/single-access-no-preloop.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/single-access-no-preloop.ll +++ llvm/trunk/test/Transforms/IRCE/single-access-no-preloop.ll @@ -0,0 +1,110 @@ +; RUN: opt -irce -S < %s | FileCheck %s + +define void @single_access_no_preloop_no_offset(i32 *%arr, i32 *%a_len_ptr, i32 %n) { + entry: + %len = load i32* %a_len_ptr, !range !0 + %first.itr.check = icmp sgt i32 %n, 0 + br i1 %first.itr.check, label %loop, label %exit + + loop: + %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %abc = icmp slt i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + + in.bounds: + %addr = getelementptr i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp slt i32 %idx.next, %n + br i1 %next, label %loop, label %exit + + out.of.bounds: + ret void + + exit: + ret void +} + +; CHECK-LABEL: single_access_no_preloop + +; CHECK-LABEL: loop: +; CHECK: br i1 true, label %in.bounds, label %out.of.bounds + +; CHECK-LABEL: main.exit.selector: +; CHECK-NEXT: [[continue:%[^ ]+]] = icmp slt i32 %idx.next, %n +; CHECK-NEXT: br i1 [[continue]], label %main.pseudo.exit, label %exit.loopexit + +; CHECK-LABEL: main.pseudo.exit: +; CHECK-NEXT: %idx.copy = phi i32 [ 0, %loop.preheader ], [ %idx.next, %main.exit.selector ] +; CHECK-NEXT: br label %postloop + +; CHECK-LABEL: postloop: +; CHECK-NEXT: br label %loop.postloop + +; CHECK-LABEL: loop.postloop: +; CHECK-NEXT: %idx.postloop = phi i32 [ %idx.next.postloop, %in.bounds.postloop ], [ %idx.copy, %postloop ] +; CHECK-NEXT: %idx.next.postloop = add i32 %idx.postloop, 1 +; CHECK-NEXT: %abc.postloop = icmp slt i32 %idx.postloop, %len +; CHECK-NEXT: br i1 %abc.postloop, label %in.bounds.postloop, label %out.of.bounds + +; CHECK-LABEL: in.bounds.postloop: +; CHECK-NEXT: %addr.postloop = getelementptr i32* %arr, i32 %idx.postloop +; CHECK-NEXT: store i32 0, i32* %addr.postloop 
+; CHECK-NEXT: %next.postloop = icmp slt i32 %idx.next.postloop, %n +; CHECK-NEXT: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit + + +define void @single_access_no_preloop_with_offset(i32 *%arr, i32 *%a_len_ptr, i32 %n) { + entry: + %len = load i32* %a_len_ptr, !range !0 + %first.itr.check = icmp sgt i32 %n, 0 + br i1 %first.itr.check, label %loop, label %exit + + loop: + %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %idx.for.abc = add i32 %idx, 4 + %abc = icmp slt i32 %idx.for.abc, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + + in.bounds: + %addr = getelementptr i32* %arr, i32 %idx.for.abc + store i32 0, i32* %addr + %next = icmp slt i32 %idx.next, %n + br i1 %next, label %loop, label %exit + + out.of.bounds: + ret void + + exit: + ret void +} + +; CHECK-LABEL: single_access_no_preloop_with_offset + +; CHECK-LABEL: loop.preheader: +; CHECK: [[safe_range_end:[^ ]+]] = sub i32 %len, 4 +; CHECK: [[exit_main_loop_at_cmp:[^ ]+]] = icmp slt i32 %n, [[safe_range_end]] +; CHECK: [[exit_main_loop_at:[^ ]+]] = select i1 [[exit_main_loop_at_cmp]], i32 %n, i32 [[safe_range_end]] +; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at]] +; CHECK: br i1 [[enter_main_loop]], label %loop, label %main.pseudo.exit + +; CHECK-LABEL: loop: +; CHECK: br i1 true, label %in.bounds, label %out.of.bounds + +; CHECK-LABEL: in.bounds: +; CHECK: [[continue_main_loop:[^ ]+]] = icmp slt i32 %idx.next, [[exit_main_loop_at]] +; CHECK: br i1 [[continue_main_loop]], label %loop, label %main.exit.selector + +; CHECK-LABEL: main.pseudo.exit: +; CHECK: %idx.copy = phi i32 [ 0, %loop.preheader ], [ %idx.next, %main.exit.selector ] +; CHECK: br label %postloop + +; CHECK-LABEL: loop.postloop: +; CHECK: %idx.postloop = phi i32 [ %idx.next.postloop, %in.bounds.postloop ], [ %idx.copy, %postloop ] + +; CHECK-LABEL: in.bounds.postloop: +; CHECK: %next.postloop = icmp slt i32 %idx.next.postloop, %n +; CHECK: br i1 
%next.postloop, label %loop.postloop, label %exit.loopexit + +!0 = !{i32 0, i32 2147483647} Index: llvm/trunk/test/Transforms/IRCE/single-access-with-preloop.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/single-access-with-preloop.ll +++ llvm/trunk/test/Transforms/IRCE/single-access-with-preloop.ll @@ -0,0 +1,59 @@ +; RUN: opt -irce -S < %s | FileCheck %s + +define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32 %offset) { + entry: + %len = load i32* %a_len_ptr, !range !0 + %first.itr.check = icmp sgt i32 %n, 0 + br i1 %first.itr.check, label %loop, label %exit + + loop: + %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %array.idx = add i32 %idx, %offset + %abc.high = icmp slt i32 %array.idx, %len + %abc.low = icmp sge i32 %array.idx, 0 + %abc = and i1 %abc.low, %abc.high + br i1 %abc, label %in.bounds, label %out.of.bounds + + in.bounds: + %addr = getelementptr i32* %arr, i32 %array.idx + store i32 0, i32* %addr + %next = icmp slt i32 %idx.next, %n + br i1 %next, label %loop, label %exit + + out.of.bounds: + ret void + + exit: + ret void +} + +; CHECK-LABEL: loop.preheader: +; CHECK: [[safe_start:[^ ]+]] = sub i32 0, %offset +; CHECK: [[safe_end:[^ ]+]] = sub i32 %len, %offset +; CHECK: [[exit_preloop_at_cond:[^ ]+]] = icmp slt i32 %n, [[safe_start]] +; CHECK: [[exit_preloop_at:[^ ]+]] = select i1 [[exit_preloop_at_cond]], i32 %n, i32 [[safe_start]] +; CHECK: [[exit_mainloop_at_cond:[^ ]+]] = icmp slt i32 %n, [[safe_end]] +; CHECK: [[exit_mainloop_at:[^ ]+]] = select i1 [[exit_mainloop_at_cond]], i32 %n, i32 [[safe_end]] + +; CHECK-LABEL: in.bounds: +; CHECK: [[continue_mainloop_cond:[^ ]+]] = icmp slt i32 %idx.next, [[exit_mainloop_at]] +; CHECK: br i1 [[continue_mainloop_cond]], label %loop, label %main.exit.selector + +; CHECK-LABEL: main.exit.selector: +; CHECK: [[mainloop_its_left:[^ ]+]] = icmp slt i32 %idx.next, %n +; CHECK: br i1 
[[mainloop_its_left]], label %main.pseudo.exit, label %exit.loopexit + +; CHECK-LABEL: in.bounds.preloop: +; CHECK: [[continue_preloop_cond:[^ ]+]] = icmp slt i32 %idx.next.preloop, [[exit_preloop_at]] +; CHECK: br i1 [[continue_preloop_cond]], label %loop.preloop, label %preloop.exit.selector + +; CHECK-LABEL: preloop.exit.selector: +; CHECK: [[preloop_its_left:[^ ]+]] = icmp slt i32 %idx.next.preloop, %n +; CHECK: br i1 [[preloop_its_left]], label %preloop.pseudo.exit, label %exit.loopexit + +; CHECK-LABEL: in.bounds.postloop: +; CHECK: %next.postloop = icmp slt i32 %idx.next.postloop, %n +; CHECK: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit + +!0 = !{i32 0, i32 2147483647} Index: llvm/trunk/test/Transforms/IRCE/unhandled.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/unhandled.ll +++ llvm/trunk/test/Transforms/IRCE/unhandled.ll @@ -0,0 +1,37 @@ +; RUN: opt -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s + +; Demonstrates that we don't currently handle the general expression +; `A * I + B'. 
+ +define void @general_affine_expressions(i32 *%arr, i32 *%a_len_ptr, i32 %n, + i32 %scale, i32 %offset) { +; CHECK-NOT: constrained Loop at depth + entry: + %len = load i32* %a_len_ptr, !range !0 + %first.itr.check = icmp sgt i32 %n, 0 + br i1 %first.itr.check, label %loop, label %exit + + loop: + %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %idx.mul = mul i32 %idx, %scale + %array.idx = add i32 %idx.mul, %offset + %abc.high = icmp slt i32 %array.idx, %len + %abc.low = icmp sge i32 %array.idx, 0 + %abc = and i1 %abc.low, %abc.high + br i1 %abc, label %in.bounds, label %out.of.bounds + + in.bounds: + %addr = getelementptr i32* %arr, i32 %array.idx + store i32 0, i32* %addr + %next = icmp slt i32 %idx.next, %n + br i1 %next, label %loop, label %exit + + out.of.bounds: + ret void + + exit: + ret void +} + +!0 = !{i32 0, i32 2147483647} Index: llvm/trunk/test/Transforms/IRCE/with-parent-loops.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/with-parent-loops.ll +++ llvm/trunk/test/Transforms/IRCE/with-parent-loops.ll @@ -0,0 +1,344 @@ +; RUN: opt -verify-loop-info -irce-print-changed-loops -irce < %s 2>&1 | FileCheck %s + +; This test checks if we update the LoopInfo correctly in the presence +; of parents, uncles and cousins. + +; Function Attrs: alwaysinline +define void @inner_loop(i32* %arr, i32* %a_len_ptr, i32 %n) #0 { +; CHECK: irce: in function inner_loop: constrained Loop at depth 1 containing: %loop
,%in.bounds + +entry: + %len = load i32* %a_len_ptr, !range !0 + %first.itr.check = icmp sgt i32 %n, 0 + br i1 %first.itr.check, label %loop, label %exit + +loop: ; preds = %in.bounds, %entry + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %abc = icmp slt i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: ; preds = %loop + %addr = getelementptr i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp slt i32 %idx.next, %n + br i1 %next, label %loop, label %exit + +out.of.bounds: ; preds = %loop + ret void + +exit: ; preds = %in.bounds, %entry + ret void +} + +; Function Attrs: alwaysinline +define void @with_parent(i32* %arr, i32* %a_len_ptr, i32 %n, i32 %parent.count) #0 { +; CHECK: irce: in function with_parent: constrained Loop at depth 2 containing: %loop.i
,%in.bounds.i + +entry: + br label %loop + +loop: ; preds = %inner_loop.exit, %entry + %idx = phi i32 [ 0, %entry ], [ %idx.next, %inner_loop.exit ] + %idx.next = add i32 %idx, 1 + %next = icmp ult i32 %idx.next, %parent.count + %len.i = load i32* %a_len_ptr, !range !0 + %first.itr.check.i = icmp sgt i32 %n, 0 + br i1 %first.itr.check.i, label %loop.i, label %exit.i + +loop.i: ; preds = %in.bounds.i, %loop + %idx.i = phi i32 [ 0, %loop ], [ %idx.next.i, %in.bounds.i ] + %idx.next.i = add i32 %idx.i, 1 + %abc.i = icmp slt i32 %idx.i, %len.i + br i1 %abc.i, label %in.bounds.i, label %out.of.bounds.i + +in.bounds.i: ; preds = %loop.i + %addr.i = getelementptr i32* %arr, i32 %idx.i + store i32 0, i32* %addr.i + %next.i = icmp slt i32 %idx.next.i, %n + br i1 %next.i, label %loop.i, label %exit.i + +out.of.bounds.i: ; preds = %loop.i + br label %inner_loop.exit + +exit.i: ; preds = %in.bounds.i, %loop + br label %inner_loop.exit + +inner_loop.exit: ; preds = %exit.i, %out.of.bounds.i + br i1 %next, label %loop, label %exit + +exit: ; preds = %inner_loop.exit + ret void +} + +; Function Attrs: alwaysinline +define void @with_grandparent(i32* %arr, i32* %a_len_ptr, i32 %n, i32 %parent.count, i32 %grandparent.count) #0 { +; CHECK: irce: in function with_grandparent: constrained Loop at depth 3 containing: %loop.i.i
,%in.bounds.i.i + +entry: + br label %loop + +loop: ; preds = %with_parent.exit, %entry + %idx = phi i32 [ 0, %entry ], [ %idx.next, %with_parent.exit ] + %idx.next = add i32 %idx, 1 + %next = icmp ult i32 %idx.next, %grandparent.count + br label %loop.i + +loop.i: ; preds = %inner_loop.exit.i, %loop + %idx.i = phi i32 [ 0, %loop ], [ %idx.next.i, %inner_loop.exit.i ] + %idx.next.i = add i32 %idx.i, 1 + %next.i = icmp ult i32 %idx.next.i, %parent.count + %len.i.i = load i32* %a_len_ptr, !range !0 + %first.itr.check.i.i = icmp sgt i32 %n, 0 + br i1 %first.itr.check.i.i, label %loop.i.i, label %exit.i.i + +loop.i.i: ; preds = %in.bounds.i.i, %loop.i + %idx.i.i = phi i32 [ 0, %loop.i ], [ %idx.next.i.i, %in.bounds.i.i ] + %idx.next.i.i = add i32 %idx.i.i, 1 + %abc.i.i = icmp slt i32 %idx.i.i, %len.i.i + br i1 %abc.i.i, label %in.bounds.i.i, label %out.of.bounds.i.i + +in.bounds.i.i: ; preds = %loop.i.i + %addr.i.i = getelementptr i32* %arr, i32 %idx.i.i + store i32 0, i32* %addr.i.i + %next.i.i = icmp slt i32 %idx.next.i.i, %n + br i1 %next.i.i, label %loop.i.i, label %exit.i.i + +out.of.bounds.i.i: ; preds = %loop.i.i + br label %inner_loop.exit.i + +exit.i.i: ; preds = %in.bounds.i.i, %loop.i + br label %inner_loop.exit.i + +inner_loop.exit.i: ; preds = %exit.i.i, %out.of.bounds.i.i + br i1 %next.i, label %loop.i, label %with_parent.exit + +with_parent.exit: ; preds = %inner_loop.exit.i + br i1 %next, label %loop, label %exit + +exit: ; preds = %with_parent.exit + ret void +} + +; Function Attrs: alwaysinline +define void @with_sibling(i32* %arr, i32* %a_len_ptr, i32 %n, i32 %parent.count) #0 { +; CHECK: irce: in function with_sibling: constrained Loop at depth 2 containing: %loop.i
,%in.bounds.i +; CHECK: irce: in function with_sibling: constrained Loop at depth 2 containing: %loop.i6
,%in.bounds.i9 + +entry: + br label %loop + +loop: ; preds = %inner_loop.exit12, %entry + %idx = phi i32 [ 0, %entry ], [ %idx.next, %inner_loop.exit12 ] + %idx.next = add i32 %idx, 1 + %next = icmp ult i32 %idx.next, %parent.count + %len.i = load i32* %a_len_ptr, !range !0 + %first.itr.check.i = icmp sgt i32 %n, 0 + br i1 %first.itr.check.i, label %loop.i, label %exit.i + +loop.i: ; preds = %in.bounds.i, %loop + %idx.i = phi i32 [ 0, %loop ], [ %idx.next.i, %in.bounds.i ] + %idx.next.i = add i32 %idx.i, 1 + %abc.i = icmp slt i32 %idx.i, %len.i + br i1 %abc.i, label %in.bounds.i, label %out.of.bounds.i + +in.bounds.i: ; preds = %loop.i + %addr.i = getelementptr i32* %arr, i32 %idx.i + store i32 0, i32* %addr.i + %next.i = icmp slt i32 %idx.next.i, %n + br i1 %next.i, label %loop.i, label %exit.i + +out.of.bounds.i: ; preds = %loop.i + br label %inner_loop.exit + +exit.i: ; preds = %in.bounds.i, %loop + br label %inner_loop.exit + +inner_loop.exit: ; preds = %exit.i, %out.of.bounds.i + %len.i1 = load i32* %a_len_ptr, !range !0 + %first.itr.check.i2 = icmp sgt i32 %n, 0 + br i1 %first.itr.check.i2, label %loop.i6, label %exit.i11 + +loop.i6: ; preds = %in.bounds.i9, %inner_loop.exit + %idx.i3 = phi i32 [ 0, %inner_loop.exit ], [ %idx.next.i4, %in.bounds.i9 ] + %idx.next.i4 = add i32 %idx.i3, 1 + %abc.i5 = icmp slt i32 %idx.i3, %len.i1 + br i1 %abc.i5, label %in.bounds.i9, label %out.of.bounds.i10 + +in.bounds.i9: ; preds = %loop.i6 + %addr.i7 = getelementptr i32* %arr, i32 %idx.i3 + store i32 0, i32* %addr.i7 + %next.i8 = icmp slt i32 %idx.next.i4, %n + br i1 %next.i8, label %loop.i6, label %exit.i11 + +out.of.bounds.i10: ; preds = %loop.i6 + br label %inner_loop.exit12 + +exit.i11: ; preds = %in.bounds.i9, %inner_loop.exit + br label %inner_loop.exit12 + +inner_loop.exit12: ; preds = %exit.i11, %out.of.bounds.i10 + br i1 %next, label %loop, label %exit + +exit: ; preds = %inner_loop.exit12 + ret void +} + +; Function Attrs: alwaysinline +define void 
@with_cousin(i32* %arr, i32* %a_len_ptr, i32 %n, i32 %parent.count, i32 %grandparent.count) #0 { +; CHECK: irce: in function with_cousin: constrained Loop at depth 3 containing: %loop.i.i
,%in.bounds.i.i +; CHECK: irce: in function with_cousin: constrained Loop at depth 3 containing: %loop.i.i10
,%in.bounds.i.i13 + +entry: + br label %loop + +loop: ; preds = %with_parent.exit17, %entry + %idx = phi i32 [ 0, %entry ], [ %idx.next, %with_parent.exit17 ] + %idx.next = add i32 %idx, 1 + %next = icmp ult i32 %idx.next, %grandparent.count + br label %loop.i + +loop.i: ; preds = %inner_loop.exit.i, %loop + %idx.i = phi i32 [ 0, %loop ], [ %idx.next.i, %inner_loop.exit.i ] + %idx.next.i = add i32 %idx.i, 1 + %next.i = icmp ult i32 %idx.next.i, %parent.count + %len.i.i = load i32* %a_len_ptr, !range !0 + %first.itr.check.i.i = icmp sgt i32 %n, 0 + br i1 %first.itr.check.i.i, label %loop.i.i, label %exit.i.i + +loop.i.i: ; preds = %in.bounds.i.i, %loop.i + %idx.i.i = phi i32 [ 0, %loop.i ], [ %idx.next.i.i, %in.bounds.i.i ] + %idx.next.i.i = add i32 %idx.i.i, 1 + %abc.i.i = icmp slt i32 %idx.i.i, %len.i.i + br i1 %abc.i.i, label %in.bounds.i.i, label %out.of.bounds.i.i + +in.bounds.i.i: ; preds = %loop.i.i + %addr.i.i = getelementptr i32* %arr, i32 %idx.i.i + store i32 0, i32* %addr.i.i + %next.i.i = icmp slt i32 %idx.next.i.i, %n + br i1 %next.i.i, label %loop.i.i, label %exit.i.i + +out.of.bounds.i.i: ; preds = %loop.i.i + br label %inner_loop.exit.i + +exit.i.i: ; preds = %in.bounds.i.i, %loop.i + br label %inner_loop.exit.i + +inner_loop.exit.i: ; preds = %exit.i.i, %out.of.bounds.i.i + br i1 %next.i, label %loop.i, label %with_parent.exit + +with_parent.exit: ; preds = %inner_loop.exit.i + br label %loop.i6 + +loop.i6: ; preds = %inner_loop.exit.i16, %with_parent.exit + %idx.i1 = phi i32 [ 0, %with_parent.exit ], [ %idx.next.i2, %inner_loop.exit.i16 ] + %idx.next.i2 = add i32 %idx.i1, 1 + %next.i3 = icmp ult i32 %idx.next.i2, %parent.count + %len.i.i4 = load i32* %a_len_ptr, !range !0 + %first.itr.check.i.i5 = icmp sgt i32 %n, 0 + br i1 %first.itr.check.i.i5, label %loop.i.i10, label %exit.i.i15 + +loop.i.i10: ; preds = %in.bounds.i.i13, %loop.i6 + %idx.i.i7 = phi i32 [ 0, %loop.i6 ], [ %idx.next.i.i8, %in.bounds.i.i13 ] + %idx.next.i.i8 = add i32 %idx.i.i7, 1 
+ %abc.i.i9 = icmp slt i32 %idx.i.i7, %len.i.i4 + br i1 %abc.i.i9, label %in.bounds.i.i13, label %out.of.bounds.i.i14 + +in.bounds.i.i13: ; preds = %loop.i.i10 + %addr.i.i11 = getelementptr i32* %arr, i32 %idx.i.i7 + store i32 0, i32* %addr.i.i11 + %next.i.i12 = icmp slt i32 %idx.next.i.i8, %n + br i1 %next.i.i12, label %loop.i.i10, label %exit.i.i15 + +out.of.bounds.i.i14: ; preds = %loop.i.i10 + br label %inner_loop.exit.i16 + +exit.i.i15: ; preds = %in.bounds.i.i13, %loop.i6 + br label %inner_loop.exit.i16 + +inner_loop.exit.i16: ; preds = %exit.i.i15, %out.of.bounds.i.i14 + br i1 %next.i3, label %loop.i6, label %with_parent.exit17 + +with_parent.exit17: ; preds = %inner_loop.exit.i16 + br i1 %next, label %loop, label %exit + +exit: ; preds = %with_parent.exit17 + ret void +} + +; Function Attrs: alwaysinline +define void @with_uncle(i32* %arr, i32* %a_len_ptr, i32 %n, i32 %parent.count, i32 %grandparent.count) #0 { +; CHECK: irce: in function with_uncle: constrained Loop at depth 2 containing: %loop.i
,%in.bounds.i +; CHECK: irce: in function with_uncle: constrained Loop at depth 3 containing: %loop.i.i
,%in.bounds.i.i + +entry: + br label %loop + +loop: ; preds = %with_parent.exit, %entry + %idx = phi i32 [ 0, %entry ], [ %idx.next, %with_parent.exit ] + %idx.next = add i32 %idx, 1 + %next = icmp ult i32 %idx.next, %grandparent.count + %len.i = load i32* %a_len_ptr, !range !0 + %first.itr.check.i = icmp sgt i32 %n, 0 + br i1 %first.itr.check.i, label %loop.i, label %exit.i + +loop.i: ; preds = %in.bounds.i, %loop + %idx.i = phi i32 [ 0, %loop ], [ %idx.next.i, %in.bounds.i ] + %idx.next.i = add i32 %idx.i, 1 + %abc.i = icmp slt i32 %idx.i, %len.i + br i1 %abc.i, label %in.bounds.i, label %out.of.bounds.i + +in.bounds.i: ; preds = %loop.i + %addr.i = getelementptr i32* %arr, i32 %idx.i + store i32 0, i32* %addr.i + %next.i = icmp slt i32 %idx.next.i, %n + br i1 %next.i, label %loop.i, label %exit.i + +out.of.bounds.i: ; preds = %loop.i + br label %inner_loop.exit + +exit.i: ; preds = %in.bounds.i, %loop + br label %inner_loop.exit + +inner_loop.exit: ; preds = %exit.i, %out.of.bounds.i + br label %loop.i4 + +loop.i4: ; preds = %inner_loop.exit.i, %inner_loop.exit + %idx.i1 = phi i32 [ 0, %inner_loop.exit ], [ %idx.next.i2, %inner_loop.exit.i ] + %idx.next.i2 = add i32 %idx.i1, 1 + %next.i3 = icmp ult i32 %idx.next.i2, %parent.count + %len.i.i = load i32* %a_len_ptr, !range !0 + %first.itr.check.i.i = icmp sgt i32 %n, 0 + br i1 %first.itr.check.i.i, label %loop.i.i, label %exit.i.i + +loop.i.i: ; preds = %in.bounds.i.i, %loop.i4 + %idx.i.i = phi i32 [ 0, %loop.i4 ], [ %idx.next.i.i, %in.bounds.i.i ] + %idx.next.i.i = add i32 %idx.i.i, 1 + %abc.i.i = icmp slt i32 %idx.i.i, %len.i.i + br i1 %abc.i.i, label %in.bounds.i.i, label %out.of.bounds.i.i + +in.bounds.i.i: ; preds = %loop.i.i + %addr.i.i = getelementptr i32* %arr, i32 %idx.i.i + store i32 0, i32* %addr.i.i + %next.i.i = icmp slt i32 %idx.next.i.i, %n + br i1 %next.i.i, label %loop.i.i, label %exit.i.i + +out.of.bounds.i.i: ; preds = %loop.i.i + br label %inner_loop.exit.i + +exit.i.i: ; preds = 
%in.bounds.i.i, %loop.i4 + br label %inner_loop.exit.i + +inner_loop.exit.i: ; preds = %exit.i.i, %out.of.bounds.i.i + br i1 %next.i3, label %loop.i4, label %with_parent.exit + +with_parent.exit: ; preds = %inner_loop.exit.i + br i1 %next, label %loop, label %exit + +exit: ; preds = %with_parent.exit + ret void +} + +attributes #0 = { alwaysinline } + +!0 = !{i32 0, i32 2147483647}