Index: include/llvm/Transforms/Utils/LoopEditor.h =================================================================== --- /dev/null +++ include/llvm/Transforms/Utils/LoopEditor.h @@ -0,0 +1,379 @@ +//===-- LoopEditor.h - High-level loop transformations --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The LoopEditor provides a toolkit for performing high-level transforms on +// loops. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_UTILS_LOOPEDITOR_H +#define LLVM_TRANSFORMS_SCALAR_UTILS_LOOPEDITOR_H + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace llvm { + +class BasicBlock; +class DataLayout; +class DominatorTree; +class Loop; +class LoopInfo; +class PHINode; +class SCEV; +class ScalarEvolution; +class Value; + +/// \brief This class provides a toolkit to create high-level operations on +/// loops. +/// +/// The functionality consists of a set of core functions which are designed +/// to be composable, and a set of helper routines that stitch together +/// the core functions to provide higher-level constructions such as loop +/// versioning, or loop peeling. +/// +/// Many functions can take a Delegate object, which allows the user to be +/// notified when different events happen (such as for example "an instruction +/// has been interleaved"). This allows the core functionality to remain +/// simple and not need too much configuration, while allowing the user +/// the flexibility to hook into the internals. Some delegate methods offer +/// the user the chance to influence the edit operation. +/// +/// All of the core functions are designed to keep LoopInfo, ScalarEvolution +/// and DominatorTree up to date. +class LoopEditor { + /// Internal class representing a PHI node inside the header block of the + /// loop. + class PHIID { + protected: + /// Default constructor purely for operator[] on maps. + PHIID() {} + PHIID(PHINode *BO, Value *StartV, PHINode *Val) : + BasedOn(BO), StartValue(StartV), Val(Val) { + } + /// If a loop is cloned, its clone will have this member of each of its + /// recurrences and inductions set to the original recurrence or induction. + /// + /// This allows LoopEditor to reason about sets of PHIs and how to merge + /// them together when stitching the outputs of loops to the input of other + /// loops (such as in LoopEditor::addIncoming). + PHINode *BasedOn; + /// For an induction or a reduction, the "start value" of the PHI (the value + /// incoming from the preheader). + Value *StartValue; + /// The PHInode itself. + PHINode *Val; + // This class is deliberately opaque to users, but allow LoopEditor full + // access. + friend class LoopEditor; + }; +public: + /// A semi-opaque descriptor for a reduction. + class Reduction : private PHIID { + // FIXME: Add recurrence here. + Reduction(Value *StartV, PHINode *Val) : + PHIID(nullptr, StartV, Val) { + } + public: + /// Only available for operator[] on maps. + Reduction() { assert(0 && "Unreachable!"); } + bool operator < (const Reduction &R) const { + return Val < R.Val; + } + /// Return the start value of this reduction. + Value *getStartValue() const { return StartValue; } + /// Return the PHI in the loop header that identifies this reduction. + PHINode *getPHI() const { return Val; } + /// Emit code to reduce Op1 and Op2 to one value. + Value *createOp(IRBuilder<> &IRB, Value *Op1, Value *Op2) const; + // Open up to LoopEditor. + friend class LoopEditor; + }; + /// A semi-opaque descriptor for an induction. + class Induction : private PHIID { + Induction(Value *StartV, PHINode *Val) : + PHIID(nullptr, StartV, Val) { + } + public: + /// Only available for operator[] on maps. + Induction() { assert(0 && "Unreachable!"); } + bool operator < (const Induction &R) const { + return Val < R.Val; + } + /// Return the start value of this induction. + Value *getStartValue() const { return StartValue; } + /// Return the PHI in the loop header that identifies this induction. + PHINode *getPHI() const { return Val; } + // Open up to LoopEditor. + friend class LoopEditor; + }; + +private: + // + // Immutable/analysis state + // + Loop *L; + ScalarEvolution *SE; + const DataLayout *DL; + LoopInfo *LI; + DominatorTree *DT; + bool Analyzed; + + // + // Mutable, cached state + // + SmallVector Reductions; + DenseMap ReductionsByValue; + SmallVector Inductions; + DenseMap InductionsByValue; + const SCEV *TripCountSCEV; + Value *BackedgeCountComparisonValue; + Instruction *BackedgeCountComparison; + BasicBlock *Bypass; + Value *ExecutedTripCountValue; + + void ensureStartValuesAvailable(); + void ensureCanonicalInductionVariable(); + void ensureBypassCreated(); + void analyze(); + void addStartPHIIncoming(PHINode *PN, Value *V, BasicBlock *BB); + bool sameProvenance(const PHIID &A, const PHIID &B); + void makeValuesAvailableInBlocks(ArrayRef Values, + ArrayRef Blocks); + PHINode *getCanonicalInductionVariable(); + void verify(const char *Txt); + void computeBackedgeCountComparison(); + Value *getStartingValue(); + Value *determineStartValueForPHI(PHINode *PN); + void removePHINodeIncomingValues(BasicBlock *In, BasicBlock *For); + void replacePHINodeIncomingValues(BasicBlock *In, BasicBlock *For, + BasicBlock *With); + BasicBlock *getDedicatedExitingBlock(); + BasicBlock *computeImmediateDominator(BasicBlock *B); + LLVMContext &getContext(); + + /// Hidden constructor, only used by LoopEditor::clone() to initialize + /// the Bypass block. + LoopEditor(Loop *L, BasicBlock *Bypass, ScalarEvolution *SE, + const DataLayout *DL, LoopInfo *LI, DominatorTree *DT); +public: + /// Delegates should be subclassed by users. They can optionally chain + /// to other delegates for composability. + /// + /// Delegates provide many callback functions, but only a subset if any + /// will be invoked for any one core function call. + /// + /// It is forbidden to modify the content of the loop when servicing one + /// of the "notify*" callbacks. + struct Delegate { + Delegate *Next; + + Delegate() : Next(nullptr) {} + Delegate(Delegate *Next) : Next(Next) {} + virtual void anchor(); // Provide a home for the vtable. + + /// A call to interleave() is about to happen, for the given iteration. + /// Iteration starts at 1 (the zero'th iteration would correspond to + /// the original loop content). + /// + /// Called by: widenAndInterleave() + virtual void notifyInterleaveIterationStarting(unsigned Iteration) { + if (Next) Next->notifyInterleaveIterationStarting(Iteration); + } + /// \c OldInst has been cloned to \c NewInst, during a call to + /// \c interleave() + /// + /// Called by: interleave() + virtual void notifyInstructionInterleaved(Instruction *OldInst, + Instruction *NewInst) { + if (Next) Next->notifyInstructionInterleaved(OldInst, NewInst); + } + /// \c OldInst has been peeled out of the loop as \c NewInst. + /// The peel iteration was \c Iteration, which starts from zero. + /// + /// Called by: peelAfter() + virtual void notifyInstructionPeeled(Instruction *OldInst, + Instruction *NewInst, + unsigned Iteration) { + if (Next) Next->notifyInstructionPeeled(OldInst, NewInst, Iteration); + } + }; + + /// A callback type that takes a Value* and IRBuilder, and returns a Value*. + typedef std::function&)> ValueToValueCB; + + /// Contructs a new loop editor, editing L. + LoopEditor(Loop *L, ScalarEvolution *SE, const DataLayout *DL, LoopInfo *LI, + DominatorTree *DT); + + /// + /// Queries + /// + + /// Returns true if the loop is analyzable by the loop editor. + bool isValid() const; + + /// Returns true if I is contained within blocks the loop editor owns. + /// This is equivalent to Loop::contains(), but it also checks the optional + /// bypass and dedicated exit blocks that are outside of Loop's knowledge. + bool contains(Instruction *I); + + /// Allow accesses to the underlying Loop using ->. + Loop *operator -> () const { return L; } + + /// Returns true if getReductionByPHI(V) would succeed. + bool isValidReduction(Value *V); + + /// Retrieves an ID by which an existing reduction can be referenced. 'V' + /// must be a reduction PHINode. Asserts on failure. + const Reduction getReductionByPHI(Value *V); + + /// Retrieves all known reductions. + ArrayRef getAllReductions(); + + /// Retrieves the outgoing value of a reduction - its value on exit from the + /// loop. + Value *getOutgoingReduction(const Reduction &ID); + + /// Retrieves a Reduction valid for this loop, if one is found to be based + /// on or derived from the given reduction. This will happen if this loop has + /// been cloned from another loop previously. + Reduction *getMatchingReduction(const Reduction &R); + + /// Retrieves all known inductions. + ArrayRef getAllInductions(); + + /// Retrieves the outgoing value of an induction - its value on exit from the + /// loop. + Value *getOutgoingInduction(const Induction &ID); + + /// Retrieves a Induction valid for this loop, if one is found to be based + /// on or derived from the given induction. This will happen if this loop has + /// been cloned from another loop previously. + Induction *getMatchingInduction(const Induction &R); + + /// Retrieves the executed trip count at the end of the loop. This may be + /// zero if the loop was bypassed by a predicate, or less than the normal + /// trip count if the loop was widened. + Value *getExecutedTripCount(); + + /// Retrieves the trip count of the loop as a SCEV. Note that this involves + /// adding one to the backedge count and as such may overflow when expanded. + const SCEV *getTripCount(); + + /// + /// Mutations + /// + + /// Produce a new version of this loop. The new loop is returned in + /// LoopEditor. + /// + /// The new loop is inserted into the CFG on IncomingEdge (IncomingEdge is + /// updated), so if IncomingEdge is A -> B, the result of this function will + /// be A -> LOOP -> B. + LoopEditor clone(Use &IncomingEdge, const Twine &NameSuffix = ""); + + /// A "widened" loop iterates over the same range but with a wider step. + /// For example the AddRec {5,1}<%L> widened by a factor 4 becomes {5,4}<%L>. + /// + /// This is a requirement for loop interleaving and loop vectorization, but + /// importantly this does not actually do any unrolling or vectorization. + /// Instead, all induction variables have their steps altered and the trip + /// count is modified. + /// + /// Reductions are not altered. + void widen(unsigned Factor); + + /// Set the loop trip count. + void setTripCount(const SCEV *TripCount, bool CheckForOverflow); + + /// Adds a new reduction variable starting at StartValue. The reduction does + /// not do anything until it is connected with connectReduction(). + Reduction addReduction(Value *StartValue); + + /// Connects the backedge of a reduction added with addReduction(). + void connectReduction(const Reduction ID, Value *V); + + /// Creates a new copy of all instructions in the loop body, except control flow + /// instructions. Does not modify the control flow in the loop at all. + /// + /// The inserted instructions are interleaved. + /// + /// 'InductionF' is called when a use of an induction variable is seen. + /// 'ReductionUseF' is called when a use of a reduction variable is seen. + /// 'ReductionDefF' is called when a feed of a reduction variable is seen. The + /// arguments are the newly created Value and the Reduction + /// it fed in the original loop. + /// FIXME: Change these callbacks to invokes on the delegate. + void interleave(ValueToValueCB InductionF, ValueToValueCB ReductionUseF, + std::function ReductionDefF, + Delegate *D); + + /// Returns a dedicated exiting block, creating one if it does not exist. A + /// dedicated exiting block is one where the only incoming arcs are from + /// the loop latch or optionally the bypass block (The bypass block is a + /// check to see if the loop would actually be executed at least once, as + /// the trip count check is on the backedge). + BasicBlock *getOrCreateDedicatedExitingBlock(); + + /// Changes the block that is executed after this loop. + void updateExitBlock(BasicBlock *BB); + + /// Add an incoming edge from the loop L, with the given mappings between our + /// and L's reductions and inductions. + /// + /// The InductionIDs and Reductions can either reference this loop or L's. + void addIncoming(LoopEditor &L, + const std::map &Reductions, + const std::map &Inductions); + + /// Add an incoming edge from the basic block BB. + void addIncoming(BasicBlock *BB); + + /// Remove an incoming edge from BB. The edge must exist. + void removeIncoming(BasicBlock *BB); + + /// + /// Macro-mutations - These are all helper functions implemented in terms of + /// the mutations above. + /// + + /// Widens the loop by \c Factor and performs interleaved loop unrolling. + /// + /// The reductions in the original loop get duplicated \c Factor times, + /// then reduced at the end of the loop into one value and returned in Ret. + void widenAndInterleaveLoop(unsigned Factor, + std::map &Ret, + Delegate *D=nullptr); + + /// Versions, widens, and interleaves the loop by \c Factor. + /// + /// The original loop is cloned and a widened loop created, which branches + /// to the original scalar loop as a tail. + /// + /// PredBB is created as a block to determine whether to branch to the widened + /// loop or the scalar one (independent of any trip count and overflow checks). + /// It is initially set up with just one branch: + /// br i1 true, label %versioned-loop, label %scalar-loop + LoopEditor versionWidenAndInterleaveLoop(unsigned Factor, BasicBlock *&PredBB, + Delegate *D=nullptr); + + /// Peels \c Iterations iterations out of the loop and places them after the + /// loop. The trip count of the loop is decreased to compensate. + void peelAfter(unsigned Iterations, Delegate *D=nullptr); +}; +} // end namespace llvm + +#endif Index: lib/Transforms/Utils/LoopEditor.cpp =================================================================== --- /dev/null +++ lib/Transforms/Utils/LoopEditor.cpp @@ -0,0 +1,1003 @@ +//===-- LoopEditor.cpp - High-level loop transformations ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The LoopEditor provides a toolkit for performing high-level transforms on +// loops. +// +//===----------------------------------------------------------------------===// + +#include "LoopEditor.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/PrettyStackTrace.h" + +#define DEBUG_TYPE "loopeditor" +using namespace llvm; + +void LoopEditor::Delegate::anchor() {} + +// +// Helper functions. +// +Value *LoopEditor::Reduction::createOp(IRBuilder<> &IRB, Value *Op1, Value *Op2) const { + assert(0 && "Implement!"); +} + +LLVMContext &LoopEditor::getContext() { + return L->getHeader()->getParent()->getContext(); +} + +void LoopEditor::ensureStartValuesAvailable() { + SmallVector StartValues; + SmallVector IncomingBlocks; + for (auto &I : Inductions) + StartValues.push_back(I.StartValue); + for (auto &I : Reductions) + StartValues.push_back(I.StartValue); + for (auto *BB : predecessors(Bypass ? Bypass : L->getLoopPreheader())) + IncomingBlocks.push_back(BB); + makeValuesAvailableInBlocks(StartValues, IncomingBlocks); +} + +void LoopEditor::ensureCanonicalInductionVariable() { + PHINode *IndVar = getCanonicalInductionVariable(); + assert(IndVar && "Canonical variable insertion not implemented!"); + + // FIXME: We may *have* a canonical indvar, but we also need to check that it + // is indeed used properly (compared with in the latch). +} + +void LoopEditor::ensureBypassCreated() { + if (Bypass) + return; + DEBUG(verify("Verification on entry to LoopEditor::ensureBypassCreated()")); + assert(L->getLoopPreheader()->getSinglePredecessor() && "Multi-predecessor " + "preheaders not implemented!"); + BasicBlock *Preheader = L->getLoopPreheader(); + + Bypass = BasicBlock::Create(getContext(), "edit.bypass", + Preheader->getParent(), + Preheader); + auto *T = Preheader->getSinglePredecessor()->getTerminator(); + T->replaceUsesOfWith(Preheader, Bypass); + + // Initialize the bypass with a tautological compare just to keep everything + // connected. + IRBuilder<> IRB(Bypass); + IRB.CreateCondBr(IRB.getFalse(), L->getExitBlock(), Preheader); + + DT->addNewBlock(Bypass, T->getParent()); + DT->changeImmediateDominator(Preheader, Bypass); + auto *ExitDom = DT->findNearestCommonDominator(Bypass, L->getExitBlock()); + DT->changeImmediateDominator(L->getExitBlock(), ExitDom); + if (L->getParentLoop()) + L->getParentLoop()->addBasicBlockToLoop(Bypass, *LI); + + // Move any PHI nodes from the preheader to the bypass. + for (auto I = Preheader->begin(); isa(I); I = Preheader->begin()) + I->moveBefore(Bypass->begin()); + + // If there are any reduction PHIs in the exit block, they need to have an + // incoming value for the bypass too. + BasicBlock *ExitB = L->getExitBlock(); + for (auto I = ExitB->begin(); isa(I); ++I) { + PHINode *PN = cast(I); + // They're in our exit block, so they *must* be a reduction if they use + // a value defined in our loop. + Value *V = PN->getIncomingValueForBlock(L->getLoopLatch()); + if (!isa(V) || !L->contains(cast(V))) + continue; + + // This value will be the outgoing reduction edge. Find the original + // PHI. + Reduction *R = nullptr; + for (auto *U : V->users()) + if (isa(U) && + cast(U)->getParent() == L->getHeader()) { + assert(ReductionsByValue.count(U) && "Not a known reduction?"); + R = &ReductionsByValue[U]; + } + assert(R && "Not a known reduction?"); + PN->addIncoming(R->StartValue, Bypass); + } + + DEBUG(verify("Verification on exit from LoopEditor::ensureBypassCreated()")); +} + +PHINode *LoopEditor::getCanonicalInductionVariable() { + // Loop::getCanonicalInductionVariable() only detects PHIs starting at zero, + // but we also deal with loops whose start value comes from a PHI in + // the preheader (and the default start value is zero). + // + // Code copied from LoopInfo.cpp. + BasicBlock *H = L->getHeader(); + + BasicBlock *Incoming = nullptr, *Backedge = nullptr; + pred_iterator PI = pred_begin(H); + assert(PI != pred_end(H) && + "Loop must have at least one backedge!"); + Backedge = *PI++; + if (PI == pred_end(H)) return nullptr; // dead loop + Incoming = *PI++; + if (PI != pred_end(H)) return nullptr; // multiple backedges? + + if (L->contains(Incoming)) { + if (L->contains(Backedge)) + return nullptr; + std::swap(Incoming, Backedge); + } else if (!L->contains(Backedge)) + return nullptr; + + // Loop over all of the PHI nodes, looking for a canonical indvar. + for (BasicBlock::iterator I = H->begin(); isa(I); ++I) { + PHINode *PN = cast(I); + if (ConstantInt *CI = + dyn_cast(PN->getIncomingValueForBlock(Incoming))) { + if (!CI->isNullValue()) + continue; + } else if (InductionsByValue.count(PN) && + isa(InductionsByValue[PN].StartValue)) { + if (!cast(InductionsByValue[PN].StartValue)->isNullValue()) + continue; + } else { + continue; + } + if (Instruction *Inc = + dyn_cast(PN->getIncomingValueForBlock(Backedge))) + if (Inc->getOpcode() == Instruction::Add && + Inc->getOperand(0) == PN) + if (ConstantInt *CI = dyn_cast(Inc->getOperand(1))) + if (CI->equalsInt(1)) + return PN; + } + return nullptr; +} + +void LoopEditor::computeBackedgeCountComparison() { + ensureCanonicalInductionVariable(); + BackedgeCountComparison = + cast(cast(L->getLoopLatch()->getTerminator()) + ->getCondition()); + + // We only detect post-inc comparisons here. Importantly if this is a + // post-inc expression we know that the computation of the trip count cannot + // overflow. + if (SE->getSCEV(BackedgeCountComparison->getOperand(0)) == TripCountSCEV) + BackedgeCountComparisonValue = BackedgeCountComparison->getOperand(0); + else if (SE->getSCEV(BackedgeCountComparison->getOperand(1)) == TripCountSCEV) + BackedgeCountComparisonValue = BackedgeCountComparison->getOperand(1); + else + assert(0 && "Unknown backedge count comparison!"); +} + +Value *LoopEditor::getStartingValue() { + ensureStartValuesAvailable(); + PHINode *PN = getCanonicalInductionVariable(); + + // First, get the starting value of PN. + Value *StartV = PN->getIncomingValueForBlock(L->getLoopPreheader()); + // Is this a PHI in the preheader? + if (PHINode *StartPHI = dyn_cast(StartV)) + return StartPHI; + + BasicBlock *HeadB = Bypass ? Bypass : L->getLoopPreheader(); + + PHINode *StartPHI = PHINode::Create(PN->getType(), 2, "edit.startphi", + HeadB->getFirstInsertionPt()); + for (auto *Pred : predecessors(HeadB)) + StartPHI->addIncoming(StartV, Pred); + + PN->setIncomingValue(PN->getBasicBlockIndex(L->getLoopPreheader()), StartPHI); + DEBUG(verify("Verification on exit from LoopEditor::getStartingValue()")); + return StartPHI; +} + +BasicBlock *LoopEditor::computeImmediateDominator(BasicBlock *B) { + // Find the common dominator of all the blocks. + BasicBlock *Dom = nullptr; + for (auto *BB : predecessors(B)) { + if (!Dom) + Dom = BB; + else + Dom = DT->findNearestCommonDominator(Dom, BB); + } + return Dom; +} + +void LoopEditor::makeValuesAvailableInBlocks(ArrayRef Values, + ArrayRef Blocks) { + // Find the common dominator of all the blocks. + BasicBlock *Dom = nullptr; + for (auto *BB : Blocks) { + if (!Dom) + Dom = BB; + else + Dom = DT->findNearestCommonDominator(Dom, BB); + } + + // Now, are all the values already available at the end of the dominator? + for (auto *V : Values) { + if (!isa(V)) + continue; + if (DT->dominates(cast(V), Dom->getTerminator())) + continue; + // No, this value is not. Can we shift it? + // FIXME: can we make this recursive? we bail out too early currently. + for (auto &O : cast(V)->operands()) + if (isa(O.get()) && + !DT->dominates(cast(O.get()), Dom->getTerminator())) { + assert(0 && "Unable to hoist all values!"); + } + // OK, its operands are available. Hoist it. + cast(V)->moveBefore(Dom->getTerminator()); + } +} + +void LoopEditor::addStartPHIIncoming(PHINode *PN, Value *V, BasicBlock *BB) { + // First, get the starting value of PN. + Value *StartV = PN->getIncomingValueForBlock(L->getLoopPreheader()); + + // Is this a PHI in the preheader? + if (PHINode *StartPHI = dyn_cast(StartV)) { + // Update it. + StartPHI->addIncoming(V, BB); + return; + } + + BasicBlock *HeadB = Bypass ? Bypass : L->getLoopPreheader(); + + PHINode *StartPHI = PHINode::Create(PN->getType(), 2, "edit.startphi", + HeadB->getFirstInsertionPt()); + for (auto *Pred : predecessors(HeadB)) + StartPHI->addIncoming((Pred == BB) ? V : StartV, Pred); + PN->setIncomingValue(PN->getBasicBlockIndex(L->getLoopPreheader()), StartPHI); + +} + +bool LoopEditor::sameProvenance(const PHIID &A, const PHIID &B) { + return A.Val == B.Val || A.BasedOn == B.Val || A.Val == B.BasedOn || + (A.BasedOn && B.BasedOn && A.BasedOn == B.BasedOn); +} + +Value *LoopEditor::determineStartValueForPHI(PHINode *PN) { + Value *StartV = PN->getIncomingValueForBlock(L->getLoopPreheader()); + if (!isa(StartV)) + return StartV; + + // If the PHI: + // - is located in the preheader and + // - for the incoming values that are constants those constants are the same + // then this is a StartPHI and we can say that the start value of this phi + // is the constant. + auto *FirstBlock = Bypass ? Bypass : L->getLoopPreheader(); + auto *StartPN = cast(StartV); + if (StartPN->getParent() != FirstBlock) + return StartPN; + + Value *C = nullptr; + for (auto &V : StartPN->incoming_values()) { + if (!C) + C = V; + else if (C != V) + return StartPN; + } + if (C) + return C; + return StartPN; +} + +void LoopEditor::replacePHINodeIncomingValues(BasicBlock *In, BasicBlock *For, + BasicBlock *With) { + for (auto I = In->begin(); isa(I); ++I) { + PHINode *PN = cast(I); + // We only delete the first incoming value for 'For'. + if (PN->getBasicBlockIndex(For) != -1) { + unsigned Idx = PN->getBasicBlockIndex(For); + Value *V = PN->getIncomingValue(Idx); + PN->removeIncomingValue(Idx, false); + PN->addIncoming(V, With); + } + } +} + +void LoopEditor::removePHINodeIncomingValues(BasicBlock *In, BasicBlock *For) { + for (auto I = In->begin(); isa(I); ++I) { + PHINode *PN = cast(I); + // We only delete the first incoming value for 'For'. + if (PN->getBasicBlockIndex(For) != -1) + PN->removeIncomingValue(PN->getBasicBlockIndex(For), false); + } +} + +BasicBlock *LoopEditor::getDedicatedExitingBlock() { + BasicBlock *ExitB = L->getExitBlock(); + if (ExitB && Bypass && ExitB->hasNUses(2) && + ExitB->isUsedInBasicBlock(Bypass) && + ExitB->isUsedInBasicBlock(L->getLoopLatch())) + return ExitB; + else if (ExitB && !Bypass && ExitB->hasOneUse() && + ExitB->isUsedInBasicBlock(L->getLoopLatch())) + return ExitB; + return nullptr; +} + +void LoopEditor::analyze() { + // The primary thing we need to analyze is the PHI nodes. PHIs must either + // be classified as an induction or as a reduction. + for (auto I = L->getHeader()->begin(); isa(*I); ++I) { + PHINode *PN = cast(I); + ConstantInt *CI; + RecurrenceDescriptor RD; + if (isInductionPHI(PN, SE, CI)) { + Induction ID(determineStartValueForPHI(PN), PN); + Inductions.push_back(ID); + InductionsByValue.insert(std::make_pair(PN, ID)); + } else if (RecurrenceDescriptor::isReductionPHI(PN, L, RD)) { + Reduction ID(determineStartValueForPHI(PN), PN); + Reductions.push_back(ID); + ReductionsByValue.insert(std::make_pair(PN, ID)); + } else { + DEBUG(dbgs() << "LoopEditor: unidentified PHI!: " << *PN); + return; + } + } + + TripCountSCEV = SE->getBackedgeTakenCount(L); + if (isa(TripCountSCEV)) + return; + TripCountSCEV = SE->getAddExpr(TripCountSCEV, + SE->getConstant(TripCountSCEV->getType(), 1)); + + Analyzed = true; +} + +void LoopEditor::verify(const char *Text) { + PrettyStackTraceString Trace(Text); + + assert(verifyFunction(*L->getHeader()->getParent(), &dbgs()) == false); + DT->verifyDomTree(); + LI->verify(); + SE->verifyAnalysis(); +} + +// +// Public API +// + +LoopEditor::LoopEditor(Loop *L, ScalarEvolution *SE, const DataLayout *DL, + LoopInfo *LI, DominatorTree *DT) + : L(L), SE(SE), DL(DL), LI(LI), DT(DT), Analyzed(false), TripCountSCEV(nullptr), + BackedgeCountComparisonValue(nullptr), Bypass(nullptr), + ExecutedTripCountValue(nullptr) { + analyze(); +} + +LoopEditor::LoopEditor(Loop *L, BasicBlock *Bypass, ScalarEvolution *SE, + const DataLayout *DL, LoopInfo *LI, DominatorTree *DT) + : L(L), SE(SE), DL(DL), LI(LI), DT(DT), Analyzed(false), TripCountSCEV(nullptr), + BackedgeCountComparisonValue(nullptr), Bypass(Bypass), + ExecutedTripCountValue(nullptr) { + analyze(); +} + +bool LoopEditor::isValid() const { + return Analyzed; +} + +bool LoopEditor::contains(Instruction *I) { + if (L->contains(I)) + return true; + if (I->getParent() == Bypass || I->getParent() == getDedicatedExitingBlock()) + return true; + return false; +} + +bool LoopEditor::isValidReduction(Value *V) { + return ReductionsByValue.count(V); +} + +const LoopEditor::Reduction LoopEditor::getReductionByPHI(Value *V) { + assert(ReductionsByValue.count(V)); + return ReductionsByValue[V]; +} + +ArrayRef LoopEditor::getAllReductions() { + return Reductions; +} + +Value *LoopEditor::getOutgoingReduction(const Reduction &ID) { + assert(L->contains(ID.Val) && "Unknown reduction!"); + // We always provide outgoing reductions in LCSSA form to make them + // easier to re-analyze. + Value *V = ID.Val->getIncomingValueForBlock(L->getLoopLatch()); + + BasicBlock *DEB = getOrCreateDedicatedExitingBlock(); + + // Do any of the PHIs in the DEB already use V? If so, just return that. + for (auto I = DEB->begin(); isa(I); ++I) + if (cast(I)->getIncomingValueForBlock(L->getLoopLatch()) == V) + return I; + + // Create a new PHI. + PHINode *PN = PHINode::Create(V->getType(), 2, "reduction.merge", + DEB->getFirstInsertionPt()); + PN->addIncoming(V, L->getLoopLatch()); + PN->addIncoming(ID.getStartValue(), Bypass); + + DEBUG(verify("Verification on exit of LoopEditor::getOutgoingReduction()")); + return PN; +} + +LoopEditor::Reduction *LoopEditor::getMatchingReduction(const Reduction &R) { + for (auto &Red : Reductions) + if (Red.BasedOn == R.Val || Red.BasedOn == R.BasedOn) + return &Red; + return nullptr; +} + +ArrayRef LoopEditor::getAllInductions() { + return Inductions; +} + +Value *LoopEditor::getOutgoingInduction(const Induction &ID) { + assert(L->contains(ID.Val) && "Unknown induction!"); + Value *V = ID.Val->getIncomingValueForBlock(L->getLoopLatch()); + BasicBlock *DEB = getOrCreateDedicatedExitingBlock(); + + // Do any of the PHIs in the DEB already use V? If so, just return that. + for (auto I = DEB->begin(); isa(I); ++I) + if (cast(I)->getIncomingValueForBlock(L->getLoopLatch()) == V) + return I; + + // Create a new PHI. + PHINode *PN = PHINode::Create(V->getType(), 2, "induction.merge", + DEB->getFirstInsertionPt()); + PN->addIncoming(V, L->getLoopLatch()); + PN->addIncoming(ID.getStartValue(), Bypass); + + DEBUG(verify("Verification on exit of LoopEditor::getOutgoingInduction()")); + return PN; +} + +LoopEditor::Induction *LoopEditor::getMatchingInduction(const Induction &I) { + for (auto &Ind : Inductions) + if (Ind.BasedOn == I.Val || Ind.BasedOn == I.BasedOn) + return &Ind; + return nullptr; +} + +Value *LoopEditor::getExecutedTripCount() { + // If we have a valid trip count already, use that. + if (ExecutedTripCountValue) + return ExecutedTripCountValue; + + // Otherwise, we need to construct it. If we go into the loop we have + // the loop induction variable - what if we have a bypass? + ensureCanonicalInductionVariable(); + ExecutedTripCountValue = InductionsByValue[getCanonicalInductionVariable()].Val + ->getIncomingValueForBlock(L->getLoopLatch()); + if (!Bypass) + return ExecutedTripCountValue; + + // We'll have to PHI zero (from the bypass) and the trip count together. + BasicBlock *BB = getOrCreateDedicatedExitingBlock(); + assert(BB->getNumUses() == 2 && "Expected two incoming arcs to exit block!"); + + IRBuilder<> IRB(BB->getFirstInsertionPt()); + PHINode *PN = IRB.CreatePHI(TripCountSCEV->getType(), 2, "executed_trip_count"); + PN->addIncoming(ConstantInt::get(cast(TripCountSCEV->getType()), 0), + Bypass); + PN->addIncoming(ExecutedTripCountValue, L->getLoopLatch()); + ExecutedTripCountValue = PN; + + DEBUG(verify("Verification on exit from LoopEditor::getExecutedTripCount()")); + return ExecutedTripCountValue; +} + +const SCEV *LoopEditor::getTripCount() { + return TripCountSCEV; +} + +void LoopEditor::setTripCount(const SCEV *TC, bool CheckForOverflow) { + DEBUG(verify("Verification on entry to LoopEditor::setTripCount()")); + // FIXME: Change "checkforoverflow" to a negated "don't check for overflow". + // As we have a post-inc comparison value, we may be able to determine that + // overflow doesn't need checking. + // + // If we don't know the trip count as a value, we'll need to teach ourselves + // it first. + if (!BackedgeCountComparisonValue) { + ensureCanonicalInductionVariable(); + computeBackedgeCountComparison(); + } + TripCountSCEV = TC; + + auto *OldBEComparisonValue = BackedgeCountComparisonValue; + Type *Ty = TripCountSCEV->getType(); + + // Cache these here so LoopInfo doesn't get confused when we rip out the + // bypass below. + BasicBlock *ExitB = L->getExitBlock(); + BasicBlock *PreheaderB = L->getLoopPreheader(); + + // Now check that the loop will be executed at least once. + ensureBypassCreated(); + Bypass->getTerminator()->eraseFromParent(); + + IRBuilder<> IRB(Bypass); + IRB.CreateCondBr(IRB.getFalse(), ExitB, PreheaderB); + + SCEVExpander Expander(*SE, *DL, "backedgecount"); + Value *StartV = getStartingValue(); + Value *TCV = Expander.expandCodeFor(TripCountSCEV, Ty, + Bypass->getFirstInsertionPt()); + BackedgeCountComparisonValue = TCV; + + IRB.SetInsertPoint(Bypass->getTerminator()); + // If the two values are different types, zero-extend to the largest. + if (StartV->getType()->getScalarSizeInBits() > + TCV->getType()->getScalarSizeInBits()) + TCV = IRB.CreateZExt(TCV, StartV->getType()); + else if (StartV->getType()->getScalarSizeInBits() < + TCV->getType()->getScalarSizeInBits()) + StartV = IRB.CreateZExt(StartV, TCV->getType()); + + Value *Check = IRB.CreateICmpUGE(StartV, TCV); + if (CheckForOverflow) { + auto *CI = ConstantInt::get(cast(TCV->getType()), 0 ); + Check = IRB.CreateOr(Check, IRB.CreateICmpEQ(TCV, CI)); + } + cast(Bypass->getTerminator())->setCondition(Check); + + BackedgeCountComparison->replaceUsesOfWith(OldBEComparisonValue, + BackedgeCountComparisonValue); + + // We've updated the trip count. This makes SCEVs invalid. + SE->forgetLoop(L); + DEBUG(verify("Verification on exit from LoopEditor::setTripCount()")); +} + +LoopEditor LoopEditor::clone(Use &IncomingEdge, + const Twine &NameSuffix) { + DEBUG(verify("Verification on entry to LoopEditor::clone()")); + SmallVector NewBlocks; + ValueToValueMapTy VM; + BasicBlock *Preheader = L->getLoopPreheader(); + BasicBlock *InsertPt = L->getLoopPreheader(); + BasicBlock *Dominator = cast(IncomingEdge.getUser())->getParent(); + BasicBlock *DedicatedExitB = getDedicatedExitingBlock(); + BasicBlock *LastBlock = DedicatedExitB ? DedicatedExitB : L->getExitingBlock(); + BasicBlock *ExitBlock = DedicatedExitB ? + DedicatedExitB->getTerminator()->getSuccessor(0) : + L->getExitBlock(); + + if (Bypass) { + BasicBlock *BB = CloneBasicBlock(Bypass, VM, NameSuffix); + BB->insertInto(InsertPt->getParent(), InsertPt); + VM[Bypass] = BB; + DT->addNewBlock(BB, Dominator); + NewBlocks.push_back(BB); + if (L->getParentLoop()) + L->getParentLoop()->addBasicBlockToLoop(BB, *LI); + } + Loop *NewL = cloneLoopWithPreheader(InsertPt, + Bypass ? cast(VM[Bypass]) : Dominator, + L, VM, NameSuffix, LI, DT, NewBlocks); + if (DedicatedExitB && !L->contains(DedicatedExitB)) { + BasicBlock *BB = CloneBasicBlock(DedicatedExitB, VM, NameSuffix); + BB->insertInto(InsertPt->getParent(), InsertPt); + VM[DedicatedExitB] = BB; + DT->addNewBlock(BB, Bypass ? cast(VM[Bypass]) : + cast(VM[L->getExitingBlock()])); + NewBlocks.push_back(BB); + if (L->getParentLoop()) + L->getParentLoop()->addBasicBlockToLoop(BB, *LI); + } + remapInstructionsInBlocks(NewBlocks, VM); + + BasicBlock *BB = cast(IncomingEdge.get()); + // There's an edge between Dominator and BB, and we want to insert a loop + // in the middle of it: + // + // Dom Dom + // | => \ + // | LOOP + // BB / + // BB + // So we need to change any incoming PHI values on that edge. + replacePHINodeIncomingValues(BB, Dominator, cast(VM[LastBlock])); + // If it was pointing into the old loop, we'll also have to remove the cloned + // incomings. + if (L->contains(BB) || BB == Bypass || BB == Preheader || BB == DedicatedExitB) + removePHINodeIncomingValues(cast(VM[BB]), Dominator); + + // We also need to change any PHI nodes in our new entry block to point + // to our new predecessor, Dom. + BasicBlock *FirstBlock = Bypass ? Bypass : Preheader; + for (auto *P : predecessors(FirstBlock)) + replacePHINodeIncomingValues(cast(VM[FirstBlock]), + P, Dominator); + + // Set the incoming and outgoing edges. + IncomingEdge.set(cast(VM[FirstBlock])); + auto *T = cast(VM[LastBlock])->getTerminator(); + T->replaceUsesOfWith(ExitBlock, BB); + + assert(NewL->getLoopPreheader() && "New loop is malformed!"); + assert(NewL->getHeader() && "New loop is malformed!"); + + DT->changeImmediateDominator(ExitBlock, + computeImmediateDominator(ExitBlock)); + + LoopEditor LE(NewL, + Bypass ? cast(VM[Bypass]) : nullptr, + SE, DL, LI, DT); + assert(LE.isValid() && "Generated loopeditor is not valid!"); + + // Remap the inductions and reductions. + for (auto &I : Inductions) + LE.InductionsByValue[VM[I.Val]].BasedOn = I.Val; + for (auto &I : LE.Inductions) + I.BasedOn = LE.InductionsByValue[I.Val].BasedOn; + for (auto &I : Reductions) + LE.ReductionsByValue[VM[I.Val]].BasedOn = I.Val; + for (auto &I : LE.Reductions) + I.BasedOn = LE.ReductionsByValue[I.Val].BasedOn; + + DEBUG(verify("Verification on exit from LoopEditor::clone()")); + return LE; +} + +void LoopEditor::widen(unsigned Factor) { + DEBUG(verify("Verification on entry to LoopEditor::widen()")); + const SCEV *NewTC = SE->getUDivExpr(TripCountSCEV, + SE->getConstant(TripCountSCEV->getType(), + Factor)); + setTripCount(NewTC, true); + + for (auto &I : Inductions) + if (I.Val != getCanonicalInductionVariable()) { + Value *OldV = I.Val->getIncomingValueForBlock(L->getLoopLatch()); + Type *Ty = OldV->getType(); + // FIXME: this is wrong! doesn't work for -1 or non-unit. + Value *NewV = BinaryOperator::Create(Instruction::Add, + OldV, + ConstantInt::get(Ty, Factor - 1), + OldV->getName(), + L->getLoopLatch()->getTerminator()); + I.Val->setIncomingValue(I.Val->getBasicBlockIndex(L->getLoopLatch()), + NewV); + } + + SE->forgetLoop(L); + DEBUG(verify("Verification on exit from LoopEditor::widen()")); +} + +LoopEditor::Reduction LoopEditor::addReduction(Value *StartValue) { + PHINode *PN = PHINode::Create(StartValue->getType(), 2, "rdx.loopedit", + L->getHeader()->getFirstInsertionPt()); + PN->addIncoming(StartValue, L->getLoopPreheader()); + PN->addIncoming(PN, L->getLoopLatch()); + + Reduction R(StartValue, PN); + Reductions.push_back(R); + ReductionsByValue[PN] = R; + + DEBUG(verify("Verification on exit from LoopEditor::addReduction()")); + return R; +} + +void LoopEditor::connectReduction(const Reduction ID, Value *V) { + ID.Val->setIncomingValue(ID.Val->getBasicBlockIndex(L->getLoopLatch()), + V); + DEBUG(verify("Verification on exit from LoopEditor::connectReduction()")); +} + +void LoopEditor::interleave(ValueToValueCB InductionF, + ValueToValueCB ReductionUseF, + std::function ReductionDefF, + Delegate *D) { + // Precache all the instructions we want to clone, so we're resilient to + // changes in the function. + SmallVector InstsToClone; + SmallVector InstsToRemap; + for (auto *BB : L->getBlocks()) { + if (BB == L->getLoopPreheader()) + continue; + for (auto &I : *BB) + if (!isa(&I)) + InstsToClone.push_back(&I); + } + + // Now do the clone. + ValueToValueMapTy VM; + for (auto *I : InstsToClone) { + if (InductionsByValue.count(I)) { + IRBuilder<> IRB(I->getParent()->getFirstInsertionPt()); + VM[I] = InductionF(I, IRB); + } else if (ReductionsByValue.count(I)) { + IRBuilder<> IRB(I->getParent()->getFirstInsertionPt()); + VM[I] = ReductionUseF(I, IRB); + } else { + Instruction *NewI = I->clone(); + NewI->insertAfter(I); + InstsToRemap.push_back(NewI); + VM[I] = NewI; + } + } + for (auto *I : InstsToRemap) + RemapInstruction(I, VM, RF_IgnoreMissingEntries); + + for (auto &R : Reductions) + ReductionDefF(VM[getOutgoingReduction(R)], R); + + if (D) + for (auto *I : InstsToClone) + // FIXME: cast to instruction not right here - should be Value. + D->notifyInstructionInterleaved(I, cast(VM[I])); + + DEBUG(verify("Verification on exit from LoopEditor::interleave()")); +} + +BasicBlock *LoopEditor::getOrCreateDedicatedExitingBlock() { + assert(L->getExitingBlock() && "Expect only one latch!"); + if (getDedicatedExitingBlock()) + return getDedicatedExitingBlock(); + BasicBlock *ExitB = L->getExitBlock(); + + // OK, time to create one. + BasicBlock *NewExitB = BasicBlock::Create(getContext(), + "loopedit.dedicatedexit", + ExitB->getParent(), + ExitB); + IRBuilder<> IRB(NewExitB); + IRB.CreateBr(ExitB); + L->getLoopLatch()->getTerminator()->replaceUsesOfWith(ExitB, NewExitB); + if (Bypass) + Bypass->getTerminator()->replaceUsesOfWith(ExitB, NewExitB); + + // If any PHIs in ExitB refer to values computed within our loop, + // create an equivalent PHI in NewExitB and point the original PHI at it. + for (auto I = ExitB->begin(); isa(I); ++I) { + PHINode *PN = cast(I); + PHINode *NewPN = cast(I->clone()); + NewPN->insertBefore(NewExitB->getFirstInsertionPt()); + + // Remove any edges from predecessors that aren't our bypass or latch + for (auto *P : predecessors(ExitB)) + if (P != Bypass && P != L->getLoopLatch() && + NewPN->getBasicBlockIndex(P) != -1) + NewPN->removeIncomingValue(NewPN->getBasicBlockIndex(P)); + + PN->removeIncomingValue(PN->getBasicBlockIndex(L->getLoopLatch())); + if (Bypass) + PN->removeIncomingValue(PN->getBasicBlockIndex(Bypass)); + PN->addIncoming(NewPN, NewExitB); + } + + DT->addNewBlock(NewExitB, Bypass ? Bypass : L->getLoopLatch()); + // Because we know ExitB has predecessors other than within our loop (else + // it would be defined as a dedicated exiting block), its order in the + // DomTree is explicitly not affected. + if (L->getParentLoop()) + L->getParentLoop()->addBasicBlockToLoop(NewExitB, *LI); + + DEBUG(verify("Verification on exit from LoopEditor::getOrCreateDedicatedExitingBlock()")); + return NewExitB; +} + +void LoopEditor::updateExitBlock(BasicBlock *BB) { + BasicBlock *EB = getOrCreateDedicatedExitingBlock(); + BasicBlock *CurrentExitB = EB->getTerminator()->getSuccessor(0); + EB->getTerminator()->setSuccessor(0, BB); + + BasicBlock *NewDom = nullptr; + for (auto *Pred : predecessors(CurrentExitB)) + NewDom = NewDom ? DT->findNearestCommonDominator(NewDom, Pred) : Pred; + assert (NewDom); + DT->changeImmediateDominator(CurrentExitB, NewDom); + + removePHINodeIncomingValues(CurrentExitB, EB); +} + +void LoopEditor::addIncoming( + LoopEditor &LE, + const std::map &OtherReductions, + const std::map &OtherInductions) { + DEBUG(verify("Verification on entry to LoopEditor::addIncoming(LoopEditor&)")); + assert(Reductions.size() == OtherReductions.size() && "Reductions size mismatch!"); + assert(Inductions.size() == OtherInductions.size() && "Inductions size mismatch!"); + + ensureStartValuesAvailable(); + ensureCanonicalInductionVariable(); + + BasicBlock *BB = LE.getOrCreateDedicatedExitingBlock(); + + LE.updateExitBlock(Bypass ? Bypass : L->getLoopPreheader()); + + // Try and match up the reductions and inductions. + unsigned SeenReductions = 0, SeenInductions = 0; + for (auto &OtherKV : OtherReductions) + for (auto &ThisR : Reductions) + if (sameProvenance(ThisR, OtherKV.first)) { + SeenReductions++; + addStartPHIIncoming(ThisR.Val, OtherKV.second, BB); + } + for (auto &OtherKV : OtherInductions) + for (auto &ThisI : Inductions) + if (sameProvenance(ThisI, OtherKV.first)) { + SeenInductions++; + addStartPHIIncoming(ThisI.Val, OtherKV.second, BB); + } + assert(SeenReductions == Reductions.size() && "Not all reductions matched!"); + assert(SeenInductions == Inductions.size() && "Not all inductions matched!"); + + DEBUG(verify("Verification on exit from LoopEditor::addIncoming(LoopEditor&)")); +} + +void LoopEditor::addIncoming(BasicBlock *BB) { + DEBUG(verify("Verification on entry to LoopEditor::addIncoming(BasicBlock*)")); + ensureStartValuesAvailable(); + + for (auto &R : Reductions) + addStartPHIIncoming(R.Val, R.StartValue, BB); + for (auto &I : Inductions) + addStartPHIIncoming(I.Val, I.StartValue, BB); + + DEBUG(verify("Verification on exit from LoopEditor::addIncoming(BasicBlock*)")); +} + +void LoopEditor::removeIncoming(BasicBlock *BB) { + BasicBlock *StartB = Bypass ? Bypass : L->getLoopPreheader(); + for (auto I = StartB->begin(); isa(&*I); ++I) + cast(I)->removeIncomingValue(BB); + + DEBUG(verify("Verification on exit from LoopEditor::removeIncoming()")); +} + +// +// Macro-mutators +// + +void LoopEditor::widenAndInterleaveLoop(unsigned Factor, + std::map &Ret, + Delegate *D) { + widen(Factor); + + std::map > RedMap; + for (auto ID : getAllReductions()) + for (unsigned I = 1; I < Factor; ++I) + RedMap[ID].push_back(addReduction(ID.getStartValue())); + + for (unsigned Iter = 1; Iter < Factor; ++Iter) { + if (D) D->notifyInterleaveIterationStarting(Iter); + interleave( + [=](Value *V, IRBuilder<> &IRB) { + return IRB.CreateAdd(V, ConstantInt::get(cast(V->getType()), + Iter)); + }, + [&](Value *V, IRBuilder<> &IRB) { + return RedMap[getReductionByPHI(V)][Iter - 1].getPHI(); + }, + [&](Value *V, const Reduction OldR) { + connectReduction(RedMap[OldR][Iter - 1], V); + }, + D); + } + + auto *ExitBB = getOrCreateDedicatedExitingBlock(); + IRBuilder<> IRB(ExitBB->getTerminator()); + + for (auto &KV : RedMap) { + Value *V = getOutgoingReduction(KV.first); + for (auto &ID : KV.second) + V = ID.createOp(IRB, V, getOutgoingReduction(ID)); + Ret[KV.first] = V; + } +} + +LoopEditor LoopEditor::versionWidenAndInterleaveLoop(unsigned Factor, + BasicBlock *&PredBB, + Delegate *D) { + BasicBlock *Preheader = L->getLoopPreheader(); + BasicBlock *EntryFrom = Preheader->getSinglePredecessor(); + assert(EntryFrom && "Couldn't get entry predecessor!"); + + // Create a block to hold any predicates, and connect it to + // the original loop to keep it in the CFG. + removeIncoming(EntryFrom); + PredBB = BasicBlock::Create(getContext(), "wide.pred.check", + Preheader->getParent(), + Preheader); + auto *BI = BranchInst::Create(Preheader, Preheader, + ConstantInt::getTrue(getContext()), + PredBB); + EntryFrom->getTerminator()->replaceUsesOfWith(Preheader, PredBB); + DT->addNewBlock(PredBB, EntryFrom); + DT->changeImmediateDominator(Preheader, PredBB); + addIncoming(PredBB); + + // Get the zero'th use of Preheader in BI. That's the Use that we'll + // hang the cloned loop off of. + Use &U = BI->getOperandUse(2); + assert(U.get() == Preheader); + + // Clone the original loop. + LoopEditor WideL = clone(U, ".wide"); + + // Now widen the loop. + std::map ReducedReductions; + WideL.widenAndInterleaveLoop(Factor, ReducedReductions); + + // Now connect the wide exit to the original (our) entry. + std::map Inductions; + for (auto ID : WideL.getAllInductions()) + Inductions[ID] = WideL.getOutgoingInduction(ID); + addIncoming(WideL, ReducedReductions, Inductions); + + return WideL; +} + +void LoopEditor::peelAfter(unsigned Iterations, Delegate *D) { + // To peel an iteration, we reduce the loop iteration count, + // clone it, then reduce the clone's iteration count to one to effectively + // unloop it. + setTripCount(SE->getMinusSCEV(getTripCount(), + SE->getConstant(getTripCount()->getType(), + Iterations)), + true /*FIXME, what?*/); + + BasicBlock *ExitB = getOrCreateDedicatedExitingBlock(); + for (unsigned Iteration = 0; Iteration < Iterations; ++Iteration) { + // A dedicated exit block will always only have one successor. + Use &U = ExitB->getTerminator()->getOperandUse(0); + LoopEditor NewLE = clone(U, Twine(".peel-")+Twine(Iteration)); + NewLE.setTripCount(SE->getConstant(getTripCount()->getType(), 1), false); + + // Connect up the reductions and inductions. + std::map Inductions; + for (auto ID : getAllInductions()) { + Inductions[ID] = getOutgoingInduction(ID); + auto *Match = NewLE.getMatchingInduction(ID); + // FIXME: This isn't very elegant. Find a better way to do this. + assert(Match); + std::vector V; + for (auto &U : Inductions[ID]->uses()) + if (!contains(cast(U.getUser()))) + V.push_back(&U); + for (auto *U : V) + U->set(NewLE.getOutgoingInduction(*Match)); + } + std::map Reductions; + for (auto ID : getAllReductions()) { + Reductions[ID] = getOutgoingReduction(ID); + auto *Match = NewLE.getMatchingReduction(ID); + assert(Match); + std::vector V; + for (auto &U : Reductions[ID]->uses()) + if (!contains(cast(U.getUser()))) + V.push_back(&U); + for (auto *U : V) + U->set(NewLE.getOutgoingReduction(*Match)); + } + + NewLE.addIncoming(*this, Reductions, Inductions); + } +}