Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -167,6 +167,7 @@ void initializeLoopDeletionPass(PassRegistry&); void initializeLoopExtractorPass(PassRegistry&); void initializeLoopInfoWrapperPassPass(PassRegistry&); +void initializeLoopInterchangePass(PassRegistry&); void initializeLoopInstSimplifyPass(PassRegistry&); void initializeLoopRotatePass(PassRegistry&); void initializeLoopSimplifyPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -96,6 +96,7 @@ (void) llvm::createLICMPass(); (void) llvm::createLazyValueInfoPass(); (void) llvm::createLoopExtractorPass(); + (void) llvm::createLoopInterchangePass(); (void) llvm::createLoopSimplifyPass(); (void) llvm::createLoopStrengthReducePass(); (void) llvm::createLoopRerollPass(); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -130,6 +130,13 @@ // Pass *createLICMPass(); + +//===----------------------------------------------------------------------===// +// +// LoopInterchange - This pass is interchanges loops to give better cache hits. +// +Pass *createLoopInterchangePass(); + //===----------------------------------------------------------------------===// // // LoopStrengthReduce - This pass is strength reduces GEP instructions that use Index: lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- lib/Transforms/Scalar/CMakeLists.txt +++ lib/Transforms/Scalar/CMakeLists.txt @@ -17,6 +17,7 @@ LoopDeletion.cpp LoopIdiomRecognize.cpp LoopInstSimplify.cpp + LoopInterchange.cpp LoopRerollPass.cpp LoopRotation.cpp LoopStrengthReduce.cpp Index: lib/Transforms/Scalar/LoopInterchange.cpp =================================================================== --- lib/Transforms/Scalar/LoopInterchange.cpp +++ lib/Transforms/Scalar/LoopInterchange.cpp @@ -0,0 +1,700 @@ +//===- LoopInterchange.cpp - Loop interchange pass +//--------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This Pass handles loop interchange transform. This interchanges the inner +// and outer loop if interchanging can result in better cache hits. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Dominators.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "loop-interchange" + +namespace { + +typedef std::pair LoopPair; + +unsigned getInnerLoopCount(Loop &L, unsigned level) { + int lev = 0; + if (L.empty()) + return level + 1; + for (Loop *InnerL : L) + lev = getInnerLoopCount(*InnerL, level + 1); + return lev; +} + +static void populateWorklist(Loop &L, SmallVector &V) { + // TODO: Currently only handled for loops depth of 2. + // Also handle loop depth of 2 more appropriatly. + if (getInnerLoopCount(L, 0) == 2) { + Loop *Inner; + for (Loop *InnerL : L) + Inner = InnerL; + V.push_back(std::make_pair(&L, Inner)); + } +} + +/// LoopInterchangeLegality checks if it is legal to interchange the loop. +class LoopInterchangeLegality { +public: + LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE, + DependenceAnalysis *DA) + : OuterLoop(Outer), InnerLoop(Inner), SE(SE), DA(DA) {} + + /// Check if the loops can be interchanged. + bool canInterchangeLoops(); + + bool currentLimitations(); + +private: + bool checkDependence(Loop *Outer, DependenceAnalysis *DA); + bool tightlyNested(Loop *Outer, Loop *Inner); + + Loop *OuterLoop; + Loop *InnerLoop; + + /// Scev analysis. + ScalarEvolution *SE; + /// Dependence analysis. + DependenceAnalysis *DA; +}; + +/// LoopInterchangeProfitability checks if it is profitable to interchange the +/// loop. +class LoopInterchangeProfitability { +public: + LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE) + : OuterLoop(Outer), InnerLoop(Inner), SE(SE) {} + + /// Check if the loop interchange is profitable + bool isProfitable(); + +private: + int getInstrOrderCost(PHINode *IV); + + Loop *OuterLoop; + Loop *InnerLoop; + + /// Scev analysis. + ScalarEvolution *SE; +}; + +/// LoopInterchangeTransform interchanges the loop +class LoopInterchangeTransform { +public: + LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE, + LoopInfo *LI, DominatorTree *DT) + : outerLoop(Outer), innerLoop(Inner), SE(SE), LI(LI), DT(DT) { + initialize(); + } + + /// Interchange OuterLoop and InnerLoop. + bool transform(); + void initialize(); + +private: + void splitInnerLoopLatch(DominatorTree *, LoopInfo *, Instruction *); + void splitInnerLoopHeader(DominatorTree *, LoopInfo *); + bool adjustLoopLinks(DominatorTree *DT); + bool adjustOuterLoopPreheader(); + bool adjustInnerLoopPreheader(); + + Loop *outerLoop; + Loop *innerLoop; + BasicBlock *innerLoopHeader; + BasicBlock *outerLoopHeader; + BasicBlock *innerLoopLatch; + BasicBlock *outerLoopLatch; + BasicBlock *outerLoopPreHeader; + BasicBlock *innerLoopPreHeader; + PHINode *innerIndexVar; + PHINode *outerIndexVar; + BasicBlock *outerLoopSuccessor; + // Instruction* innerIndexVarInc; + BasicBlock *innerLoopLatchPred; + BasicBlock *innerLoopHeaderSucc; + std::vector> interchangedLoops; + /// Scev analysis. + ScalarEvolution *SE; + LoopInfo *LI; + DominatorTree *DT; +}; + +// Main LoopInterchange Pass +struct LoopInterchange : public FunctionPass { + static char ID; + ScalarEvolution *SE; + LoopInfo *LI; + DependenceAnalysis *DA; + DominatorTree *DT; + LoopInterchange() + : FunctionPass(ID), SE(nullptr), LI(nullptr), DA(nullptr), DT(nullptr) { + initializeLoopInterchangePass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequiredID(LCSSAID); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + } + + bool runOnFunction(Function &F) override { + SE = &getAnalysis(); + LI = &getAnalysis().getLoopInfo(); + DA = &getAnalysis(); + auto *DTWP = getAnalysisIfAvailable(); + DT = DTWP ? &DTWP->getDomTree() : nullptr; + // Build up a worklist of loop pairs to analyze. + // [TODO] Currently only supports loop with level 2. + // Handle for loops greater than level 2. + SmallVector Worklist; + + for (Loop *L : *LI) + populateWorklist(*L, Worklist); + + DEBUG(dbgs() << "Worlist size = " << Worklist.size() << "\n"); + + bool Changed = false; + while (!Worklist.empty()) + Changed |= processLoop(Worklist.pop_back_val()); + + return Changed; + } + + bool processLoop(LoopPair P) { + + // Check if it is legal to interchange loop + LoopInterchangeLegality LIL(P.first, P.second, SE, DA); + if (!LIL.canInterchangeLoops()) { + DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n"); + return false; + } + + DEBUG(dbgs() << "Loops are legal to interchange\n"); + + LoopInterchangeProfitability LIP(P.first, P.second, SE); + if (!LIP.isProfitable()) { + DEBUG(dbgs() << "Interchanging Loops not profitable\n"); + return false; + } + + LoopInterchangeTransform LIT(P.first, P.second, SE, LI, DT); + LIT.transform(); + DEBUG(dbgs() << "Loops interchanged\n"); + return true; + } +}; + +} // end of namespace + +bool LoopInterchangeLegality::checkDependence(Loop *Outer, + DependenceAnalysis *DA) { + + typedef SmallVector ValueVector; + // Holds Load and Store *instructions*. + ValueVector MemInstr; + // For each block. + for (Loop::block_iterator bb = Outer->block_begin(), be = Outer->block_end(); + bb != be; ++bb) { + // Scan the BB and collect legal loads and stores. + for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e; + ++it) { + Instruction *I = dyn_cast(it); + if (!I) + return false; + LoadInst *Ld = dyn_cast(it); + StoreInst *St = dyn_cast(it); + if (!St && !Ld) + continue; + if (Ld && !Ld->isSimple()) + return false; + if (St && !St->isSimple()) + return false; + MemInstr.push_back(I); + } + } + + DEBUG(dbgs() << "Found " << MemInstr.size() + << " Loads and stores to analyze\n"); + + ValueVector::iterator I, IE, J, JE; + for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) { + for (J = I, JE = MemInstr.end(); J != JE; ++J) { + Instruction *Src = dyn_cast(*I); + Instruction *Des = dyn_cast(*J); + if (Src == Des) + continue; + if (auto D = DA->depends(Src, Des, true)) { + // TODO: Fix his handle only anti/output dep for now. + if (D->isFlow()) { + // TODO: Flow dependency can be interchanged?? + DEBUG(dbgs() << "Flow dependece not handled"); + return false; + } + if (D->isAnti()) { + DEBUG(dbgs() << "Found Anti dependece \n"); + unsigned Levels = D->getLevels(); + for (unsigned II = 1; II <= Levels; ++II) { + const SCEV *Distance = D->getDistance(II); + const SCEVConstant *SCEVConst = + dyn_cast_or_null(Distance); + if (SCEVConst) { + const ConstantInt *CI = SCEVConst->getValue(); + if (!CI || (!CI->isNegative() && !CI->isZeroValue())) + return false; + } else { + unsigned Direction = D->getDirection(II); + if (Direction == Dependence::DVEntry::LT || + Direction == Dependence::DVEntry::LE || + Direction == Dependence::DVEntry::EQ) + continue; + return false; + } + } + } + } + } + } + + return true; +} + +bool LoopInterchangeLegality::currentLimitations() { + + BasicBlock *innerLoopHeader = InnerLoop->getHeader(); + BasicBlock *outerLoopLatch = OuterLoop->getLoopLatch(); + BasicBlock *innerLoopLatch = InnerLoop->getLoopLatch(); + BasicBlock *outerLoopHeader = OuterLoop->getHeader(); + BasicBlock *ExitBlock; + unsigned userCount = 0; + int PhiCount = 0; + PHINode *PHI; + + for (auto I = innerLoopHeader->begin(), E = innerLoopHeader->end(); I != E; + ++I) { + if (isa(I)) { + PHI = dyn_cast(I); + PhiCount++; + } + if (PhiCount > 1) + return true; + } + // TODO: Current limitation: Since we split the inner loop latch at the point + // were + // induction variable is incremented (induction.next); We cannot have more + // than 1 user of induction.next since it would result in broken code after + // split. + // e.g. + // for(i=0;i(PHI->getIncomingValue(1)); + for (auto UI = innerIndexVarInc->user_begin(), + UE = innerIndexVarInc->user_end(); + UI != UE; ++UI) { + Instruction *II = dyn_cast(*UI); + BasicBlock *BB = II->getParent(); + if (BB == innerLoopLatch) + userCount++; + if (userCount > 1) + return true; + } + + // TODO: Current limitation: LCSSA PHI nodes not handled yet in transform + // return failure. + BranchInst *BI = dyn_cast(outerLoopLatch->getTerminator()); + if (!BI) + return true; + + if (BI->getSuccessor(0) == outerLoopHeader) + ExitBlock = BI->getSuccessor(1); + else + ExitBlock = BI->getSuccessor(0); + + // We have an lcssa phi node return as current limitation + if (isa(ExitBlock->begin())) + return true; + + return false; +} + +bool LoopInterchangeLegality::canInterchangeLoops() { + + // We must have a loop in canonical form. Loops with indirectbr in them cannot + // be canonicalized. + if (!OuterLoop->getLoopPreheader() || !InnerLoop->getLoopPreheader()) { + DEBUG(dbgs() << "loop control flow is not understood"); + return false; + } + + // ScalarEvolution needs to be able to find the exit count. + const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(OuterLoop); + const SCEV *ExitCountInner = SE->getBackedgeTakenCount(InnerLoop); + if (ExitCountOuter == SE->getCouldNotCompute() || + ExitCountInner == SE->getCouldNotCompute()) { + DEBUG(dbgs() << "Could not determine number of loop iterations\n"); + return false; + } + + // We must have a single backedge. + if (OuterLoop->getNumBackEdges() != 1 || InnerLoop->getNumBackEdges() != 1) { + DEBUG(dbgs() << "loop control flow is not understood by vectorizer"); + return false; + } + + // We must have a single exiting block. + if (!OuterLoop->getExitingBlock() || !InnerLoop->getExitingBlock()) { + DEBUG(dbgs() << "loop control flow is not understood by vectorizer"); + return false; + } + + // TODO: The loops could not be interchanged due to current limitations in the + // transform module. + if (currentLimitations()) { + DEBUG(dbgs() << "Not legal because of current transform limitation\n"); + return false; + } + + // TODO: Do Additional checks to see if the Loop is valid before jumping into + // checkDependence. + + return checkDependence(OuterLoop, DA); +} + +int LoopInterchangeProfitability::getInstrOrderCost(PHINode *IV) { + unsigned goodOrder, badOrder; + badOrder = goodOrder = 0; + for (auto IB = IV->user_begin(), IE = IV->user_end(); IB != IE; ++IB) { + Instruction *UseInstr = cast(*IB); + if (isa(UseInstr)) { + GetElementPtrInst *GEP = dyn_cast(UseInstr); + if (GEP->getOperand(GEP->getNumOperands() - 1) == IV) + goodOrder += 1; + else + badOrder += 1; + } + } + return goodOrder - badOrder; +} + +bool LoopInterchangeProfitability::isProfitable() { + // TODO: Add Better Profitibility Checks + // e.g + // 1) The outer loop is small and inner loop is large dont interchange. + // 2) If reordering results in inner loop having stride of 1 etc. + + int Cost = 0; + for (BasicBlock::iterator I = InnerLoop->getHeader()->begin(); + isa(I); ++I) { + Cost += getInstrOrderCost(cast(I)); + } + DEBUG(dbgs() << "Cost = " << Cost << "\n"); + if (Cost < 0) + return true; + return false; +} + +bool LoopInterchangeTransform::transform() { + DEBUG(dbgs() << "transform\n"); + bool transformed = false; + + for (BasicBlock::iterator I = innerLoop->getHeader()->begin(); + isa(I); ++I) { + innerIndexVar = dyn_cast(I); + break; + } + + Instruction *innerIndexVarInc = nullptr; + innerIndexVarInc = dyn_cast(innerIndexVar->getIncomingValue(1)); + if (!innerIndexVarInc) + return false; + + // Split at the place were the induction variable is incremented/decremented. + // TODO: This splitting logic may not work always. Fix this. + splitInnerLoopLatch(DT, LI, innerIndexVarInc); + DEBUG(dbgs() << "splitInnerLoopLatch Done\n"); + splitInnerLoopHeader(DT, LI); + DEBUG(dbgs() << "splitInnerLoopHeader Done\n"); + + transformed |= adjustLoopLinks(DT); + if (!transformed) + DEBUG(dbgs() << "adjustLoopLinks Failed\n"); + return true; +} + +void LoopInterchangeTransform::initialize() { + innerLoopHeader = innerLoop->getHeader(); + outerLoopHeader = outerLoop->getHeader(); + innerLoopLatch = innerLoop->getLoopLatch(); + outerLoopLatch = outerLoop->getLoopLatch(); + outerLoopPreHeader = outerLoop->getLoopPreheader(); + innerLoopPreHeader = innerLoop->getLoopPreheader(); + innerIndexVar = innerLoop->getCanonicalInductionVariable(); + outerIndexVar = outerLoop->getCanonicalInductionVariable(); +} + +void LoopInterchangeTransform::splitInnerLoopLatch(DominatorTree *DT, + LoopInfo *LI, + Instruction *inc) { + BasicBlock::iterator I = innerLoopLatch->begin(); + BasicBlock::iterator E = innerLoopLatch->end(); + for (; I != E; ++I) { + if (inc == I) + break; + } + + // Split the inner loop latch out. + innerLoopLatchPred = innerLoopLatch; + innerLoopLatch = innerLoopLatch->splitBasicBlock(I); + innerLoop->addBasicBlockToLoop(innerLoopLatch, *LI); +} + +void LoopInterchangeTransform::splitInnerLoopHeader(DominatorTree *DT, + LoopInfo *LI) { + + // Split the inner loop header out. + innerLoopHeaderSucc = + innerLoopHeader->splitBasicBlock(innerLoopHeader->getFirstNonPHI()); + + DEBUG(dbgs() << "Output of splitInnerLoopHeader innerLoopHeaderSucc & " + "innerLoopHeader \n"); +} + +bool LoopInterchangeTransform::adjustOuterLoopPreheader() { + // Adjust the outerLoop preheader to jump to inner loop preheader. + BranchInst *outerLoopHeaderBI = + dyn_cast(outerLoopHeader->getTerminator()); + BranchInst *outerLoopPreHeaderBI = + dyn_cast(outerLoopPreHeader->getTerminator()); + BasicBlock *TrueBlock; + BasicBlock *FalseBlock; + if (outerLoopHeaderBI->isUnconditional()) { + BranchInst::Create(innerLoopPreHeader, outerLoopPreHeaderBI); + } else { + if (outerLoopHeaderBI->getSuccessor(0) == innerLoopPreHeader) { + TrueBlock = innerLoopPreHeader; + BranchInst *ExitInst = + dyn_cast(outerLoopLatch->getTerminator()); + if (!ExitInst) + return false; + if (ExitInst->isUnconditional()) { + FalseBlock = dyn_cast(ExitInst->getSuccessor(0)); + } else { + if (ExitInst->getSuccessor(0) == outerLoopHeader) { + FalseBlock = dyn_cast(ExitInst->getSuccessor(1)); + } else if (ExitInst->getSuccessor(1) == outerLoopHeader) { + FalseBlock = dyn_cast(ExitInst->getSuccessor(0)); + } else { + return false; + } + } + } else if (outerLoopHeaderBI->getSuccessor(1) == innerLoopPreHeader) { + FalseBlock = innerLoopPreHeader; + BranchInst *ExitInst = + dyn_cast(outerLoopLatch->getTerminator()); + if (!ExitInst) + return false; + if (ExitInst->isUnconditional()) { + BasicBlock *B = dyn_cast(ExitInst->getSuccessor(0)); + outerLoopHeaderBI->setSuccessor(0, B); + } else { + if (ExitInst->getSuccessor(0) == outerLoopHeader) { + TrueBlock = dyn_cast(ExitInst->getSuccessor(1)); + } else if (ExitInst->getSuccessor(1) == outerLoopHeader) { + TrueBlock = dyn_cast(ExitInst->getSuccessor(0)); + } else { + return false; + } + } + } + BranchInst::Create(TrueBlock, FalseBlock, outerLoopHeaderBI->getCondition(), + outerLoopPreHeaderBI); + } + outerLoopPreHeaderBI->eraseFromParent(); + return true; +} + +bool LoopInterchangeTransform::adjustInnerLoopPreheader() { + + SmallVector LoopPhis; + SmallVector Instr; + BranchInst *innerPreheaderBI = + dyn_cast(innerLoopPreHeader->getTerminator()); + BranchInst *innerHeaderBI = + dyn_cast(innerLoopHeader->getTerminator()); + BranchInst *outerHeaderBI = + dyn_cast(outerLoopHeader->getTerminator()); + BranchInst *innerLoopLatchPredBI = + dyn_cast(innerLoopLatchPred->getTerminator()); + BranchInst *outerLoopLatchBI = + dyn_cast(outerLoopLatch->getTerminator()); + BranchInst *innerLoopLatchBI = + dyn_cast(innerLoopLatch->getTerminator()); + BranchInst *innerLoopHeaderSuccBI = + dyn_cast(innerLoopHeaderSucc->getTerminator()); + + BasicBlock *LoopNestExit; + if (!innerPreheaderBI || !innerHeaderBI || !innerPreheaderBI || + !innerLoopLatchPredBI || !outerLoopLatchBI || !innerLoopLatchBI || + !innerLoopHeaderSuccBI) + return false; + BranchInst::Create(innerLoopHeader, innerPreheaderBI); + innerPreheaderBI->eraseFromParent(); + + BranchInst::Create(outerLoopHeader, innerHeaderBI); + innerHeaderBI->eraseFromParent(); + + // Update outerLoopHeader PHI nodes + // Collect all Phi nodes and instructions in the outer loop header. + for (auto I = outerLoopHeader->begin(), E = outerLoopHeader->end(); I != E; + ++I) { + LoopPhis.push_back(cast(I)); + } + + // Adjust Phi nodes for the outer loop header which now becomes inner loop + // header + while (!LoopPhis.empty()) { + PHINode *CurrIV = LoopPhis.pop_back_val(); + unsigned numIncomingBlocks = CurrIV->getNumOperands(); + for (unsigned i = 0; i < numIncomingBlocks; ++i) { + if (CurrIV->getIncomingBlock(i) == outerLoopPreHeader) { + CurrIV->setIncomingBlock(i, innerLoopHeader); + } + } + } + + // TODO: Check correctness of this. What if preheaders data is used in phi + // nodes in inner header? + for (auto I = innerLoopPreHeader->begin(), E = innerLoopPreHeader->end(); + I != E; ++I) { + Instruction *Ins = I; + if (isa(Ins)) + break; + Instr.push_back(I); + } + + for (auto I = Instr.begin(), E = Instr.end(); I != E; ++I) { + Instruction *Ins = *I; + Ins->moveBefore(outerLoopHeader->getTerminator()); + } + + if (outerHeaderBI->getSuccessor(0) == innerLoopPreHeader) { + outerHeaderBI->setSuccessor(0, innerLoopHeaderSucc); + if (!outerHeaderBI->isUnconditional()) + outerHeaderBI->setSuccessor(1, innerLoopLatch); + } else { + if (outerHeaderBI->getSuccessor(1) == innerLoopPreHeader) { + outerHeaderBI->setSuccessor(1, innerLoopHeaderSucc); + outerHeaderBI->setSuccessor(0, innerLoopLatch); + } else { + assert(0); + } + } + + if (innerLoopHeaderSuccBI->getSuccessor(0) == innerLoopLatch) { + innerLoopHeaderSuccBI->setSuccessor(0, outerLoopLatch); + } else if (!innerLoopHeaderSuccBI->isUnconditional() && + innerLoopHeaderSuccBI->getSuccessor(1) == innerLoopLatch) { + innerLoopHeaderSuccBI->setSuccessor(1, outerLoopLatch); + } + + // TODO: Update phi nodes if any in innerLoopLatch + + if (innerLoopLatchPredBI->getSuccessor(0) == innerLoopLatch) { + innerLoopLatchPredBI->setSuccessor(0, outerLoopLatch); + } else if (innerLoopLatchPredBI->getSuccessor(1) == innerLoopLatch) { + innerLoopLatchPredBI->setSuccessor(1, outerLoopLatch); + } else { + assert(0); + } + + // TODO: Update phi nodes if any in outerLoopLatch + + // Connect outerLoopLatch to inner loop latch. + if (outerLoopLatchBI->getSuccessor(0) == outerLoopHeader) { + LoopNestExit = outerLoopLatchBI->getSuccessor(1); + outerLoopLatchBI->setSuccessor(1, innerLoopLatch); + } else if (outerLoopLatchBI->getSuccessor(1) == outerLoopHeader) { + LoopNestExit = outerLoopLatchBI->getSuccessor(0); + outerLoopLatchBI->setSuccessor(0, innerLoopLatch); + } else { + assert(0); + } + + // TODO: Handle LCSSA Phi nodes + + if (innerLoopLatchBI->getSuccessor(0) == innerLoopHeader) { + innerLoopLatchBI->setSuccessor(1, LoopNestExit); + } else if (innerLoopLatchBI->getSuccessor(1) == innerLoopHeader) { + innerLoopLatchBI->setSuccessor(0, LoopNestExit); + } else { + assert(0); + } + + return true; +} + +bool LoopInterchangeTransform::adjustLoopLinks(DominatorTree *DT) { + + DEBUG(dbgs() << "adjustLoopLinks called\n"); + // Adjust the outerLoop preheader to jump to inner loop preheader. + adjustOuterLoopPreheader(); + adjustInnerLoopPreheader(); + + return true; +} + +char LoopInterchange::ID = 0; +INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange", + "Interchanges loops for cache reuse", false, false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(DependenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) + +INITIALIZE_PASS_END(LoopInterchange, "loop-interchange", + "Interchanges loops for cache reuse", false, false) + +Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); } Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -46,6 +46,7 @@ initializeLICMPass(Registry); initializeLoopDeletionPass(Registry); initializeLoopInstSimplifyPass(Registry); + initializeLoopInterchangePass(Registry); initializeLoopRotatePass(Registry); initializeLoopStrengthReducePass(Registry); initializeLoopRerollPass(Registry);