This is an archive of the discontinued LLVM Phabricator instance.

[LoopInterchange] Turn into a loop pass.
ClosedPublic

Authored by fhahn on Sep 5 2018, 12:10 PM.

Download Raw Diff

Details

Reviewers

chandlerc
efriedma
mcrosier
javed.absar
xbolva00

Commits

rG0694c159f74d: [LoopInterchange] Turn into a loop pass.
rL343308: [LoopInterchange] Turn into a loop pass.

Summary

This patch turns LoopInterchange into a loop pass. It now only
considers top-level loops and tries to move the innermost loop to the
optimal position within the loop nest. By only looking at top-level
loops, we might miss a few opportunities the function pass would get
(e.g. if we have a loop nest of 3 loops, in the function pass
we might process loops at level 1 and 2 and move the innermost loop to
level 1, and then we process loops at levels 0, 1, 2 and interchange
again, because we now have a different inner loop). But I think it would
be better to handle such cases by picking the best inner loop from the
start and avoid re-visiting the same loops again.

The biggest advantage of it being a loop pass is that it interacts
nicely with the other loop passes. Without this patch, there are some
performance regressions on AArch64 with loop interchanging enabled,
where no loops were interchanged, but we missed out on some other loop
optimizations.

It also removes the SimplifyCFG run. We are just changing branches, so
the CFG should not be more complicated, besides the additional 'unique'
preheaders this pass might create.

Diff Detail

Repository: rL LLVM

Event Timeline

fhahn created this revision.Sep 5 2018, 12:10 PM

Herald added a reviewer: javed.absar. · View Herald TranscriptSep 5 2018, 12:10 PM

Herald added subscribers: kristof.beyls, mehdi_amini. · View Herald Transcript

fhahn added inline comments.Sep 5 2018, 12:10 PM

lib/Transforms/Scalar/LoopInterchange.cpp
434 ↗	(On Diff #164097)	Not needed, I'll drop it.

Rebased after rL341537

Thanks, looks great as LoopPass.

This revision is now accepted and ready to land.Sep 11 2018, 6:08 AM

fhahn added a parent revision: D52026: [LoopInterchange] Preserve ScalarEvolution, by forgetting about interchanged loops..Sep 13 2018, 5:25 AM

Further testing surfaced a few cases where ScalarEvolution and LCSSA are not preserved properly. I'll fix those issues first.

fhahn added a parent revision: D52154: [LoopInterchange] Preserve LCSSA..Sep 16 2018, 1:19 PM

Rebased after the required fixes went in. Unless there are any additional concerns, I plan to go ahead and commit this tomorrow.

Closed by commit rL343308: [LoopInterchange] Turn into a loop pass. (authored by fhahn). · Explain WhySep 28 2018, 2:48 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

trunk/

lib/

Transforms/

IPO/

PassManagerBuilder.cpp

6 lines

Scalar/

LoopInterchange.cpp

57 lines

Diff 167447

llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp

Show First 20 Lines • Show All 369 Lines • ▼ Show 20 Lines	void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createCFGSimplificationPass());		MPM.add(createCFGSimplificationPass());
addInstructionCombiningPass(MPM);		addInstructionCombiningPass(MPM);
// We resume loop passes creating a second loop pipeline here.		// We resume loop passes creating a second loop pipeline here.
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars		MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.		MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
addExtensionsToPM(EP_LateLoopOptimizations, MPM);		addExtensionsToPM(EP_LateLoopOptimizations, MPM);
MPM.add(createLoopDeletionPass()); // Delete dead loops		MPM.add(createLoopDeletionPass()); // Delete dead loops

if (EnableLoopInterchange) {		if (EnableLoopInterchange)
// FIXME: These are function passes and break the loop pass pipeline.
MPM.add(createLoopInterchangePass()); // Interchange loops		MPM.add(createLoopInterchangePass()); // Interchange loops
MPM.add(createCFGSimplificationPass());
}
if (!DisableUnrollLoops)		if (!DisableUnrollLoops)
MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops		MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);		addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
// This ends the loop pass pipelines.		// This ends the loop pass pipelines.

if (OptLevel > 1) {		if (OptLevel > 1) {
MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds		MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
MPM.add(NewGVN ? createNewGVNPass()		MPM.add(NewGVN ? createNewGVNPass()
▲ Show 20 Lines • Show All 680 Lines • Show Last 20 Lines

llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp

Show All 11 Lines
// patterns.		// patterns.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "llvm/ADT/STLExtras.h"		#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"		#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"		#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"		#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DependenceAnalysis.h"		#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"		#include "llvm/Analysis/LoopInfo.h"
		#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"		#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"		#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"		#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"		#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"		#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"		#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"		#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"		#include "llvm/IR/Function.h"
▲ Show 20 Lines • Show All 235 Lines • ▼ Show 20 Lines	for (unsigned Row = 0; Row < NumRows; ++Row) {
if (InnerDep == '*' || OuterDep == '*')		if (InnerDep == '*' || OuterDep == '*')
return false;		return false;
if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))		if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
return false;		return false;
}		}
return true;		return true;
}		}

static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) {		static LoopVector populateWorklist(Loop &L) {
LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "		LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
<< L.getHeader()->getParent()->getName() << " Loop: %"		<< L.getHeader()->getParent()->getName() << " Loop: %"
<< L.getHeader()->getName() << '\n');		<< L.getHeader()->getName() << '\n');
LoopVector LoopList;		LoopVector LoopList;
Loop *CurrentLoop = &L;		Loop *CurrentLoop = &L;
const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();		const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
while (!Vec->empty()) {		while (!Vec->empty()) {
// The current loop has multiple subloops in it hence it is not tightly		// The current loop has multiple subloops in it hence it is not tightly
// nested.		// nested.
// Discard all loops above it added into Worklist.		// Discard all loops above it added into Worklist.
if (Vec->size() != 1) {		if (Vec->size() != 1)
LoopList.clear();		return {};
return;
}
LoopList.push_back(CurrentLoop);		LoopList.push_back(CurrentLoop);
CurrentLoop = Vec->front();		CurrentLoop = Vec->front();
Vec = &CurrentLoop->getSubLoops();		Vec = &CurrentLoop->getSubLoops();
}		}
LoopList.push_back(CurrentLoop);		LoopList.push_back(CurrentLoop);
V.push_back(std::move(LoopList));		return LoopList;
}		}

static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {		static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
PHINode *InnerIndexVar = L->getCanonicalInductionVariable();		PHINode *InnerIndexVar = L->getCanonicalInductionVariable();
if (InnerIndexVar)		if (InnerIndexVar)
return InnerIndexVar;		return InnerIndexVar;
if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr)		if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr)
return nullptr;		return nullptr;
▲ Show 20 Lines • Show All 117 Lines • ▼ Show 20 Lines	private:

LoopInfo *LI;		LoopInfo *LI;
DominatorTree *DT;		DominatorTree *DT;
BasicBlock *LoopExit;		BasicBlock *LoopExit;
bool InnerLoopHasReduction;		bool InnerLoopHasReduction;
};		};

// Main LoopInterchange Pass.		// Main LoopInterchange Pass.
struct LoopInterchange : public FunctionPass {		struct LoopInterchange : public LoopPass {
static char ID;		static char ID;
ScalarEvolution *SE = nullptr;		ScalarEvolution *SE = nullptr;
LoopInfo *LI = nullptr;		LoopInfo *LI = nullptr;
DependenceInfo *DI = nullptr;		DependenceInfo *DI = nullptr;
DominatorTree *DT = nullptr;		DominatorTree *DT = nullptr;
bool PreserveLCSSA;		bool PreserveLCSSA;

/// Interface to emit optimization remarks.		/// Interface to emit optimization remarks.
OptimizationRemarkEmitter *ORE;		OptimizationRemarkEmitter *ORE;

LoopInterchange() : FunctionPass(ID) {		LoopInterchange() : LoopPass(ID) {
initializeLoopInterchangePass(*PassRegistry::getPassRegistry());		initializeLoopInterchangePass(*PassRegistry::getPassRegistry());
}		}

void getAnalysisUsage(AnalysisUsage &AU) const override {		void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DependenceAnalysisWrapperPass>();		AU.addRequired<DependenceAnalysisWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();		AU.addRequired<OptimizationRemarkEmitterWrapperPass>();

AU.addPreserved<DominatorTreeWrapperPass>();		getLoopAnalysisUsage(AU);
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addPreservedID(LCSSAID);
}		}

bool runOnFunction(Function &F) override {		bool runOnLoop(Loop *L, LPPassManager &LPM) override {
if (skipFunction(F))		if (skipLoop(L) || L->getParentLoop())
return false;

SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();		SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();		LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();		DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();		DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();		ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);

// Build up a worklist of loop pairs to analyze.		return processLoopList(populateWorklist(*L));
SmallVector<LoopVector, 8> Worklist;

for (Loop *L : *LI)
populateWorklist(*L, Worklist);

LLVM_DEBUG(dbgs() << "Worklist size = " << Worklist.size() << "\n");
bool Changed = true;
while (!Worklist.empty()) {
LoopVector LoopList = Worklist.pop_back_val();
Changed = processLoopList(LoopList, F);
}
return Changed;
}		}

bool isComputableLoopNest(LoopVector LoopList) {		bool isComputableLoopNest(LoopVector LoopList) {
for (Loop *L : LoopList) {		for (Loop *L : LoopList) {
const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);		const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
if (ExitCountOuter == SE->getCouldNotCompute()) {		if (ExitCountOuter == SE->getCouldNotCompute()) {
LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");		LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
return false;		return false;
Show All 11 Lines	struct LoopInterchange : public LoopPass {
}		}

unsigned selectLoopForInterchange(const LoopVector &LoopList) {		unsigned selectLoopForInterchange(const LoopVector &LoopList) {
// TODO: Add a better heuristic to select the loop to be interchanged based		// TODO: Add a better heuristic to select the loop to be interchanged based
// on the dependence matrix. Currently we select the innermost loop.		// on the dependence matrix. Currently we select the innermost loop.
return LoopList.size() - 1;		return LoopList.size() - 1;
}		}

bool processLoopList(LoopVector LoopList, Function &F) {		bool processLoopList(LoopVector LoopList) {
bool Changed = false;		bool Changed = false;
unsigned LoopNestDepth = LoopList.size();		unsigned LoopNestDepth = LoopList.size();
if (LoopNestDepth < 2) {		if (LoopNestDepth < 2) {
LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");		LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
return false;		return false;
}		}
if (LoopNestDepth > MaxLoopNestDepth) {		if (LoopNestDepth > MaxLoopNestDepth) {
LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "		LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
▲ Show 20 Lines • Show All 1,018 Lines • ▼ Show 20 Lines	if (Changed)
adjustLoopPreheaders();		adjustLoopPreheaders();
return Changed;		return Changed;
}		}

char LoopInterchange::ID = 0;		char LoopInterchange::ID = 0;

INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",		INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange",
"Interchanges loops for cache reuse", false, false)		"Interchanges loops for cache reuse", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)		INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)		INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)		INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)

INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",		INITIALIZE_PASS_END(LoopInterchange, "loop-interchange",
"Interchanges loops for cache reuse", false, false)		"Interchanges loops for cache reuse", false, false)

Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); }		Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); }