Index: include/llvm-c/Transforms/IPO.h =================================================================== --- include/llvm-c/Transforms/IPO.h +++ include/llvm-c/Transforms/IPO.h @@ -49,7 +49,6 @@ /** See llvm::createGlobalDCEPass function. */ void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM); -void LLVMAddProactiveFusionPass(LLVMPassManagerRef PM); /** See llvm::createGlobalOptimizerPass function. */ void LLVMAddGlobalOptimizerPass(LLVMPassManagerRef PM); Index: include/llvm-c/Transforms/Scalar.h =================================================================== --- include/llvm-c/Transforms/Scalar.h +++ include/llvm-c/Transforms/Scalar.h @@ -34,7 +34,6 @@ /** See llvm::createAggressiveDCEPass function. */ void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM); -void LLVMAddProactiveFusionPass(LLVMPassManagerRef PM); /** See llvm::createAlignmentFromAssumptionsPass function. */ void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM); Index: include/llvm/Analysis/DependenceAnalysis.h =================================================================== --- include/llvm/Analysis/DependenceAnalysis.h +++ include/llvm/Analysis/DependenceAnalysis.h @@ -925,9 +925,6 @@ initializeDependenceAnalysisPass(*PassRegistry::getPassRegistry()); } - void initDependenceAnalysis( - Function &F, AliasAnalysis *AA, ScalarEvolution *SE, - LoopInfo *LI); bool runOnFunction(Function &F) override; void releaseMemory() override; void getAnalysisUsage(AnalysisUsage &) const override; Index: include/llvm/Analysis/LoopInfo.h =================================================================== --- include/llvm/Analysis/LoopInfo.h +++ include/llvm/Analysis/LoopInfo.h @@ -680,7 +680,6 @@ /// runOnFunction - Calculate the natural loop information. /// bool runOnFunction(Function &F) override; - void createRaw(DominatorTree &DT); void verifyAnalysis() const override; Index: include/llvm/Analysis/Passes.h =================================================================== --- include/llvm/Analysis/Passes.h +++ include/llvm/Analysis/Passes.h @@ -29,7 +29,7 @@ // createGlobalsModRefPass - This pass provides alias and mod/ref info for // global values that do not have their addresses taken. // - Pass *createGlobalsModRefPass(bool); + Pass *createGlobalsModRefPass(); //===--------------------------------------------------------------------===// // Index: include/llvm/Analysis/ScalarEvolution.h =================================================================== --- include/llvm/Analysis/ScalarEvolution.h +++ include/llvm/Analysis/ScalarEvolution.h @@ -229,13 +229,7 @@ /// LI - The loop information for the function we are currently analyzing. /// - public: LoopInfo *LI; - /// DT - The dominator tree. - /// - DominatorTree *DT; - - private: /// The DataLayout information for the target we are targeting. /// @@ -245,6 +239,10 @@ /// TargetLibraryInfo *TLI; + /// DT - The dominator tree. + /// + DominatorTree *DT; + /// CouldNotCompute - This SCEV is used to represent unknown trip /// counts and things. SCEVCouldNotCompute CouldNotCompute; @@ -917,12 +915,6 @@ const SCEV *ElementSize) const; bool runOnFunction(Function &F) override; - void initScalarEvolution(Function &F, - LoopInfo *L, - DataLayoutPass *D, - TargetLibraryInfo *TL, - DominatorTree* Dom); - void releaseMemory() override; void getAnalysisUsage(AnalysisUsage &AU) const override; void print(raw_ostream &OS, const Module* = nullptr) const override; Index: include/llvm/IR/LLVMContext.h =================================================================== --- include/llvm/IR/LLVMContext.h +++ include/llvm/IR/LLVMContext.h @@ -58,8 +58,7 @@ MD_noalias = 8, // "noalias", MD_nontemporal = 9, // "nontemporal" MD_mem_parallel_loop_access = 10, // "llvm.mem.parallel_loop_access" - MD_nonnull = 11, // "nonnull" - MD_nonaddr_taken_global = 12 + MD_nonnull = 11 // "nonnull" }; /// getMDKindID - Return a unique non-zero ID for the specified metadata kind. Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -227,8 +227,6 @@ void initializePrintFunctionPassWrapperPass(PassRegistry&); void initializePrintModulePassWrapperPass(PassRegistry&); void initializePrintBasicBlockPassPass(PassRegistry&); -void initializeProactiveFusionIPOPass(PassRegistry&); -void initializeProactiveFusionPass(PassRegistry&); void initializeProcessImplicitDefsPass(PassRegistry&); void initializePromotePassPass(PassRegistry&); void initializePruneEHPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -80,12 +80,10 @@ (void) llvm::createGCOVProfilerPass(); (void) llvm::createInstrProfilingPass(); (void) llvm::createFunctionInliningPass(); - (void) llvm::createProactiveFusionIPOPass(); - (void) llvm::createProactiveFusionPass(); (void) llvm::createAlwaysInlinerPass(); (void) llvm::createGlobalDCEPass(); (void) llvm::createGlobalOptimizerPass(); - (void) llvm::createGlobalsModRefPass(false); + (void) llvm::createGlobalsModRefPass(); (void) llvm::createIPConstantPropagationPass(); (void) llvm::createIPSCCPPass(); (void) llvm::createIndVarSimplifyPass(); Index: include/llvm/Transforms/IPO.h =================================================================== --- include/llvm/Transforms/IPO.h +++ include/llvm/Transforms/IPO.h @@ -90,7 +90,6 @@ Pass *createFunctionInliningPass(); Pass *createFunctionInliningPass(int Threshold); Pass *createFunctionInliningPass(unsigned OptLevel, unsigned SizeOptLevel); -Pass *createProactiveFusionIPOPass(); //===----------------------------------------------------------------------===// /// createAlwaysInlinerPass - Return a new pass object that inlines only Index: include/llvm/Transforms/IPO/PassManagerBuilder.h =================================================================== --- include/llvm/Transforms/IPO/PassManagerBuilder.h +++ include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -119,9 +119,6 @@ bool LoopVectorize; bool RerollLoops; bool LoadCombine; - bool ProactiveLoopFusion; - bool ProactiveLoopFusionAnalysis; - bool ProactiveLoopOverrider; bool DisableGVNLoadPRE; bool VerifyInput; bool VerifyOutput; Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -404,7 +404,6 @@ // LoadCombine - Combine loads into bigger loads. // BasicBlockPass *createLoadCombinePass(); -FunctionPass *createProactiveFusionPass(); } // End llvm namespace Index: lib/Analysis/DependenceAnalysis.cpp =================================================================== --- lib/Analysis/DependenceAnalysis.cpp +++ lib/Analysis/DependenceAnalysis.cpp @@ -181,16 +181,6 @@ } } -void DependenceAnalysis::initDependenceAnalysis( - Function &F, AliasAnalysis *AA, ScalarEvolution *SE, - LoopInfo *LI) { - this->F = &F; - this->AA = AA; - this->SE = SE; - this->LI = LI; -} - - void DependenceAnalysis::print(raw_ostream &OS, const Module*) const { dumpExampleDependence(OS, F, const_cast(this)); @@ -3371,17 +3361,8 @@ DstIdx = DstGEP->idx_begin(); SrcIdx != SrcEnd; ++SrcIdx, ++DstIdx, ++P) { -// Pair[P].Src = SE->getSCEV(*SrcIdx); -// Pair[P].Dst = SE->getSCEV(*DstIdx); - if (SE->isSCEVable((*SrcIdx)->getType()) - && SE->isSCEVable((*DstIdx)->getType())) { - Pair[P].Src = SE->getSCEV(*SrcIdx); - Pair[P].Dst = SE->getSCEV(*DstIdx); - } - else { - return make_unique(Src, Dst); - } - + Pair[P].Src = SE->getSCEV(*SrcIdx); + Pair[P].Dst = SE->getSCEV(*DstIdx); unifySubscriptType(&Pair[P]); } } Index: lib/Analysis/IPA/GlobalsModRef.cpp =================================================================== --- lib/Analysis/IPA/GlobalsModRef.cpp +++ lib/Analysis/IPA/GlobalsModRef.cpp @@ -26,8 +26,6 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" @@ -43,7 +41,7 @@ STATISTIC(NumReadMemFunctions, "Number of functions that only read memory"); STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects"); -namespace llvm { +namespace { /// FunctionRecord - One instance of this structure is stored for every /// function in the program. Later, the entries for these functions are /// removed if the function is found to call an external function (in which @@ -90,15 +88,10 @@ /// FunctionInfo - For each function, keep track of what globals are /// modified or read. std::map FunctionInfo; - bool addMetadata; public: static char ID; - GlobalsModRef(bool addmeta) : ModulePass(ID), addMetadata(addmeta) { - initializeGlobalsModRefPass(*PassRegistry::getPassRegistry()); - } GlobalsModRef() : ModulePass(ID) { - addMetadata = false; initializeGlobalsModRefPass(*PassRegistry::getPassRegistry()); } @@ -206,7 +199,7 @@ "globalsmodref-aa", "Simple mod/ref analysis for globals", false, true, false) -Pass *llvm::createGlobalsModRefPass(bool addmeta=false) { return new GlobalsModRef(addmeta); } +Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); } /// AnalyzeGlobals - Scan through the users of all of the internal /// GlobalValue's in the program. If none of them have their "address taken" @@ -225,19 +218,11 @@ } for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { + I != E; ++I) if (I->hasLocalLinkage()) { if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { // Remember that we are tracking this global, and the mod/ref fns NonAddressTakenGlobals.insert(I); - if (addMetadata) { - MDString *n = MDString::get(M.getContext(), StringRef("NATG")); - Metadata *Vals[] = {n}; - MDNode *Nod = MDNode::get(M.getContext(), Vals); - for (User *U : (&*I)->users()) - if (Instruction *K = dyn_cast(U)) - K->setMetadata(LLVMContext::MD_nonaddr_taken_global, Nod); - } for (unsigned i = 0, e = Readers.size(); i != e; ++i) FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref; @@ -254,7 +239,6 @@ } Readers.clear(); Writers.clear(); } - } } /// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer. Index: lib/Analysis/LoopInfo.cpp =================================================================== --- lib/Analysis/LoopInfo.cpp +++ lib/Analysis/LoopInfo.cpp @@ -618,10 +618,6 @@ return false; } -void LoopInfo::createRaw(DominatorTree& DT) { - LI.Analyze(DT); -} - /// updateUnloop - The last backedge has been removed from a loop--now the /// "unloop". Find a new parent for the blocks contained within unloop and /// update the loop tree. We don't necessarily have valid dominators at this Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -7864,21 +7864,6 @@ initializeScalarEvolutionPass(*PassRegistry::getPassRegistry()); } -void ScalarEvolution::initScalarEvolution(Function &F, - LoopInfo *L, - DataLayoutPass *D, - TargetLibraryInfo *TL, - DominatorTree* Dom) { - - this->F = &F; - LI = L; - if (D) DL = &D->getDataLayout(); - else DL = nullptr; - TLI = TL; - DT = Dom; -} - - bool ScalarEvolution::runOnFunction(Function &F) { this->F = &F; AC = &getAnalysis().getAssumptionCache(F); Index: lib/IR/LLVMContext.cpp =================================================================== --- lib/IR/LLVMContext.cpp +++ lib/IR/LLVMContext.cpp @@ -93,11 +93,6 @@ unsigned NonNullID = getMDKindID("nonnull"); assert(NonNullID == MD_nonnull && "nonnull kind id drifted"); (void)NonNullID; - - unsigned NATG = getMDKindID("NATG"); - assert(NATG == MD_nonaddr_taken_global && "nonnull kind id drifted"); - (void)NATG; - } LLVMContext::~LLVMContext() { delete pImpl; } Index: lib/Transforms/IPO/CMakeLists.txt =================================================================== --- lib/Transforms/IPO/CMakeLists.txt +++ lib/Transforms/IPO/CMakeLists.txt @@ -17,7 +17,6 @@ MergeFunctions.cpp PartialInlining.cpp PassManagerBuilder.cpp - ProactiveFusionIPO.cpp PruneEH.cpp StripDeadPrototypes.cpp StripSymbols.cpp Index: lib/Transforms/IPO/IPO.cpp =================================================================== --- lib/Transforms/IPO/IPO.cpp +++ lib/Transforms/IPO/IPO.cpp @@ -32,7 +32,6 @@ initializeIPCPPass(Registry); initializeAlwaysInlinerPass(Registry); initializeSimpleInlinerPass(Registry); - initializeProactiveFusionIPOPass(Registry); initializeInternalizePassPass(Registry); initializeLoopExtractorPass(Registry); initializeBlockExtractorPassPass(Registry); @@ -72,10 +71,6 @@ unwrap(PM)->add(createFunctionInliningPass()); } -void LLVMAddProactiveFusionIPOPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createProactiveFusionIPOPass()); -} - void LLVMAddAlwaysInlinerPass(LLVMPassManagerRef PM) { unwrap(PM)->add(llvm::createAlwaysInlinerPass()); } Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -78,20 +78,7 @@ EnableMLSM("mlsm", cl::init(true), cl::Hidden, cl::desc("Enable motion of merged load and store")); -static cl::opt -EnableProactiveLoopFusionAnalysis("proactive-loop-fusion-analysis", - cl::init(false), cl::Hidden, - cl::desc("Run proactive loop fusion analysis")); -static cl::opt -EnableProactiveLoopFusion("proactive-loop-fusion", - cl::init(false), cl::Hidden, - cl::desc("Run proactive loop fusion")); - - PassManagerBuilder::PassManagerBuilder() { - ProactiveLoopOverrider = false; - ProactiveLoopFusionAnalysis = EnableProactiveLoopFusionAnalysis; - ProactiveLoopFusion = EnableProactiveLoopFusion; OptLevel = 2; SizeLevel = 0; LibraryInfo = nullptr; @@ -224,11 +211,6 @@ if (OptLevel > 2) MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args - if (ProactiveLoopFusionAnalysis) { - MPM.add(createGlobalsModRefPass(true)); // IP alias analysis. - MPM.add(createProactiveFusionIPOPass()); - } - // Start of function pass. // Break up aggregate allocas, using SSAUpdater. if (UseNewSROA) @@ -277,36 +259,26 @@ addExtensionsToPM(EP_ScalarOptimizerLate, MPM); - if (ProactiveLoopFusion){ - MPM.add(createCFGSimplificationPass()); - if (EnableMLSM) - MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds - MPM.add(createGVNPass(false)); // Remove redundancies - MPM.add(createProactiveFusionPass()); - } - if (RerollLoops) MPM.add(createLoopRerollPass()); - if (!ProactiveLoopFusionAnalysis || ProactiveLoopOverrider) { - if (!RunSLPAfterLoopVectorization) { - if (SLPVectorize) - MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. - - if (BBVectorize) { - MPM.add(createBBVectorizePass()); - MPM.add(createInstructionCombiningPass()); - addExtensionsToPM(EP_Peephole, MPM); - if (OptLevel > 1 && UseGVNAfterVectorization) - MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies - else - MPM.add(createEarlyCSEPass()); // Catch trivial redundancies - - // BBVectorize may have significantly shortened a loop body; unroll again. - if (!DisableUnrollLoops) - MPM.add(createLoopUnrollPass()); - } - } - } + if (!RunSLPAfterLoopVectorization) { + if (SLPVectorize) + MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + + if (BBVectorize) { + MPM.add(createBBVectorizePass()); + MPM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, MPM); + if (OptLevel > 1 && UseGVNAfterVectorization) + MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + else + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + + // BBVectorize may have significantly shortened a loop body; unroll again. + if (!DisableUnrollLoops) + MPM.add(createLoopUnrollPass()); + } + } if (LoadCombine) MPM.add(createLoadCombinePass()); @@ -327,55 +299,49 @@ if (ExtraVectorizerPasses) MPM.add(createLoopRotatePass()); - if (!ProactiveLoopFusionAnalysis || ProactiveLoopOverrider) { - MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize)); - } + MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize)); // FIXME: Because of #pragma vectorize enable, the passes below are always // inserted in the pipeline, even when the vectorizer doesn't run (ex. when // on -O1 and no #pragma is found). Would be good to have these two passes // as function calls, so that we can only pass them when the vectorizer // changed the code. MPM.add(createInstructionCombiningPass()); - if (!ProactiveLoopFusionAnalysis || ProactiveLoopOverrider) { - if (OptLevel > 1 && ExtraVectorizerPasses) { - // At higher optimization levels, try to clean up any runtime overlap and - // alignment checks inserted by the vectorizer. We want to track correllated - // runtime checks for two inner loops in the same outer loop, fold any - // common computations, hoist loop-invariant aspects out of any outer loop, - // and unswitch the runtime checks if possible. Once hoisted, we may have - // dead (or speculatable) control flows or more combining opportunities. - MPM.add(createEarlyCSEPass()); - MPM.add(createCorrelatedValuePropagationPass()); - MPM.add(createInstructionCombiningPass()); - MPM.add(createLICMPass()); - MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); - MPM.add(createCFGSimplificationPass()); - MPM.add(createInstructionCombiningPass()); - } + if (OptLevel > 1 && ExtraVectorizerPasses) { + // At higher optimization levels, try to clean up any runtime overlap and + // alignment checks inserted by the vectorizer. We want to track correllated + // runtime checks for two inner loops in the same outer loop, fold any + // common computations, hoist loop-invariant aspects out of any outer loop, + // and unswitch the runtime checks if possible. Once hoisted, we may have + // dead (or speculatable) control flows or more combining opportunities. + MPM.add(createEarlyCSEPass()); + MPM.add(createCorrelatedValuePropagationPass()); + MPM.add(createInstructionCombiningPass()); + MPM.add(createLICMPass()); + MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); + MPM.add(createCFGSimplificationPass()); + MPM.add(createInstructionCombiningPass()); + } - if (RunSLPAfterLoopVectorization) { - if (SLPVectorize) { - MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. - if (OptLevel > 1 && ExtraVectorizerPasses) { - MPM.add(createEarlyCSEPass()); - } + if (RunSLPAfterLoopVectorization) { + if (SLPVectorize) { + MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + if (OptLevel > 1 && ExtraVectorizerPasses) { + MPM.add(createEarlyCSEPass()); } + } - if (BBVectorize) { - MPM.add(createBBVectorizePass()); - MPM.add(createInstructionCombiningPass()); - addExtensionsToPM(EP_Peephole, MPM); - if (OptLevel > 1 && UseGVNAfterVectorization) - MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies - else - MPM.add(createEarlyCSEPass()); // Catch trivial redundancies - - // BBVectorize may have significantly shortened a loop body; unroll again. - if (!ProactiveLoopFusionAnalysis || ProactiveLoopOverrider) - if (!DisableUnrollLoops) - MPM.add(createLoopUnrollPass()); - - } + if (BBVectorize) { + MPM.add(createBBVectorizePass()); + MPM.add(createInstructionCombiningPass()); + addExtensionsToPM(EP_Peephole, MPM); + if (OptLevel > 1 && UseGVNAfterVectorization) + MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + else + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + + // BBVectorize may have significantly shortened a loop body; unroll again. + if (!DisableUnrollLoops) + MPM.add(createLoopUnrollPass()); } } @@ -383,9 +349,8 @@ MPM.add(createCFGSimplificationPass()); MPM.add(createInstructionCombiningPass()); - if (!ProactiveLoopFusionAnalysis || ProactiveLoopOverrider) - if (!DisableUnrollLoops) - MPM.add(createLoopUnrollPass()); // Unroll small loops + if (!DisableUnrollLoops) + MPM.add(createLoopUnrollPass()); // Unroll small loops // After vectorization and unrolling, assume intrinsics may tell us more // about pointer alignments. @@ -444,10 +409,6 @@ PM.add(createPruneEHPass()); // Remove dead EH info. - if (ProactiveLoopFusionAnalysis) { - PM.add(createGlobalsModRefPass(true)); - PM.add(createProactiveFusionIPOPass()); - } // Optimize globals again if we ran the inliner. if (RunInliner) PM.add(createGlobalOptimizerPass()); @@ -470,7 +431,7 @@ // Run a few AA driven optimizations here and now, to cleanup the code. PM.add(createFunctionAttrsPass()); // Add nocapture. - PM.add(createGlobalsModRefPass(false)); // IP alias analysis. + PM.add(createGlobalsModRefPass()); // IP alias analysis. PM.add(createLICMPass()); // Hoist loop invariants. if (EnableMLSM) @@ -484,21 +445,12 @@ // More loops are countable; try to optimize them. PM.add(createIndVarSimplifyPass()); PM.add(createLoopDeletionPass()); - if (ProactiveLoopFusion) { - PM.add(createJumpThreadingPass()); - PM.add(createCFGSimplificationPass()); - if (LoadCombine) - PM.add(createLoadCombinePass()); - PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. - PM.add(createProactiveFusionPass()); - } - if (!ProactiveLoopFusionAnalysis || ProactiveLoopOverrider) { - PM.add(createLoopVectorizePass(true, LoopVectorize)); - // More scalar chains could be vectorized due to more alias information - if (RunSLPAfterLoopVectorization) - if (SLPVectorize) - PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. - } + PM.add(createLoopVectorizePass(true, LoopVectorize)); + + // More scalar chains could be vectorized due to more alias information + if (RunSLPAfterLoopVectorization) + if (SLPVectorize) + PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. // After vectorization, assume intrinsics may tell us more about pointer // alignments. Index: lib/Transforms/IPO/ProactiveFusionIPO.cpp =================================================================== --- lib/Transforms/IPO/ProactiveFusionIPO.cpp +++ /dev/null @@ -1,556 +0,0 @@ -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/ilist.h" -#include "llvm/ADT/SCCIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/DependenceAnalysis.h" -#include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DiagnosticInfo.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/IPO.h" -#include "llvm/Transforms/Utils/Local.h" - -#include -#define DEBUG_TYPE "proactive-fusion-ipo" - -using namespace llvm; -namespace { -class CallAnalyzer; -//STATISTIC(NumLoopsFused, "Number of loops fused"); -STATISTIC(NumFunctionsUnswitched, "Number of functions unswitched by loop fusion pass"); - -struct ProactiveFusionIPO : public ModulePass { - static char ID; - explicit ProactiveFusionIPO(); - // require and preserve call graph? - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - } - bool runOnModule(Module &M) override; - DataLayoutPass* DLP; - TargetLibraryInfo* TLI; - AliasAnalysis* AA; - using llvm::Pass::doFinalization; - bool doFinalization(Module &M) override; - DenseSet processedFns; - bool unswitchFunction(Function* F); - bool backtrackStoreDeps(Value* VOp, DenseSet& peelSet); - bool testInstsEntryBB(BasicBlock* BB, DenseSet& peelSet); - bool analyzeCondDeps(Instruction *op, DenseSet& peelSet); - void duplicatePeelList(SmallVector& peeled, - ValueToValueMapTy& VMap, - BasicBlock *BB); - void copyMetadata (Instruction *from, Instruction *to); - bool loopSafeToFuse(Loop *L); -}; - -bool ProactiveFusionIPO::backtrackStoreDeps(Value* VOp, - DenseSet& peelSet) { - if (isa(VOp) || isa(VOp)) { - return true; - } else if (LoadInst *ldins = dyn_cast(VOp)) { - if (!ldins->isSimple()) return false; - if (isa(ldins->getPointerOperand())) { - peelSet.insert(ldins); - return true; - } else - return false; - } else { - if (BinaryOperator *ins = dyn_cast(VOp)) { - if (ins->mayWriteToMemory() - || ins->mayReadFromMemory() - || ins->mayThrow() - || ins->isUsedOutsideOfBlock(ins->getParent())) - return false; - else { - peelSet.insert(ins); - bool usere = true; - for (Instruction::op_iterator OB = ins->op_begin(), OE = ins->op_end(); - usere && (OB != OE); ++OB) { - usere = usere && backtrackStoreDeps(*OB, peelSet); - if (!usere) return false; - } - return usere; - } - } else return false; - } -} - -bool ProactiveFusionIPO::analyzeCondDeps(Instruction *op, - DenseSet& peelSet) { - for (Instruction::op_iterator OB = op->op_begin(), OE = op->op_end(); - OB != OE; ++OB) { - if (LoadInst *L = dyn_cast(OB)) { - if (!isa(L->getPointerOperand())) - return false; - if (!L->isSimple()) return false; - peelSet.insert(L); - } else if (!(isa(OB) || isa(OB))) { - return false; - } - } - peelSet.insert(op); - return true; -} - -bool ProactiveFusionIPO::testInstsEntryBB(BasicBlock* BB, - DenseSet& peelSet) { - for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); - BI != BE; ++BI) { - if (StoreInst *SI = dyn_cast(BI)) { - // Here we analyze the operands of the store. - // If he store deps are based on global - // Vars, we will allow peeling of the store - // Along with its deps. - if (!SI->isSimple()) return false; - for (unsigned opn = 0; opn < SI->getNumOperands(); ++opn) { - if (!backtrackStoreDeps(SI->getOperand(opn),peelSet)) return false; - } - peelSet.insert(SI); - } else if (CallInst *CI = dyn_cast(BI)) { - CallSite CS(cast(CI)); - if (Function *cF = CS.getCalledFunction()) { - // ignore some pseudo calls - if (cF->getName() == "llvm.lifetime.end" - || cF->getName() == "llvm.lifetime.start") { - continue; - } - } - return false; - } else { - // everything else is ok to leave back - } - } - return true; -} - -void ProactiveFusionIPO::copyMetadata(Instruction *from, Instruction *to) { - if (!from->hasMetadata()) - return; - - // Otherwise, enumerate and copy over metadata from the old instruction to the - // new one. - SmallVector,4> TheMDs; - from->getAllMetadataOtherThanDebugLoc(TheMDs); - for (const auto &MD : TheMDs) - to->setMetadata(MD.first, MD.second); - - to->setDebugLoc(from->getDebugLoc()); -} - -void ProactiveFusionIPO::duplicatePeelList(SmallVector& peeled, - ValueToValueMapTy& VMap, - BasicBlock *BB) { - for (SmallVector::iterator pi = peeled.begin(), pe = peeled.end(); - pi != pe; ++pi) { - Value *val = *pi; - if (StoreInst *si = dyn_cast(val)) { - Value *vop = si->getValueOperand(); - if (VMap.find(vop) != VMap.end()) { - vop = VMap[vop]; - } - StoreInst *ns = new StoreInst(vop,si->getPointerOperand(),false,si->getAlignment(),BB); - copyMetadata(si,ns); - VMap[si] = ns; - } else if (LoadInst *li = dyn_cast(val)) { - LoadInst *nl = new LoadInst(li->getPointerOperand(),"plf",false,li->getAlignment(),BB); - VMap[li] = nl; - } else if (BinaryOperator *bi = dyn_cast(val)) { - Value *bopnd1 = bi->getOperand(0); - Value *bopnd2 = bi->getOperand(1); - if (VMap.find(bopnd1) != VMap.end()) { - bopnd1 = VMap[bopnd1]; - } - if (VMap.find(bopnd2) != VMap.end()) { - bopnd2 = VMap[bopnd2]; - } - BinaryOperator *nb = BinaryOperator::Create(bi->getOpcode(), bopnd1, bopnd2,"plf",BB); - copyMetadata(bi,nb); - VMap[bi] = nb; - } else if (CmpInst *ci = dyn_cast(val)) { - Value *lhs = ci->getOperand(0); - Value *rhs = ci->getOperand(1); - if (VMap.find(lhs) != VMap.end()) { - lhs = VMap[lhs]; - } - if (VMap.find(rhs) != VMap.end()) { - rhs = VMap[rhs]; - } - CmpInst *nc = CmpInst::Create(ci->getOpcode(),ci->getPredicate(), - lhs,rhs, "plf",BB); - copyMetadata(ci,nc); - VMap[ci] = nc; - } - } -} - -bool ProactiveFusionIPO::unswitchFunction(Function* F) { - // First, verify that this function is an unswitching candidate... - std::vector UsersL(F->user_begin(), F->user_end()); - if (UsersL.empty()) return false; - BasicBlock* entryBlock = F->begin(); - BranchInst *BR = dyn_cast(entryBlock->getTerminator()); - if (!BR || BR->isUnconditional()) return false; - if (isa(BR)) return false; - BasicBlock* returnBlock = nullptr; - unsigned returnCount = 0; - ReturnInst *RI = nullptr; - for (succ_iterator SI = succ_begin(entryBlock), - SE = succ_end(entryBlock); SI != SE; ++SI) - if (isa((*SI)->getTerminator())) { - returnBlock = *SI; - returnCount++; - } - - if (returnCount != 1) return false; - - // sanity in return block - for (BasicBlock::iterator BI = returnBlock->begin(), - BE = returnBlock->end(); - BI != BE; ++BI) { - Value *val = BI; - if (!(isa(val) || isa(val))) { - if (CallInst *CI = dyn_cast(val)) { - CallSite CS(cast(CI)); - if (Function *cF = CS.getCalledFunction()) { - if (cF->getName() == "llvm.lifetime.end" - || cF->getName() == "llvm.lifetime.start") { - continue; - } - } - } - return false; - } - } - - ValueToValueMapTy VMap; - Function* duplicateFunction = F; - - std::vector Users(duplicateFunction->user_begin(), - duplicateFunction->user_end()); - for (std::vector::iterator UI = Users.begin(), UE = Users.end(); - UI != UE; ++UI) { - if (!isa(*UI)) { - return false; - } - } - if (Users.size() == 0) return false; - - DenseSet peelSet; - Instruction *cond = dyn_cast(BR->getCondition()); - if(!analyzeCondDeps(cond, peelSet)) { - return false; - } - if (!testInstsEntryBB(entryBlock, peelSet)) { - return false; - } - RI = cast(returnBlock->getTerminator()); - Value *retval = nullptr; - PHINode *retPhi = nullptr; - if (RI->getReturnValue()) { - if (PHINode *PN = dyn_cast(RI->getReturnValue())) { - if (PN->getBasicBlockIndex(entryBlock) == -1) { - return false; - } - retval = dyn_cast(PN->getIncomingValue( - PN->getBasicBlockIndex( - entryBlock))); - if (!retval) return false; - retPhi = PN; - } else { - retval = dyn_cast(RI->getReturnValue()); - if (!retval) return false; - } - } - - SmallVector peeled; - for (BasicBlock::iterator BI = entryBlock->begin(), - BE = entryBlock->end(); BI != BE; ++BI) { - Value *val = BI; - if (peelSet.find(val) != peelSet.end()) { - // if load/store/binary op - if (!(isa(val) || - isa(val) || - isa (val) || - isa(val))) { - return false; - } - for (User* U : val->users()) { - if (Instruction *I = dyn_cast(U)) { - if (I->getParent() != entryBlock) return false; - } - } - peeled.push_back(val); - } - } - - ValueToValueMapTy VMap2; - Function * duplicateFunction2 = CloneFunction(F, VMap2, - /*ModuleLevelChanges=*/false); - duplicateFunction->getParent()->getFunctionList().push_back(duplicateFunction2); - - BasicBlock *EBT = entryBlock->getTerminator()->getSuccessor(0); - BasicBlock *EBF = entryBlock->getTerminator()->getSuccessor(1); - SmallVector callsiteOld; - for (std::vector::iterator UI = Users.begin(), UE = Users.end(); - UI != UE; ++UI) { - if (CallInst *CI = dyn_cast(*UI)) { - if (CI->getParent()->getTerminator()) { - BasicBlock *NewCallConditionalBB = CI->getParent(); - BasicBlock::iterator BCI = CI; - BCI++; - if (BCI == NewCallConditionalBB->end()) continue; - BasicBlock *NewCallLandingBlock = - NewCallConditionalBB->splitBasicBlock(BCI, "plf.unsw.lander"); - BasicBlock *NewCallBlock = - NewCallConditionalBB->splitBasicBlock(CI, "plf.unsw.caller"); - if(NewCallConditionalBB->getTerminator()) - NewCallConditionalBB->getTerminator()->eraseFromParent(); - ValueToValueMapTy VMap2; - duplicatePeelList(peeled,VMap2,NewCallConditionalBB); - if (EBT == returnBlock) { - BranchInst::Create( - NewCallLandingBlock, - NewCallBlock, - VMap2[cond], - NewCallConditionalBB); - } else { - BranchInst::Create( - NewCallBlock, - NewCallLandingBlock, - VMap2[cond], - NewCallConditionalBB); - } - if (RI->getReturnValue()) { - PHINode *PN = PHINode::Create(CI->getType(), - 2,"plf",NewCallLandingBlock->begin()); - CI->replaceAllUsesWith(PN); - PN->addIncoming(retval,NewCallConditionalBB); - PN->addIncoming(CI,CI->getParent()); - } - } else { - callsiteOld.push_back(CI); - } - } - } - if (callsiteOld.size() > 0) { - for (SmallVector::iterator COI = callsiteOld.begin(), - COE = callsiteOld.end(); COI != COE; ++COI) { - CallInst *CI = cast(*COI); - CI->setCalledFunction(duplicateFunction2); - dbgs() << "changed = " << *CI << '\n'; - } - }/* else { - duplicateFunction2->eraseFromParent(); - }*/ - - entryBlock->getTerminator()->eraseFromParent(); - if (EBT == returnBlock) - BranchInst::Create(EBF,entryBlock); - else BranchInst::Create(EBT,entryBlock); - NumFunctionsUnswitched++; - for (SmallVector::reverse_iterator SI = peeled.rbegin(), - SE = peeled.rend(); SI != SE; ++SI) { - Instruction *inst = cast(*SI); - inst->eraseFromParent(); - } - if (retPhi) { - retPhi->removeIncomingValue(entryBlock); - } - return true; -} - -bool ProactiveFusionIPO::loopSafeToFuse(Loop *L) { - for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) { - - if (isa((*I)->getTerminator())) - return false; - - for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) { - if (isa(BI)) { - return false; - } - } - } - return true; -} - -bool ProactiveFusionIPO::runOnModule(Module &M) { - DLP = getAnalysisIfAvailable(); - TLI = &getAnalysis(); - CallGraph &CG = - getAnalysis().getCallGraph(); - AA = &getAnalysis(); - // SCC walks are expensive, so not - // attempting this for large modules. - if (M.size() > 50) return false; - SmallVector allc; - DenseMap singleSCCNodes; - for (scc_iterator SCCI = scc_begin(&CG); - !SCCI.isAtEnd(); ++SCCI) { - const std::vector &nextSCC = *SCCI; - if ((!(nextSCC.size() == 1 && SCCI.hasLoop()) && - (!(SCCI.hasLoop()))) && (nextSCC.size() == 1)) { - // a single node SCC to base a top-level walk - std::vector::const_iterator ITmp - = nextSCC.begin(); - Function *F = (*ITmp)->getFunction(); - if (F) { - if (F->isDeclaration()) continue; - singleSCCNodes[F] = true; - DominatorTree DT; - DT.recalculate(*F); - for (Function::iterator BB = F->begin(), FE = F->end(); - BB != FE; ++BB) { - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); - I != E;I++) { - CallInst* CI = dyn_cast(I); - CallSite CS(cast(I)); - if (CS && CI) { - if (CS.getCalledFunction()) { - if (CS.getCalledFunction()->isDeclaration()) { - break; - } else { - allc.push_back(cast(I)); - } - } - } - } - } - } - } - } - - DenseMap processedFns; - int whichc = 0; - for (SmallVector::iterator CII = allc.begin(), CEE = allc.end(); - CII != CEE; ++CII) { - CallInst *CI = dyn_cast(*CII); - CallSite CS(cast(*CII)); - if (CS && CI) { - SmallVector callsites; - Function& cF = *CS.getCalledFunction(); - bool allsingle = true; - if (processedFns.find(&cF) != processedFns.end()) { - allsingle = processedFns[&cF]; - } else { - allsingle = true; - for (Function::iterator BI = cF.begin(), BE = cF.end(); - allsingle && BI != BE; ++BI) { - BasicBlock *BB = BI; - if (BB && BB->getTerminator()) { - for (BasicBlock::iterator I = BI->begin(), E = BI->end(); - I != E; ++I) { - CallSite CS(cast(I)); - if (CS) { - if (CS.getCalledFunction()) { - if (CS.getCalledFunction()->isDeclaration()) { - allsingle = false; - break; - } - callsites.push_back(CS); - } else { - allsingle = false; - break; - } - } - } - } - } - - if (allsingle) { - DominatorTree DT; - DT.recalculate(cF); - LoopInfo LI; - LI.createRaw(DT); - if (LI.empty()) { - allsingle = false; - } - for (LoopInfo::iterator li = LI.begin(), le = LI.end(); - allsingle && li != le; ++li) { - Loop* L = *li; - allsingle = allsingle && (L->empty() == true) - && ((loopSafeToFuse(L)) == true); - } - ScalarEvolution SE; - SE.initScalarEvolution(cF, &LI, DLP, TLI, &DT); - SE.releaseMemory(); - LI.releaseMemory(); - } - processedFns[&cF] = allsingle; - } - if (allsingle) { - // we need to do additional checks and tune-up - // the called function - InlineFunctionInfo IFI; - //InlineFunctionRelaxVar(CI, IFI, true); - InlineFunction(CI, IFI, true); - } - } - whichc++; - } - SmallVector fnlist; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - if(!I->isDeclaration()) { - Function *F = I; - fnlist.push_back(F); - } - } - - for (SmallVector::iterator SI = fnlist.begin(), - SE = fnlist.end(); SI != SE; ++SI) { - Function *F = *SI; - /* cloned something: gen_aux_info_record - cloned something: tidy_fallthru_edges - cloned something: end_final - cloned something: find_single_use - cloned something: regmove_optimize - cloned something: fixup_abnormal_edges - cloned something: schedule_insns - cloned something: timevar_push - - */ - if (singleSCCNodes.count(F) != 0) - unswitchFunction(F); - } - return true; -} - - -bool ProactiveFusionIPO::doFinalization(Module &M) { - return false; -} -} - -char ProactiveFusionIPO::ID = 0; -ProactiveFusionIPO::ProactiveFusionIPO() : ModulePass(ID) {} -INITIALIZE_PASS_BEGIN(ProactiveFusionIPO, "proactivefusionipo", - "Proactive Loop Fusion Call Graph Walker/Inliner", false, false) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) -INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DataLayoutPass) -INITIALIZE_PASS_END(ProactiveFusionIPO, "proactivefusionipo", - "Proactive Loop Fusion IPO", false, false) - -namespace llvm { -Pass *createProactiveFusionIPOPass() { return new ProactiveFusionIPO(); } -} - Index: lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- lib/Transforms/Scalar/CMakeLists.txt +++ lib/Transforms/Scalar/CMakeLists.txt @@ -25,7 +25,6 @@ MemCpyOptimizer.cpp MergedLoadStoreMotion.cpp PartiallyInlineLibCalls.cpp - ProactiveFusion.cpp Reassociate.cpp Reg2Mem.cpp SCCP.cpp Index: lib/Transforms/Scalar/ProactiveFusion.cpp =================================================================== --- lib/Transforms/Scalar/ProactiveFusion.cpp +++ /dev/null @@ -1,2011 +0,0 @@ -#include "llvm/Transforms/Vectorize.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/ilist.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/IR/Verifier.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Analysis/DependenceAnalysis.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DiagnosticInfo.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/PatternMatch.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/raw_os_ostream.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/Local.h" -#include "iostream" -#include "fstream" - -#include -#include -#define DEBUG_TYPE "proactive-fusion" -using namespace llvm; -using namespace llvm::PatternMatch; -namespace { -struct ControlDep { - DenseMap BBIdMap; - Function *F; - DenseMap IdBBMap; - SmallVector, 512 > CDAdjSet; - unsigned BBId; - unsigned RowC; - //std::vector > CDMatrix; - //std::vector > CDClosure; - bool ** CDMatrix; - bool ** CDClosure; - DominatorTree& DT; - LoopInfo& LI; - std::map > CDAdjListB; - void setupCDMatrix(); - void addEdge(BasicBlock* From, BasicBlock* To); - void removeEdge(BasicBlock* From, BasicBlock* To); - void addNode(BasicBlock* BB); - explicit ControlDep(DominatorTree& Dt, LoopInfo& Li) : - BBId(0), RowC(0), CDMatrix(NULL), CDClosure(NULL), DT(Dt), LI(Li) {} - void releaseMemory(); - bool hasEdge(BasicBlock* From, BasicBlock* To); - void buildControlDeps(DominatorTreeBase& PDT); - void setFunction(Function *F) { - this->F = F; - } - void buildCDClosure(); - void PostDomTreeTopoOrder( - std::list* >& pdtList, - DomTreeNodeBase* N); - - void printClosure(BasicBlock* BB); - void printCDVector(bool *A); - - void Copy(bool *A, bool *B) { - for(unsigned CJ = 0; CJ < RowC; ++CJ) { - B[CJ] = A[CJ]; - } - } - - void Or(bool *A, bool *B, bool *C) { - for(unsigned CJ = 0; CJ < RowC; ++CJ) { - C[CJ] = A[CJ] | B[CJ]; - } - } - - bool Zero(bool *A) { - return !(Any(A)); - } - - bool At(bool *A, unsigned Idx) { - if (Idx < RowC) return A[Idx]; - return false; - } - - void SetAt(bool *A, unsigned Idx) { - if (Idx < RowC) A[Idx] = true; - } - - void ResetAt(bool *A, unsigned Idx) { - if (Idx < RowC) A[Idx] = false; - } - - bool Any(bool *A) { - for(unsigned CJ = 0; CJ < RowC; ++CJ) - if (A[CJ] == true) return true; - return false; - } - - void And(bool *A, bool *B, bool *C) { - for(unsigned CJ = 0; CJ < RowC; ++CJ) { - C[CJ] = A[CJ] & B[CJ]; - } - } - - void Not(bool *A, bool *B) { - for(unsigned CJ = 0; CJ < RowC; ++CJ) { - B[CJ] = not A[CJ]; - } - } - - void getNodes(bool *A, std::vector& nodes) { - for(unsigned CJ = 0; CJ < RowC; ++CJ) { - if (A[CJ] == true) { - nodes.push_back(IdBBMap[CJ]); - } - } -} -}; - -struct FuseEdge { - Loop *fstL; - Loop *sndL; - BasicBlock *domfst; - BasicBlock *domnxt; - BasicBlock *CommonPDom; - explicit FuseEdge() {} - FuseEdge(Loop *f, Loop *s, - BasicBlock *df, - BasicBlock *dn, - BasicBlock *CPDom) : - fstL(f), sndL(s), domfst(df), domnxt(dn), - CommonPDom(CPDom) {} -}; - -struct ProactiveFusionImpl { - unsigned fuseCount = 0; - Pass *P; - Function& F; - size_t J; size_t I; size_t RPOJ; - std::set> FusedEdges; - DenseMap visited; - DenseMap Pre; - DenseMap Post; - DenseMap Tree; - DenseMap Forward; - DenseMap Back; - DenseMap Cross; - DenseMap RPO; - DenseMap BBRPO; - MemoryDependenceAnalysis *MA; - AliasAnalysis *AA; - std::list InnerLoops; - DenseMap LoopPhis; - DenseMap iterStep; - DenseMap iterEnd; - std::set iloopH; - std::map LoopFusionMap; - ScalarEvolution *SCE; - DependenceAnalysis DA; - bool deepCompareSCEV(const SCEV* A, const SCEV* B); - bool AnalyzeLoopsAndFuse(Function &F); - void DFSVisitor(Loop* fstL, std::set& visited, - ControlDep& CD, LoopInfo& LI, - DominatorTreeBase& PDT, - bool& Changed); - void FuseTwo(Loop *fstL, - LoopInfo& LI, - DominatorTreeBase& PDT, - bool &Changed); - void DFS_PP(BasicBlock* BB){ - visited[BB] = true; - Pre[BB] = J; - J++; - for(succ_iterator SI = succ_begin(BB), - SE = succ_end(BB); SI != SE; ++SI) { - BasicBlock* SB = *SI; - if (visited[SB] == false) { - DFS_PP(SB); - Tree[BB] = SB; - } else if (Pre[BB] < Pre[SB]) { - Forward[BB] = SB; - } else if (Post.count(SB) == 0) { - Back[BB] = SB; - } else { - Cross[BB] = SB; - } - } - Post[BB] = I; - I++; - RPO[BB] = RPOJ; - BBRPO[RPOJ] = BB; - RPOJ--; - } - - void doDFS(Function& F) { - Tree.clear(); - Post.clear(); - Pre.clear(); - Back.clear(); - Cross.clear(); - Forward.clear(); - RPO.clear(); - BBRPO.clear(); - visited.clear(); - J = 0; - I = 0; - RPOJ = F.size(); - for(Function::iterator FI = F.begin(), FE = F.end(); - FI != FE; ++FI) { - BasicBlock *BB = FI; - visited[BB] = false; - } - RPOJ = F.size(); - for(Function::iterator FI = F.begin(), FE = F.end(); - FI != FE; ++FI) { - BasicBlock *BB = FI; - if (visited[BB] == false) - DFS_PP(BB); - } - } - - void markInstructionUses(Value *S, BasicBlock *Parent, - std::set&visited); - - void DFSPaths(bool &end_reached, - BasicBlock* End, BasicBlock* N, Loop *fstL, Loop *sndL, - std::vector p, - DenseMap>> &Path, - const std::set& allowed); - void DFSBack(bool &end_reached, BasicBlock* End, BasicBlock* N, - Loop *fstL, Loop *sndL, std::set&visited); - - struct LoopRPOCompare{ - DenseMap& RPO; - bool operator()(Loop* a, Loop* b) { - return (RPO[a->getLoopPreheader()] < RPO[b->getLoopPreheader()]); - } - bool operator()(BasicBlock* a, BasicBlock* b) { - return (RPO[a] < RPO[b]); - } - LoopRPOCompare(DenseMap& RL) : RPO(RL) {} - }; - - bool hasLoopsToFuse(Function &F, LoopInfo& LI); - bool loopSafeToFuse(Loop *L); - void findInnerLoops(Loop &L); - - DenseMap HasCall; - /// This enum represents the kinds of inductions that we support. - enum InductionKind { - IK_NoInduction, ///< Not an induction variable. - IK_IntInduction, ///< Integer induction variable. Step = 1. - IK_ReverseIntInduction, ///< Reverse int induction variable. Step = -1. - IK_PtrInduction, ///< Pointer induction var. Step = sizeof(elem). - IK_ReversePtrInduction ///< Reverse ptr indvar. Step = - sizeof(elem). - }; - - InductionKind isInductionVariable(PHINode *Phi) { - Type *PhiTy = Phi->getType(); - // We only handle integer and pointer inductions variables. - if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) - return IK_NoInduction; - - // Check that the PHI is consecutive. - const SCEV *PhiScev = SCE->getSCEV(Phi); - const SCEVAddRecExpr *AR = dyn_cast(PhiScev); - if (!AR) { - return IK_NoInduction; - } - const SCEV *Step = AR->getStepRecurrence(*SCE); - - // Integer inductions need to have a stride of one. - if (PhiTy->isIntegerTy()) { - if (Step->isOne()) - return IK_IntInduction; - if (Step->isAllOnesValue()) - return IK_ReverseIntInduction; - return IK_NoInduction; - } - - // Calculate the pointer stride and check if it is consecutive. - const SCEVConstant *C = dyn_cast(Step); - if (!C) - return IK_NoInduction; - - assert(PhiTy->isPointerTy() && "The PHI must be a pointer"); - return IK_NoInduction; - } - bool isInductionVariable(const Value *V) { - Value *In0 = const_cast(V); - PHINode *PN = dyn_cast_or_null(In0); - if (!PN) - return false; - InductionKind ik = isInductionVariable(PN); - if (ik == IK_NoInduction) return false; - else return true; - } - - explicit ProactiveFusionImpl(Function& aF) : F(aF) { } - bool checkLoops(LoopInfo& LI); -}; - -struct ProactiveFusion : public FunctionPass { - static char ID; - explicit ProactiveFusion(); - bool runOnFunction(Function &F); - AliasAnalysis *AA; - MemoryDependenceAnalysis *MA; - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); - AU.addRequiredID(LoopSimplifyID); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - } -}; - -void ControlDep::releaseMemory() { - if (RowC > 0) { - for(unsigned i = 0; i < RowC; i++) - delete [] CDMatrix[i]; - delete [] CDMatrix; - for(unsigned i = 0; i < RowC; i++) - delete [] CDClosure[i]; - delete [] CDClosure; - } -} - -void ControlDep::removeEdge(BasicBlock* From, BasicBlock* To) { - // The From/To edges have the same direction as - // CFG pred/succ. - assert ((BBIdMap.find(From) != BBIdMap.end()) - && (BBIdMap.find(To) != BBIdMap.end())); - int X = BBIdMap[From]; - int Y = BBIdMap[To]; - CDMatrix[X][Y] = false; - //CDMatrix[X][Y] = 1; - std::list& adjlis = CDAdjListB[From]; - adjlis.push_back(To); - for(SmallVectorImpl >::iterator CDI - = CDAdjSet.begin(), CDE = CDAdjSet.end(); CDI != CDE;) { - BasicBlock* BB = CDI->first; - BasicBlock* CB = CDI->second; - ++CDI; - if (BB == To && CB == From) { - CDAdjSet.erase(CDI); - break; - } - } -} - -void ControlDep::addEdge(BasicBlock* From, BasicBlock* To) { - // The From/To edges have the opposite direction to - // corresponding CFG pred->succ edge. - assert ((BBIdMap.find(From) != BBIdMap.end()) - && (BBIdMap.find(To) != BBIdMap.end())); - unsigned X = BBIdMap[From]; - unsigned Y = BBIdMap[To]; - CDMatrix[X][Y] = true; - //CDMatrix[X][Y] = 1; - CDAdjListB[From].push_back(To); - CDAdjSet.push_back(std::make_pair(To,From)); -} - -bool ControlDep::hasEdge(BasicBlock* From, BasicBlock* To) { - if (BBIdMap.find(From) == BBIdMap.end()) return false; - if (BBIdMap.find(To) == BBIdMap.end()) return false; - unsigned X = BBIdMap[From]; - unsigned Y = BBIdMap[To]; - return (CDMatrix[X][Y] == 1); -} - -void ControlDep::PostDomTreeTopoOrder( - std::list* >& pdtList, - DomTreeNodeBase* N) { - for(auto I : *N) - PostDomTreeTopoOrder(pdtList, I); - pdtList.push_back(N); -} - -void ControlDep::setupCDMatrix() { - unsigned numbbs = 0; - for(Function::iterator FI = F->begin(), - FE = F->end(); FI != FE; ++FI) { - numbbs++; - BasicBlock *BB = FI; - if (BBIdMap.find(BB) == BBIdMap.end()) { - BBIdMap[BB] = BBId; - IdBBMap[BBId] = BB; - BBId++; - } - } - unsigned rows = F->size(); - assert (rows == numbbs); - RowC = rows; - CDMatrix = new bool*[rows]; - for(unsigned nume = 0; nume < rows; nume++) { - CDMatrix[nume] = new bool[rows]; - for(unsigned numf = 0; numf < rows; numf++) { - CDMatrix[nume][numf] = false; - } - } - CDClosure = new bool*[rows]; - for(unsigned nume = 0; nume < rows; nume++) { - CDClosure[nume] = new bool[rows]; - for(unsigned numf = 0; numf < rows; numf++) { - CDClosure[nume][numf] = false; - } - } -} - -void ControlDep::buildCDClosure() { - for(unsigned nume = 0; nume < RowC; nume++) { - for(unsigned numf = 0; numf < RowC; numf++) { - CDClosure[nume][numf] = CDMatrix[nume][numf]; - } - CDClosure[nume][nume] = true; - } - - // Warshall's algorithm - for(unsigned i = 0; i < RowC; i++) { - for(unsigned v = 0; v < RowC; v++) { - if(CDClosure[v][i] == false) continue; // optimization - for(unsigned w = 0; w < RowC; w++) { - if(CDClosure[v][i] == true - && CDClosure[i][w] == true) CDClosure[v][w] = true; - } - } - } -} - -void ControlDep::printClosure(BasicBlock* BB) { - unsigned CI = BBIdMap[BB]; - for(unsigned CJ = 0; CJ < RowC; ++CJ) { - bool bitval = CDClosure[CI][CJ]; - if(bitval == true) { - dbgs() << ' '; - IdBBMap[CJ]->printAsOperand(dbgs(),false); - } - } - dbgs() << '\n'; -} - -void ControlDep::printCDVector(bool *A) { - for(unsigned CJ = 0; CJ < RowC; ++CJ) { - bool bitval = A[CJ]; - if (bitval == true) { - dbgs() << *IdBBMap[CJ]; - } - } - dbgs() << "---\n"; -} - -void ControlDep::buildControlDeps(DominatorTreeBase& PDT) { - std::list* > pdtList; - PostDomTreeTopoOrder(pdtList, PDT.getRootNode()); - std::set visited; - while(!pdtList.empty()) { - DomTreeNodeBase* N = pdtList.front(); - pdtList.pop_front(); - BasicBlock* x = N->getBlock(); - if(x) { - visited.insert(x); - for(pred_iterator PI = pred_begin(x), - PE = pred_end(x); PI != PE; ++PI) { - BasicBlock *y = *PI; - DomTreeNodeBase* yN = PDT.getNode(y); - if (yN->getIDom() && (yN->getIDom()->getBlock() != x)) { - addEdge(x,y); - } - } - for(typename DomTreeNodeBase::const_iterator NI = - N->begin(), NE = N->end(); NI != NE; ++NI) { - DomTreeNodeBase* CN = *NI; - if (!CN) continue; - if (!CN->getBlock()) continue; - if (CDAdjListB.find(CN->getBlock()) == CDAdjListB.end()) - continue; - std::list& cdlis = CDAdjListB[CN->getBlock()]; - for(std::list::iterator CDI = cdlis.begin(), - CDE = cdlis.end(); CDI != CDE; ++CDI) { - BasicBlock *CB = *CDI; - if (PDT.getNode(CB)->getIDom() - && PDT.getNode(CB)->getIDom()->getBlock() != x) { - addEdge(x,CB); - } - } - } - } - } -} - -template -inline void PrintDomTreeV3(const DomTreeNodeBase *N, raw_ostream &o) { - for(typename DomTreeNodeBase::const_iterator I = N->begin(), - E = N->end(); I != E; ++I) { - o << '\"'; - N->getBlock()->printAsOperand(o,false); - o << '\"'; - o << " -> "; - const DomTreeNodeBase *M = *I; - o << '\"'; - M->getBlock()->printAsOperand(o,false); - o << "\";\n"; - PrintDomTreeV3(*I, o); - } -} - -bool ProactiveFusionImpl::loopSafeToFuse(Loop *L) { - if(!L->getLoopPreheader()) { - return false; - } - for(Loop::block_iterator I = L->block_begin(), - E = L->block_end(); I != E; ++I) { - if (isa((*I)->getTerminator())) - return false; - - for(BasicBlock::iterator BI = (*I)->begin(), - BE = (*I)->end(); BI != BE; ++BI) { - if (isa(BI)) { - return false; - } - } - } - return true; -} - -static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst) { - for(User *U : Inst->users()) { - Instruction *UI = cast(U); - // This user may be a reduction exit value. - if(!TheLoop->contains(UI)) { - DEBUG( - dbgs() << "LV: Found an outside user for : " << *UI << " In Fn: " - << TheLoop->getHeader()->getParent()->getName() << '\n'); - return true; - } - } - return false; -} - -void ProactiveFusionImpl::findInnerLoops(Loop &L) { - if(L.empty() == true) { - // TODO: beef this up with checks - SmallVector ExitBBs; - L.getExitBlocks(ExitBBs); - if(ExitBBs.size() == 1) { - if(L.getNumBackEdges() == 1) { - if(L.getLoopPreheader() != nullptr) { - bool doit = true; - for(Loop::block_iterator In = L.block_begin(), - E = L.block_end(); In != E; ++In) { - for(BasicBlock::iterator BI = (*In)->begin(), BE = (*In)->end(); - BI != BE; ++BI) { - Instruction *ins = cast(BI); - if (hasOutsideLoopUser(&L,ins)) { - doit = false; - break; - } - } - if (doit == false) { - break; - } - } - if (doit) { - InnerLoops.push_back(&L); - iloopH.insert(L.getLoopPreheader()); - } - } - } - } - } - for(Loop *InnerL : L) - findInnerLoops(*InnerL); -} - -bool ProactiveFusionImpl::hasLoopsToFuse(Function &F, LoopInfo& LI) { - if(LI.empty()) { - return false; - } - for(Loop *L : LI) - findInnerLoops(*L); - int numinnermost = InnerLoops.size(); - if (numinnermost > 1) { - return true; - } - else { - return false; - } -} - -void ProactiveFusionImpl::markInstructionUses(Value *S, BasicBlock *Parent, - std::set&visited) { - visited.insert(S); - if (Instruction *I = dyn_cast(S)) { - for (unsigned opn = 0; opn < I->getNumOperands(); ++opn) { - if (Instruction *U = dyn_cast(I->getOperand(opn))) { - if (U->getParent() == Parent && visited.find(U) == visited.end()) { - markInstructionUses(U, Parent, visited); - } - } - } - } -} - -// Run dfs to collect all edges from start of first loop's -// control dependency head node -// to the postdominator of the exit node of second loop. -// Do not add blocks that have calls in them. -// Skip over the blocks of the two loops being fused. -// On finding the header of the loops, jump directly -// to the loop's exits. -void ProactiveFusionImpl::DFSPaths(bool &end_reached, - BasicBlock* End, BasicBlock* N, Loop *fstL, Loop *sndL, - std::vector p, - DenseMap>> &Path, - const std::set& allowed) { - if (p.size() > 128) return; - p.push_back(N); - if (N == fstL->getHeader()) { - SmallVector ExitBBfst; - fstL->getExitBlocks(ExitBBfst); - BasicBlock *fstLExit = ExitBBfst[0]; - if (HasCall[fstLExit] == false) - DFSPaths(end_reached, End, fstLExit,fstL,sndL,p,Path, allowed); - return; - } else if (N == sndL->getHeader()) { - SmallVector ExitBBsnd; - sndL->getExitBlocks(ExitBBsnd); - BasicBlock *sndLExit = ExitBBsnd[0]; - if (HasCall[sndLExit] == false) - DFSPaths(end_reached, End, sndLExit,fstL,sndL,p,Path, allowed); - return; - } - if (N == End) { - Path[N].push_back(p); - end_reached = true; - return; - } - - if (Back.count(N)) { - return; - } - for (succ_iterator SI = succ_begin(N), SE = succ_end(N); - SI != SE; ++SI) { - BasicBlock* SB = *SI; - if (Back.count(N) > 0 && Back[N] == SB) continue; - if (HasCall[SB] == false) - DFSPaths(end_reached, End, SB,fstL,sndL, p, Path, allowed); - } -} - -// Check that there is an acyclic path from the control -// dep head of fstL and the common post-dominating node -// of both loops. -// Allow Back edges only for the two loops. -void ProactiveFusionImpl::DFSBack (bool &end_reached, - BasicBlock* End, BasicBlock* N, - Loop *fstL, Loop *sndL, - std::set&visited) { - visited.insert(N); - if (Back.count(N) > 0) { - BasicBlock *BE = Back[N]; - if (BE != fstL->getHeader() && BE != sndL->getHeader()) { - return; - } - } - if (N == End) { - end_reached = true; - return; - } - for(succ_iterator SI = succ_begin(N), SE = succ_end(N); - SI != SE; ++SI) { - BasicBlock* SB = *SI; - if (visited.find(SB) == visited.end()) { - if (!end_reached) { - if (Back.count(N) > 0 && Back[N] == SB) continue; - DFSBack(end_reached, End, SB, fstL, sndL, visited); - } - } - } - return; -} - -// Fuse two loops which are inside ifs. -void ProactiveFusionImpl::FuseTwo(Loop *fstL, - LoopInfo& LInfo, - DominatorTreeBase& PDT, - bool& Changed) { - DominatorTree DT; - DT.recalculate(F); - FuseEdge& fe = *LoopFusionMap[fstL]; - Loop *sndL = fe.sndL; - BasicBlock *domfst = fe.domfst; - BasicBlock *domnxt = fe.domnxt; - BasicBlock *PDBB = fe.CommonPDom; - SmallVector ExitBBfst; - fstL->getExitBlocks(ExitBBfst); - SmallVector ExitBBsnd; - sndL->getExitBlocks(ExitBBsnd); - if (ExitBBfst.size() != 1 && ExitBBsnd.size() != 1) return; - BasicBlock* fstLExit = ExitBBfst[0]; - BasicBlock* sndLExit = ExitBBsnd[0]; - - if (std::find(FusedEdges.begin(), FusedEdges.end(), - std::make_pair(fstL->getHeader(), - sndL->getHeader())) != - FusedEdges.end()) { - return; - } - - FusedEdges.insert(std::make_pair(fstL->getHeader(), - sndL->getHeader())); - if (domfst != NULL && domnxt != NULL && PDBB != NULL) { - // {if (i) {loop1}; if (j) {loop2}; L}: - // Post-dom of both loops' exit-blocks: PDBB = L. - // domfst = if(i). domnxt = if(j) - std::vector path; - BasicBlock *fstBB = domfst; - BasicBlock *sndBB = domnxt; - std::vector px; - DenseMap>> Paths; - - std::list workgrp; - workgrp.push_back(fstL->getHeader()); - std::set allowed_blocks; - while(workgrp.size() > 0) { - BasicBlock *p = workgrp.front(); - workgrp.pop_front(); - if (allowed_blocks.find(p) != allowed_blocks.end()) - continue; - allowed_blocks.insert(p); - if (p != domfst) { - for(pred_iterator PI = pred_begin(p), - PE = pred_end(p); PI != PE; ++PI) { - BasicBlock* pb = *PI; - if (Back.count(pb) > 0) continue; - if(allowed_blocks.find(pb) == allowed_blocks.end()) { - if (std::find(workgrp.begin(), workgrp.end(), pb) - == workgrp.end()) { - if (HasCall[pb] == false) { - workgrp.push_back(pb); - } - } - } - } - } - } - workgrp.push_back(fstLExit); - while (workgrp.size() > 0) { - BasicBlock *p = workgrp.front(); - workgrp.pop_front(); - if (allowed_blocks.find(p) != allowed_blocks.end()) - continue; - allowed_blocks.insert(p); - if (p != domnxt) { - for(succ_iterator SI = succ_begin(p), - SE = succ_end(p); SI != SE; ++SI) { - BasicBlock *sb = *SI; - if (Back.count(sb) > 0) continue; - if(allowed_blocks.find(sb) == allowed_blocks.end()) { - if (std::find(workgrp.begin(), workgrp.end(), sb) - == workgrp.end()) { - if (HasCall[sb] == false) - workgrp.push_back(sb); - } - } - } - } - } - workgrp.push_back(sndL->getHeader()); - while(workgrp.size() > 0) { - BasicBlock *p = workgrp.front(); - workgrp.pop_front(); - if (allowed_blocks.find(p) != allowed_blocks.end()) - continue; - allowed_blocks.insert(p); - if (p != domnxt) { - for (pred_iterator PI = pred_begin(p), - PE = pred_end(p); PI != PE; ++PI) { - BasicBlock* pb = *PI; - if (Back.count(pb) > 0) continue; - if(allowed_blocks.find(pb) == allowed_blocks.end()) { - if (std::find(workgrp.begin(), workgrp.end(), pb) - == workgrp.end()) { - if (HasCall[pb] == false) - workgrp.push_back(pb); - } - } - } - } - } - workgrp.push_back(sndLExit); - while (workgrp.size() > 0) { - BasicBlock *p = workgrp.front(); - workgrp.pop_front(); - if (allowed_blocks.find(p) != allowed_blocks.end()) - continue; - allowed_blocks.insert(p); - if (p != PDBB) { - for (succ_iterator SI = succ_begin(p), - SE = succ_end(p); SI != SE; ++SI) { - BasicBlock *sb = *SI; - if (Back.count(sb) > 0) continue; - if(allowed_blocks.find(sb) == allowed_blocks.end()) { - if (std::find(workgrp.begin(), workgrp.end(), sb) - == workgrp.end()) { - if (HasCall[sb] == false) { - workgrp.push_back(sb); - } - } - } - } - } - } - for(Loop::block_iterator I = fstL->block_begin(), - E = fstL->block_end(); (I != E); ++I) { - allowed_blocks.insert(*I); - } - for(Loop::block_iterator I = sndL->block_begin(), - E = sndL->block_end(); (I != E); ++I) { - allowed_blocks.insert(*I); - } - - bool end_reached = false; - DFSPaths(end_reached, PDBB, fstBB, fstL,sndL, px, Paths, allowed_blocks); - if (!end_reached) { - return; - } - std::vector>& PathsAt = Paths[PDBB]; - //TODO: We got to get a single hot path out of this. - // One way might be to add only the most-likely edge to - // a path in DFSPaths. - // To be able to fuse two loops that are in a path - // we need to first collapse the paths. - // Some paths will not allow collapsing - // of conditionals. We do not want to support those - // in this implementation and we remove these - // paths. To be able to fuse the loops, we need all of the - // loops' blocks in the paths. Though - // splitting two loops and selective fusing is - // a possiblity, that is not explored at this time. - // dfs: P1 P2 P3 P4 P5 ... - // Let collapsed Block be B. Insert P5's compare and - // its operand definers from P5 in it. - // Visit P4 and check if its block has any code - // that may influence the instructions in the - // collapsed block. Else, insert the compare - // and its operand definers from current block - // in it. Phi operand blocks must be - // postdominated by P1. - - for(unsigned pcou = 0; pcou < PathsAt.size(); ++pcou) { - std::vector& OneP = PathsAt[pcou]; - OneP.pop_back(); - bool doit = true; - if (std::find(OneP.begin(), OneP.end(), - fstL->getHeader()) == OneP.end()) { - doit = false; - } - if (doit == false) { - continue; - } - if (std::find(OneP.begin(), OneP.end(), - sndL->getHeader()) == OneP.end()) { - doit = false; - } - if (doit == false) { - continue; - } - if (doit == false) { - continue; - } - std::vector OnePath; - for(unsigned bcou = 0; bcou < OneP.size(); bcou++) { - BasicBlock* pB = OneP[bcou]; - if(std::find(fstL->block_begin(), fstL->block_end(), pB) != - fstL->block_end()) { - continue; - } - if(std::find(sndL->block_begin(), sndL->block_end(), pB) != - sndL->block_end()) { - continue; - } - OnePath.push_back(pB); - } - - DenseMap BlocksInPath; - for(unsigned bcou = 0; bcou < OneP.size(); bcou++) { - BasicBlock* pB = OneP[bcou]; - BlocksInPath[pB] = true; - } - for(unsigned bcou = 0; bcou < OneP.size(); bcou++) { - BasicBlock* pB = OneP[bcou]; - for(BasicBlock::iterator BI = pB->begin(), BE = pB->end(); - BI != BE; ++BI) { - Instruction *ins = cast(BI); - for(User *U : ins->users()) { - Instruction *UI = cast(U); - if(BlocksInPath.count(UI->getParent()) == 0) { - if(!(PDT.dominates(PDBB,UI->getParent()))) { - doit = false; - break; - } - } - } - if (doit == false) { - break; - } - } - if (doit == false) { - break; - } - } - if (doit == false) { - continue; - } - std::list peels; - for(unsigned bcou = 0; bcou < OnePath.size(); bcou++) { - BasicBlock* pB = OnePath[bcou]; - // We got to check that what is in peels do not have a - // Data dependence on any of the instructions in this loop; - for(BasicBlock::iterator BI = pB->begin(), BE = pB->end(); - BI != BE; ++BI) { - if (isa(BI)) { - doit = false; - } else if (isa(BI)) { - doit = false; - } - if (doit == false) break; - Instruction *Inst = cast(BI); - for(User *U : Inst->users()) { - Instruction *UI = cast(U); - if (UI->getParent() != pB) { - BasicBlock *OUB = UI->getParent(); - if (!PDT.dominates(PDBB,OUB)) { - doit = false; - break; - } - } - } - if (doit == false) break; - } - if(doit == false) break; - for(std::list::iterator pi = peels.begin(), - pe = peels.end(); pi != pe; ++pi) { - if (LoadInst *LI = dyn_cast(*pi)) { - AliasAnalysis::Location Loc = AA->getLocation(LI); - MemDepResult Dep = MA->getPointerDependencyFrom(Loc, - true, pB->end(), pB); - if (Dep.getInst()) { - if (StoreInst *SII = dyn_cast(Dep.getInst())) { - if (!(SII->getMetadata( - LLVMContext::MD_nonaddr_taken_global) != nullptr)) { - doit = false; - } - } - } - } - } - if(doit == false) break; - if (BranchInst *Br = dyn_cast(pB->getTerminator())) { - if (Br->isConditional()) { - if (CmpInst *CI = dyn_cast(Br->getOperand(0))) { - peels.push_back(CI); - BasicBlock::iterator BI = pB->end(); - do { - BI--; - Instruction *ins = cast(BI); - for (User *U : ins->users()) { - Instruction *UI = cast(U); - if (std::find(peels.begin(), peels.end(), UI) - != peels.end()) - peels.push_front(ins); - } - } while(BI != pB->begin()); - } - } - } - } - if(doit == false) {continue;} - - std::vector CInsOperands; - DenseMap CInsDir; - bool OKToFuse = true; - for (std::vector::iterator Hi = - OnePath.begin(), He = OnePath.end(); Hi != He; ++Hi) { - BasicBlock* BB = *Hi; - bool direction = false; - if (BranchInst *Br = dyn_cast(BB->getTerminator())) { - if (Br->isConditional()) { - BasicBlock *FB = Br->getSuccessor(0); - BasicBlock *SB = Br->getSuccessor(1); - if (std::find(OnePath.begin(), OnePath.end(), FB) - != OnePath.end()) { - direction = true; - } else if (std::find(OnePath.begin(), OnePath.end(), SB) - != OnePath.end()){ - direction = false; - } else if (FB == PDBB) { - direction = true; - } else if (SB == PDBB) { - direction = false; - } else if (FB == sndBB) { - direction = true; - } else if (SB == sndBB) { - direction = false; - } else { - OKToFuse = false; - } - - if (CmpInst *CI = dyn_cast(Br->getOperand(0))) { - CInsOperands.push_back(CI); - CInsDir[CI] = direction; - } else { - CInsOperands.push_back(Br->getOperand(0)); - CInsDir[Br->getOperand(0)] = direction; - } - } - } - } - if (OKToFuse == false) { - continue; - } - for (auto SI : CInsOperands) { - AliasAnalysis::Location Loc; - if (LoadInst *LI = dyn_cast(SI)) { - Loc = AA->getLocation(LI); - for (auto *BB : OnePath) { - MemDepResult Dep = MA->getPointerDependencyFrom( - Loc, true, BB->end(), BB); - if (Dep.getInst()) { - if (!isa(Dep.getInst())) { - if (Dep.isDef() || Dep.isClobber()) { - OKToFuse = false; - } - } - } - } - } - } - if (!OKToFuse) {continue;} - - SmallVector OldPreds; - BasicBlock *OldBB = domfst; - for (pred_iterator I = pred_begin(OldBB), E = pred_end(OldBB); - I != E; ++I) { - BasicBlock *P = *I; - OldPreds.push_back(P); - } - - std::list CinsPeel; - for (auto SI : CInsOperands) { - CinsPeel.push_back(SI); - } - for (std::vector::reverse_iterator SI - = CInsOperands.rbegin(), - SE = CInsOperands.rend(); SI != SE; ++SI) { - if (isa(*SI)) { - BasicBlock* BB = cast(*SI)->getParent(); - BasicBlock::iterator BI = BB->end(); - do { - BI--; - Instruction *ins = cast(BI); - for (User *U : ins->users()) { - Instruction *UI = cast(U); - if (std::find(CinsPeel.begin(), CinsPeel.end(), UI) - != CinsPeel.end()) - CinsPeel.push_front(ins); - } - } while (BI != BB->begin()); - } - } - - for (auto *cpeel : CinsPeel) { - for (auto *use : cpeel->users()) { - if (isa(use)) { - OKToFuse = false; - } - } - } - if (OKToFuse == false) continue; - - DEBUG( - std::string pathInStr; - for(auto *OI : OnePath) { - pathInStr.append(" "); - pathInStr.append(std::to_string(RPO[OI])); - } - dbgs() << "Path with both loops = " << pathInStr << '\n'); - - // Split the predecessors of domfst. - // Insert toplvlBB between old predecessors - // and domfst. Collapse all instructions - // from the path that leads to both loops - // into toplvlBB. The compares that need to - // be satisfied to take the path through - // the loops will be peeled and placed in - // a new block: "realTopBB" that will precede toplvlBB. - BasicBlock *toplvlBB = SplitBlockPredecessors( - OldBB, OldPreds, "pflLander", P); - - ValueToValueMapTy Dmap; - std::vector CloneLis; - std::vector OrigLis; - for (auto PI : OnePath) { - BasicBlock* PB = PI; - for(BasicBlock::iterator II = PB->begin(), IE = PB->end(); - II != IE; ++II) { - if (dyn_cast(II)) continue; - Instruction *NewInst = II->clone(); - if (II->hasName()) - NewInst->setName("pfl."+II->getName()); - CloneLis.push_back(NewInst); - Value *IV = II; - Dmap[IV] = NewInst; - OrigLis.push_back(IV); - } - } - - for (unsigned CI = 0; CI < OrigLis.size(); ++CI) { - Value *Ov = OrigLis[CI]; - Value *Nv = CloneLis[CI]; - for (User *U : Ov->users()) { - Instruction *UI = cast(U); - if (Dmap.count(UI) > 0) { - Instruction *upd = cast(Dmap[UI]); - for (unsigned opn = 0; opn < UI->getNumOperands(); ++opn) { - if (UI->getOperand(opn) == Ov) { - upd->setOperand(opn, Nv); - } - } - } - } - toplvlBB->getInstList().insert(toplvlBB->getTerminator(), - cast(Nv)); - } - - Value * T = nullptr; - for(auto SI : CInsOperands){ - if(Dmap.count(SI) == 0) { - // this is the case when the compare is in a - // dominating block as long as we do not handle Phis. - Dmap[SI] = SI; - } - } - SmallVector dPreds; - for (pred_iterator I = pred_begin(toplvlBB), E = pred_end(toplvlBB); - I != E; ++I) { - BasicBlock *P = *I; - dPreds.push_back(P); - } - - // Insert a new block:realTopBB between toplvlBB and - // domfst's preds. This block will contain - // only the compares so that toplvlBB - // can be bypassed when all conditions are - // not satisfied. - BasicBlock *realTopBB = SplitBlockPredecessors( - toplvlBB, dPreds, "pflTop", P); - - // for each compare: - // call recursively - // for each use of ins: - // if (use->parent = toplvl) - // mark use - // Once marked, all of these instructions can be - // moved to realTopBB. - std::set ins_visit; - for(auto SI : CInsOperands) { - markInstructionUses(Dmap[SI], toplvlBB, ins_visit); - } - for (BasicBlock::iterator BI = toplvlBB->begin(), BE = toplvlBB->end(); - BI != BE;) { - Value *X = BI++; - if (ins_visit.find(X) != ins_visit.end()) { - cast(X)->removeFromParent(); - realTopBB->getInstList().insert(realTopBB->getTerminator(), - cast(X)); - } - } - // Make an expression tree of all compare instructions. - unsigned tempnum=0; - for (auto SI : CInsOperands) { - if (T == nullptr) { - if (CInsDir[SI] == true) { - T = BinaryOperator::CreateAnd(Dmap[SI], - ConstantInt::get((SI)->getType(),1), - std::string("pfl").append(std::to_string(tempnum++)), - realTopBB->getTerminator()); - } else { - Constant *C = ConstantInt::get((SI)->getType(),1); - T = BinaryOperator::CreateOr(Dmap[SI], C, - std::string("pfl").append(std::to_string(tempnum++)), - realTopBB->getTerminator()); - } - } else { - if (CInsDir[SI] == true) { - T = BinaryOperator::CreateAnd(Dmap[SI], T, - std::string("pfl").append(std::to_string(tempnum++)), - realTopBB->getTerminator()); - } else { - Value *Not = - BinaryOperator::CreateNot(Dmap[SI], - std::string("pfl").append(std::to_string(tempnum++)), - realTopBB->getTerminator()); - T = BinaryOperator::CreateAnd(Not, T, "", realTopBB->getTerminator()); - } - } - } - realTopBB->getTerminator()->eraseFromParent(); - unsigned It =0; - std::vector NewBlocks; - // NewLoop is the fused loop - Loop *NewLoop = new Loop(); - if (fstL->getParentLoop()) { - fstL->getParentLoop()->addChildLoop(NewLoop); - } else LInfo.addTopLevelLoop(NewLoop); - // Clone all blocks from loop1 and loop2 in NewBlocks - BasicBlock *fstLHd = cast(fstL->getHeader()); - for (Loop::block_iterator In = fstL->block_begin(), - E = fstL->block_end(); In != E; ++In) { - BasicBlock *New = CloneBasicBlock(*In, Dmap, "pf." + Twine(It)); - F.getBasicBlockList().push_back(New); - It++; - Dmap[*In] = New; - NewBlocks.push_back(New); - } - for (Loop::block_iterator In = sndL->block_begin(), - E = sndL->block_end(); In != E; ++In) { - BasicBlock *New = CloneBasicBlock(*In, Dmap, "pf." + Twine(It)); - F.getBasicBlockList().push_back(New); - It++; - Dmap[*In] = New; - NewBlocks.push_back(New); - } - // Insert a new Phi for the loop induction variable - PHINode *newPhi = PHINode::Create(LoopPhis[fstL]->getType(), - 2, "pl", - cast(Dmap[fstL->getHeader()])->begin()); - PHINode *a[2]; - a[0] = cast(Dmap[LoopPhis[fstL]]); - a[1] = cast(Dmap[LoopPhis[sndL]]); - Dmap[LoopPhis[sndL]] = newPhi; - Dmap[LoopPhis[fstL]] = newPhi; - LoopPhis[NewLoop] = newPhi; - BasicBlock *sndHRep = cast(Dmap[sndL->getHeader()]); - BasicBlock *fstHRep = cast(Dmap[fstL->getHeader()]); - - // Update operands in the cloned instructions. - for (unsigned ni = 0; ni < NewBlocks.size(); ++ni) { - for (BasicBlock::iterator bi = NewBlocks[ni]->begin(), - be = NewBlocks[ni]->end(); bi != be; ++bi) { - Instruction *I = cast(bi); - for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { - Value *Op = I->getOperand(op); - ValueToValueMapTy::iterator It = Dmap.find(Op); - if (It != Dmap.end()) - I->setOperand(op, It->second); - } - } - } - // Adjust branches from old loops. - for (unsigned ni = 0; ni < NewBlocks.size(); ++ni) { - BasicBlock *fstBB = NewBlocks[ni]; - if (BranchInst *Br = dyn_cast(fstBB->getTerminator())) { - for (unsigned succ = 0; succ < Br->getNumSuccessors(); succ++) { - if (Br->getSuccessor(succ) == sndHRep) { - Br->setSuccessor(succ, fstHRep); - } else if (Br->getSuccessor(succ) == fstHRep) { - Br->setSuccessor(succ, sndHRep); - } else if (Br->getSuccessor(succ) == fstLExit) { - Br->setSuccessor(succ, sndHRep); - } else if (Br->getSuccessor(succ) == sndLExit) { - Br->setSuccessor(succ, PDBB); - } - } - } - } - // Clean up some conditional branches. - for (unsigned ni = 0; ni < NewBlocks.size(); ++ni) { - BasicBlock *fstBB = NewBlocks[ni]; - if (BranchInst *Br = dyn_cast(fstBB->getTerminator())) { - if (Br->isConditional() && Br->getNumSuccessors() == 2) { - if (Br->getSuccessor(0) == Br->getSuccessor(1)) { - BasicBlock *brP = Br->getParent(); - BasicBlock *targ = Br->getSuccessor(0); - Br->eraseFromParent(); - BranchInst::Create(targ, brP); - } - } - } - } - // Update the new Phi's incoming blocks/values. - newPhi->addIncoming(cast( - a[0])->getIncomingValue(0), toplvlBB); - - if (Dmap.count(a[1]->getIncomingBlock(0))) { - Value* backBB; - Value * backVal; - backBB = Dmap[a[1]->getIncomingBlock(0)]; - backVal = a[1]->getIncomingValue(0); - newPhi->addIncoming(backVal, cast(backBB)); - } else { - Value* backBB; - Value * backVal; - backBB = Dmap[a[1]->getIncomingBlock(1)]; - backVal = a[1]->getIncomingValue(1); - newPhi->addIncoming(backVal, cast(backBB)); - } - - if (toplvlBB->getTerminator()) { - BasicBlock::iterator BRI = toplvlBB->getTerminator(); - toplvlBB->getInstList().erase(BRI); - } - if (realTopBB->getTerminator()) { - BasicBlock::iterator BRI = realTopBB->getTerminator(); - realTopBB->getInstList().erase(BRI); - } - // Insert an unconditional branch from toplvlBB to new loop. - BranchInst::Create( - cast(Dmap[fstLHd]), toplvlBB); - // Insert a conditional branch in realTopBB: - // true -> go to toplvlBB. - // false -> go to domfst. - if (T) { - BranchInst::Create( - toplvlBB, domfst, T, realTopBB); - } else { - BranchInst::Create(toplvlBB, realTopBB); - } - // Do some dead-code elimination. - for (unsigned ni = 0; ni < NewBlocks.size(); ++ni) { - NewLoop->addBasicBlockToLoop(NewBlocks[ni], LInfo.getBase()); - bool Changed = false; - do { - Changed = false; - BasicBlock *BB = NewBlocks[ni]; - for (BasicBlock::iterator DI = BB->begin(); DI != BB->end(); ) { - Instruction *Inst = DI++; - if (isInstructionTriviallyDead(Inst, nullptr)) { - Inst->eraseFromParent(); - Changed = true; - } - } - } while (Changed); - } - DEBUG( - dbgs() << "Fused "; - fstL->getHeader()->printAsOperand(dbgs(), false); - dbgs() << " and "; - sndL->getHeader()->printAsOperand(dbgs(), false); - dbgs() << " Fn Name: "; - dbgs() << F.getName(); - dbgs() << '\n'; - ); - verifyFunction(F, &dbgs()); - fuseCount++; - // Recurse and try to fuse a sequence of fusion candidates - for (auto loop_pair : LoopFusionMap) { - Loop *keyLoop = loop_pair.first; - Loop *valLoop = loop_pair.second->sndL; - if (valLoop == fstL) { - FuseEdge *nfe = new FuseEdge(keyLoop,NewLoop, - loop_pair.second->domfst, - realTopBB, - loop_pair.second->CommonPDom); - LoopFusionMap[keyLoop] = nfe; - PDT.recalculate(F); - BasicBlock* nhd = NewLoop->getHeader(); - BasicBlock *nlat = NewLoop->getLoopLatch(); - Back[nlat] = nhd; - FuseTwo(keyLoop,LInfo,PDT,Changed); - } - } - break; - } - } - doDFS(F); - Changed = true; -} - -// visit the fusion candidates bottom-up. -void ProactiveFusionImpl::DFSVisitor(Loop* fstL, std::set& visited, - ControlDep& CD, LoopInfo& LInfo, - DominatorTreeBase& PDT, - bool& Changed) { - visited.insert(fstL); - if (LoopFusionMap.find(fstL) == LoopFusionMap.end()) { - return; - } - FuseEdge* fe = LoopFusionMap[fstL]; - Loop *sndL = fe->sndL; - if (std::find(FusedEdges.begin(), FusedEdges.end(), - std::make_pair(fstL->getHeader(), - sndL->getHeader())) != - FusedEdges.end()) - return; - if (LoopFusionMap.find(sndL) == LoopFusionMap.end()) { - FuseTwo(fstL, LInfo, PDT, Changed); - doDFS(F); - Changed = true; - } else { - if (visited.find(sndL) == visited.end()) - DFSVisitor(sndL, visited, CD, LInfo, PDT, Changed); - } -} - -bool ProactiveFusionImpl::checkLoops(LoopInfo& LInfo) { - LoopFusionMap.clear(); - InnerLoops.clear(); - iloopH.clear(); - if (hasLoopsToFuse(F, LInfo) == false) return false; - DA.initDependenceAnalysis(F,AA,SCE,&LInfo); - std::vector torem; - for(std::list::iterator LI = InnerLoops.begin(), - LE = InnerLoops.end(); LI != LE;) { - Loop* IL = *LI++; - std::vector phis; - std::vector ldst; - for (Loop::block_iterator I = IL->block_begin(), - E = IL->block_end(); I != E; ++I) { - for (BasicBlock::iterator BI = (*I)->begin(), - BE = (*I)->end(); BI != BE; ++BI) { - if (isa(*BI) || isa(*BI)) - ldst.push_back(BI); - else if (isa(*BI)) { - phis.push_back(BI); - } - } - } - bool dont = false; - for(unsigned i = 0; i < ldst.size(); ++i) { - Instruction *ins1 = cast(ldst[i]); - for(unsigned j = 0; j < ldst.size(); ++j) { - Instruction *ins2 = cast(ldst[j]); - if(ins1 != ins2) { - if(auto D = DA.depends(ins1,ins2, true)) { - if(!D->isLoopIndependent()) { - torem.push_back(IL); - dont = true; - } - } - } - } - if (dont == true) break; - } - if (dont == false) { - if (phis.size() != 1) { - dont = true; - torem.push_back(IL); - } else { - PHINode *Phi = cast(phis[0]); - // Check that this PHI type is allowed. - if (Phi->getParent() != IL->getHeader()) { - torem.push_back(IL); - dont = true; - } else { - if (hasOutsideLoopUser(IL,Phi)) { - torem.push_back(IL); - dont = true; - } else { - Type *PhiTy = Phi->getType(); - if (!PhiTy->isIntegerTy()) { - torem.push_back(IL); - dont = true; - } else { - if (Phi->getNumIncomingValues() != 2) { - torem.push_back(IL); - dont = true; - } else { - // Check if this is an induction variable. - InductionKind IK = isInductionVariable(Phi); - if (IK != IK_IntInduction) { - torem.push_back(IL); - dont = true; - } else { - // This is the value coming from the preheader. - //Value *StartValue = Phi->getIncomingValueForBlock( - const SCEV *PhiScev = SCE->getSCEV(Phi); - const SCEVAddRecExpr *AR = dyn_cast(PhiScev); - const SCEV *Step = AR->getStepRecurrence(*SCE); - const SCEVConstant *C = dyn_cast(Step); - if (!C) { - torem.push_back(IL); - dont=true; - } else { - const APInt &APStepVal = C->getValue()->getValue(); - // Huge step value - give up. - if (APStepVal.getBitWidth() > 64) { - dont=true; - torem.push_back(IL); - } else { - iterStep[IL] = Step; - BasicBlock* latch = IL->getLoopLatch(); - if (!latch->getTerminator()) dont=true; - else if (!latch->getTerminator()) dont=true; - else if (!isa(latch->getTerminator()->getOperand(0))) - dont=true; - else { - CmpInst* Cins = cast( - latch->getTerminator()->getOperand(0)); - LoopPhis[IL] = Phi; - if (Cins->getOperand(0) == Phi) { - const SCEV *CinsS = SCE->getSCEV(Cins->getOperand(1)); - if (CinsS) { - iterEnd[IL] = CinsS; - } else { - torem.push_back(IL); - dont = true; - } - } else if (Cins->getOperand(1) == Phi) { - const SCEV *CinsS = SCE->getSCEV(Cins->getOperand(0)); - if (CinsS) { - iterEnd[IL] = CinsS; - } else { - torem.push_back(IL); - dont = true; - } - } - } - } - } - } - } - } - } - } - } - } - } - if (torem.size() > 0) { - for (auto *L : torem) { - auto iter = std::find(InnerLoops.begin(), InnerLoops.end(), L); - if (iter != InnerLoops.end()) InnerLoops.erase(iter); - } - } - if (InnerLoops.size() > 1) return true; - else return false; -} - -bool ProactiveFusionImpl::deepCompareSCEV(const SCEV* A, const SCEV* B) { - if (A == B) return true; - unsigned AType = A->getSCEVType(), BType = B->getSCEVType(); - if (AType != BType) return false; - switch (static_cast(AType)) { - case scUnknown: { - const SCEVUnknown *AU = cast(A); - const SCEVUnknown *BU = cast(B); - if (isa(AU->getValue()) && isa(BU->getValue())) { - const LoadInst *ld1 = dyn_cast(AU->getValue()); - const LoadInst *ld2 = dyn_cast(BU->getValue()); - if (ld1->getOperand(0) == ld2->getOperand(0)) return true; - const GetElementPtrInst* gep1 - = dyn_cast(ld1->getOperand(0)); - const GetElementPtrInst* gep2 - = dyn_cast(ld2->getOperand(0)); - if (gep1 && gep2) { - if (gep1->getNumOperands() != gep2->getNumOperands()) return false; - for (unsigned X = 0; X < gep1->getNumOperands(); X++) { - if (gep1->getOperand(X) != gep2->getOperand(X)) { - return false; - } - } - return true; - } - } - return false; - } - case scConstant: { - const SCEVConstant *AC = cast(A); - const SCEVConstant *BC = cast(B); - const APInt &LA = AC->getValue()->getValue(); - const APInt &RA = BC->getValue()->getValue(); - unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); - if (LBitWidth != RBitWidth) - return false; - int64_t intValA = LA.getSExtValue(); - int64_t intValB = RA.getSExtValue(); - if (intValA == intValB) return true; - else return false; - - } - case scAddExpr: - case scMulExpr: - case scSMaxExpr: - case scUMaxExpr: { - const SCEVNAryExpr *LA = cast(A); - const SCEVNAryExpr *RA = cast(B); - unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); - if (LNumOps != RNumOps) return false; - for (unsigned i = 0; i != LNumOps; ++i) { - bool res = deepCompareSCEV(LA->getOperand(i), RA->getOperand(i)); - if (!res) return false; - } - return true; - } - case scAddRecExpr: { - const SCEVAddRecExpr *LA = cast(A); - const SCEVAddRecExpr *RA = cast(B); - unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); - if (LNumOps != RNumOps) return false; - for (unsigned i = 0; i != LNumOps; ++i) { - bool res = deepCompareSCEV(LA->getOperand(i), RA->getOperand(i)); - if (!res) return false; - } - return true; - } - case scTruncate: - case scZeroExtend: - case scSignExtend: { - const SCEVCastExpr *LC = cast(A); - const SCEVCastExpr *RC = cast(B); - // Compare cast expressions by operand. - return deepCompareSCEV(LC->getOperand(), RC->getOperand()); - } - case scUDivExpr: { - const SCEVUDivExpr *LC = cast(A); - const SCEVUDivExpr *RC = cast(B); - // Lexicographically compare udiv expressions. - bool X = deepCompareSCEV(LC->getLHS(), RC->getLHS()); - if (X == false) return false; - return deepCompareSCEV(LC->getRHS(), RC->getRHS()); - } - case scCouldNotCompute: { - return false; - } - } - return false; -} - -bool ProactiveFusionImpl::AnalyzeLoopsAndFuse(Function &F) { - fuseCount = 0; - unsigned iterc = 0; - for(;;) { - iterc++; - LoopPhis.clear(); - SCE->releaseMemory(); - DominatorTree DT; - DT.recalculate(F); - LoopInfo LInfo; - LoopFusionMap.clear(); - InnerLoops.clear(); - iterStep.clear(); - iterEnd.clear(); - iloopH.clear(); - LInfo.createRaw(DT); - SCE->LI = &LInfo; - SCE->DT = &DT; - if (checkLoops(LInfo) == false) return false; - doDFS(F); - ControlDep CD(DT, LInfo); - CD.setFunction(&F); - CD.setupCDMatrix(); - DominatorTreeBase PDT(true); - PDT.recalculate(F); - CD.buildControlDeps(PDT); - CD.buildCDClosure(); - // For i = 0 to InnerLoops.size() - // For j = i+1 to InnerLoops.size() - // Let IL = InnerLoop[i] and JL = InnerLoop[j] - // Let CDom = first common dominator of IL and JL - // Let CPDom = first common post-dominator of IL and JL - // If (CDom belongs-to Loopi && CPDom belongs-to to Loopj) - // If (Loopi == Loopj) Create new fusion edge from IL->JL - - DenseMap > > LoopGroup; - for (std::list::iterator LI = InnerLoops.begin(), - LE = InnerLoops.end(); LI != LE;) { - Loop* IL = *LI++; - if (!(iterStep.count(IL) == 1 && iterEnd.count(IL) == 1)) { - continue; - } - if (LoopPhis.count(IL) == 0) continue; - int64_t StepI = cast(iterStep[IL]) - ->getValue()->getValue().getSExtValue(); - const SCEV* EndI = iterEnd[IL]; - for (std::list::iterator LJ = LI, - LE = InnerLoops.end(); LJ != LE; ++LJ) { - Loop* JL = *LJ; - if (LoopPhis.count(JL) == 0) continue; - if (!(iterStep.count(JL) == 1 && iterEnd.count(JL) == 1)) continue; - if (FusedEdges.find(std::make_pair(IL->getHeader(), - JL->getHeader())) - != FusedEdges.end()) { - continue; - } else if (FusedEdges.find(std::make_pair(JL->getHeader(), - IL->getHeader())) - != FusedEdges.end()) { - continue; - } - int64_t StepJ = cast(iterStep[JL]) - ->getValue()->getValue().getSExtValue(); - if (StepI != StepJ) { - continue; - } - const SCEV* EndJ = iterEnd[JL]; - if (EndI != EndJ) { - if (deepCompareSCEV(EndI,EndJ) == false) { - continue; - } - } - BasicBlock *domfst = DT.findNearestCommonDominator( - IL->getHeader(),JL->getHeader()); - BasicBlock *pdomfst = PDT.findNearestCommonDominator( - IL->getHeader(),JL->getHeader()); - if (LInfo.getLoopFor(domfst) == LInfo.getLoopFor(pdomfst)) { - std::list >& prio = LoopGroup[IL]; - prio.push_back( std::make_pair(JL,1)); - } - } - } - bool *LoopCD2 = new bool[CD.RowC]; - bool *LoopCD3 = new bool[CD.RowC]; - - for (Function::iterator FI = F.begin(), FE = F.end(); - FI != FE; ++FI) { - BasicBlock* SB = FI; - HasCall[SB] = false; - for (BasicBlock::iterator BI = SB->begin(), BE = SB->end(); - BI != BE; ++BI) { - if (isa(BI)) { - HasCall[SB] = true; - break; - } - } - } - - for (std::list::iterator LI = InnerLoops.begin(), - LE = InnerLoops.end(); LI != LE;) { - Loop* IL = *LI++; - std::list >& prio = LoopGroup[IL]; - if (prio.size() == 0) continue; - bool *fst = CD.CDClosure[CD.BBIdMap[IL->getLoopPreheader()]]; - for (std::list >::iterator PI = - prio.begin(), PE = prio.end(); PI != PE; ++PI) { - std::pair& apair = *PI; - bool *nxt = CD.CDClosure[ - CD.BBIdMap[apair.first->getLoopPreheader()]]; - // Obtain Diff(Closure(IL), Closure(JL)) - // And Diff(Closure(JL), Closure(IL)) - std::vector CDL; - std::vector CDK; - for (unsigned CJ = 0; CJ < CD.RowC; CJ++) { - LoopCD2[CJ] = (~(fst[CJ] & nxt[CJ])) & fst[CJ]; - LoopCD3[CJ] = (~(fst[CJ] & nxt[CJ])) & nxt[CJ]; - if (LoopCD2[CJ]) CDL.push_back(CD.IdBBMap[CJ]); - if (LoopCD3[CJ]) CDK.push_back(CD.IdBBMap[CJ]); - } - if (CDK.size() > 5 || CDL.size() > 5) { - // We can control how deep we want to - // allow the control dep program slice - // in turn controlling the compile time - // of this optimization. - continue; - } - BasicBlock* domfst = nullptr; - BasicBlock* domnxt = nullptr; - if (CDK.size() == 0) { - dbgs() << "CDK.size() is " << CDK.size(); - dbgs() << " CDL.size() is " << CDL.size() << '\n'; - domnxt = apair.first->getHeader(); - } - if (CDL.size() == 0) { - domfst = IL->getHeader(); - } - - // We need to ensure that we got only DAGs in the - // control flow... no cycles. - bool bail = false; - for (std::vector::iterator LI = CDL.begin(), - LE = CDL.end(); LI != LE; ++LI) { - for (std::vector::iterator KI = CDK.begin(), - KE = CDK.end(); KI != KE; ++KI) { - if (LInfo.getLoopFor(*LI) != LInfo.getLoopFor(*KI)) { - bail = true; - } - } - } - if (bail) continue; - - if (domfst == nullptr) { - domfst = *CDL.begin(); - } - - for (std::vector::iterator LI = CDL.begin(), - LE = CDL.end(); LI != LE; ++LI) { - BasicBlock *BL = *LI; - if (DT.dominates(BL,domfst)) { - domfst = BL; - } - } - if (domnxt == nullptr) { - domnxt = *CDK.begin(); - } - - for (std::vector::iterator KI = CDK.begin(), - KE = CDK.end(); KI != KE; ++KI) { - BasicBlock *BL = *KI; - if (DT.dominates(BL,domnxt)) { - domnxt = BL; - } - } - Loop *fstL = nullptr; - Loop *sndL = nullptr; - bool doit = true; - // Immeidate post-dominator check until further studies. - if (PDT.getNode(domnxt)->getIDom()->getBlock() == - domfst /*PDT.dominates(domfst,domnxt) && - PDT.dominates(domfst,apair.first->getHeader()) && - DT.dominates(domnxt,domfst)*/) { - std::swap(domnxt,domfst); - if (LoopFusionMap.count(apair.first)) { - if (RPO[IL->getLoopPreheader()] > - RPO[LoopFusionMap[apair.first]->sndL->getLoopPreheader()]) { - doit = false; - } - } - if (doit) { - fstL = apair.first; - sndL = IL; - } - } else if (PDT.getNode(domfst)->getIDom()->getBlock() == - domnxt/*PDT.dominates(domnxt,domfst) && - PDT.dominates(domnxt,IL->getHeader()) && - DT.dominates(domfst,domnxt)*/) { - if (LoopFusionMap.count(IL)) { - if (RPO[apair.first->getLoopPreheader()] > - RPO[LoopFusionMap[IL]->sndL->getLoopPreheader()]) { - doit = false; - } - } - if (doit) { - sndL = apair.first; - fstL = IL; - } - } - if (fstL == nullptr || sndL == nullptr) { - continue; - } else { - SmallVector ExitBBfst; - SmallVector ExitBBsnd; - fstL->getExitBlocks(ExitBBfst); - BasicBlock *fstLExit = ExitBBfst[0]; - sndL->getExitBlocks(ExitBBsnd); - BasicBlock *sndLExit = ExitBBsnd[0]; - { - BasicBlock *sndBB = domnxt; - BasicBlock *PDBB = PDT.getNode(sndBB)->getIDom()->getBlock(); - //Some simple tests - if (!(PDT.dominates(PDBB,fstL->getHeader()) - && PDT.dominates(PDBB,sndL->getHeader()) - && PDT.dominates(PDBB,sndLExit) - && PDT.dominates(PDBB,fstLExit))) { - PDBB = PDT.getNode(sndLExit)->getIDom()->getBlock(); - if (!(PDT.dominates(PDBB,fstL->getHeader()) - && PDT.dominates(PDBB,sndL->getHeader()) - && PDT.dominates(PDBB,sndLExit) - && PDT.dominates(PDBB,fstLExit))) { - continue; - } - } - std::set visited; - bool end_reached = false; - // Ensure that there is a path from IL to JL to ILExit-JLExit's - // common post-dominator, that is passing through - // simple control flow. - DFSBack(end_reached, - PDT.findNearestCommonDominator(domfst,domnxt), - domfst, fstL, sndL, visited); - if (end_reached == false) continue; - } - if (fuseCount == 0) { - // We combine all ld/st from both loops and - // run the dependence test on that. - std::vector ldst; - for (Loop::block_iterator I = fstL->block_begin(), - E = fstL->block_end(); I != E; ++I) { - for (BasicBlock::iterator BI = (*I)->begin(), - BE = (*I)->end(); BI != BE; ++BI) { - if (isa(*BI) || isa(*BI)) - ldst.push_back(BI); - } - } - for (Loop::block_iterator I = sndL->block_begin(), - E = sndL->block_end(); I != E; ++I) { - for (BasicBlock::iterator BI = (*I)->begin(), - BE = (*I)->end(); BI != BE; ++BI) { - if (isa(*BI) || isa(*BI)) - ldst.push_back(BI); - } - } - bool dont = false; - for(unsigned i = 0; i < ldst.size(); ++i) { - Instruction *ins1 = cast(ldst[i]); - for(unsigned j = 0; j < ldst.size(); ++j) { - Instruction *ins2 = cast(ldst[j]); - if(ins1 != ins2) { - if(auto D = DA.depends(ins1,ins2, true)) { - if(!D->isLoopIndependent()) { - dont = true; - } - } - } - } - if (dont == true) continue; - } - } - FuseEdge* fe = new FuseEdge(fstL,sndL,domfst,domnxt, - PDT.findNearestCommonDominator(fstLExit,sndLExit)); - LoopFusionMap[fstL] = fe; - } - } - } - delete [] LoopCD2; - delete [] LoopCD3; - DEBUG( - if (LoopFusionMap.size()) dbgs() << "Loops in " << F.getName() << '\n'; - for (std::list::iterator LI = InnerLoops.begin(), - LE = InnerLoops.end(); LI != LE; ++LI) { - if (LoopFusionMap.find(*LI) != LoopFusionMap.end()) { - dbgs() << "Fuse = "; - (*LI)->getHeader()->printAsOperand(dbgs(), false); - dbgs() << " --- "; - LoopFusionMap[*LI]->sndL->getHeader() - ->printAsOperand(dbgs(),false); - dbgs() << '\n'; - } - }); - std::list workgrp; - for (std::list::iterator LI = InnerLoops.begin(), - LE = InnerLoops.end(); LI != LE; ++LI) { - if (LoopFusionMap.find(*LI) != LoopFusionMap.end()) { - workgrp.push_back(*LI); - } - } - if (workgrp.size() == 0) return true; - std::set visited; - bool Changed = false; - while (workgrp.size() > 0) { - Loop* fstL = workgrp.front(); - workgrp.pop_front(); - DFSVisitor(fstL, visited, CD, LInfo, PDT, Changed); - } - if (!Changed) break; - - LInfo.releaseMemory(); - CD.releaseMemory(); - PDT.releaseMemory(); - DT.releaseMemory(); - DEBUG(dbgs()<<"iterating for more loop fusion opportunities\n"); - } - return true; -} - -bool ProactiveFusion::runOnFunction(Function &F) { - if (F.isDeclaration()) return false; - ScalarEvolution* SE = &getAnalysis(); - MA = &getAnalysis(); - AA = &getAnalysis(); - ProactiveFusionImpl P(F); - P.P = this; - P.MA = MA; - P.AA = AA; - P.SCE = SE; - bool Tr = P.AnalyzeLoopsAndFuse(F); - - if (P.fuseCount > 0) { - dbgs() << "Loops Fusion Count = " - << P.fuseCount << " for " << F.getName() << '\n'; - removeUnreachableBlocks(F); - } - return Tr; -} - -ProactiveFusion::ProactiveFusion() : FunctionPass(ID) {} -char ProactiveFusion::ID = 0; -} - -INITIALIZE_PASS_BEGIN(ProactiveFusion, "ProactiveFusion2", - "Proactive Loop Fusion", false, false) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) -INITIALIZE_PASS_DEPENDENCY(DataLayoutPass) -INITIALIZE_PASS_END(ProactiveFusion, "ProactiveFusion2", - "Proactive Loop Fusion", false, false) - -namespace llvm { -FunctionPass *createProactiveFusionPass() { return new ProactiveFusion(); } -} - Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -1,4 +1,4 @@ - //===-- Scalar.cpp --------------------------------------------------------===// +//===-- Scalar.cpp --------------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -51,7 +51,6 @@ initializeLoopUnrollPass(Registry); initializeLoopUnswitchPass(Registry); initializeLoopIdiomRecognizePass(Registry); - initializeProactiveFusionPass(Registry); initializeLowerAtomicPass(Registry); initializeLowerExpectIntrinsicPass(Registry); initializeMemCpyOptPass(Registry); Index: tools/gold/gold-plugin.cpp =================================================================== --- tools/gold/gold-plugin.cpp +++ tools/gold/gold-plugin.cpp @@ -87,8 +87,6 @@ OT_BC_ONLY, OT_SAVE_TEMPS }; - static bool proactive_loop_fusion_analysis = false; - static bool proactive_loop_fusion = false; static bool generate_api_file = false; static OutputType TheOutputType = OT_NORMAL; static std::string obj_path; @@ -124,10 +122,6 @@ TheOutputType = OT_SAVE_TEMPS; } else if (opt == "disable-output") { TheOutputType = OT_DISABLE; - } else if (opt == "proactive-loop-fusion-analysis") { - proactive_loop_fusion_analysis = true; - } else if (opt == "proactive-loop-fusion") { - proactive_loop_fusion = true; } else { // Save this option to pass to the code generator. // ParseCommandLineOptions() expects argv[0] to be program name. Lazily @@ -711,9 +705,6 @@ PMB.VerifyOutput = true; PMB.LoopVectorize = true; PMB.SLPVectorize = true; - PMB.ProactiveLoopOverrider = true; - PMB.ProactiveLoopFusionAnalysis = options::proactive_loop_fusion_analysis; - PMB.ProactiveLoopFusion = options::proactive_loop_fusion; PMB.populateLTOPassManager(passes, &TM); passes.run(M); }