Index: lib/Transforms/IPO/GlobalOpt.cpp
===================================================================
--- lib/Transforms/IPO/GlobalOpt.cpp
+++ lib/Transforms/IPO/GlobalOpt.cpp
@@ -16,10 +16,12 @@
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
@@ -28,6 +30,7 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -69,6 +72,8 @@
 struct GlobalOpt : public ModulePass {
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<TargetLibraryInfoWrapperPass>();
+    AU.addRequired<CallGraphWrapperPass>();
+    AU.addRequired<DominatorTreeWrapperPass>();
   }
   static char ID; // Pass identification, replacement for typeid
   GlobalOpt() : ModulePass(ID) {
@@ -85,9 +90,14 @@
   bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
                              const GlobalStatus &GS);
   bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
-
+
+  bool isFunctionKnownNotToRecurse(const Function *F);
+  bool isPointerValueDeadOnEntryToFunction(const Function *F, GlobalValue *GV);
+
   TargetLibraryInfo *TLI;
+  CallGraph *CG;
   SmallSet<const Comdat *, 8> NotDiscardableComdats;
+  DenseMap<const Function *, bool> FunctionsKnownNotToRecurse;
 };
 }
@@ -95,6 +105,8 @@
 INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
                       "Global Variable Optimizer", false, false)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_END(GlobalOpt, "globalopt",
                     "Global Variable Optimizer", false, false)
@@ -1725,46 +1737,117 @@
   return ProcessInternalGlobal(GV, GVI, GS);
 }
 
+static bool functionDoesNotCallItself(CallGraphNode *CGN) {
+  for (auto &I : *CGN)
+    if (I.second == CGN)
+      return false;
+  return true;
+}
+
+// main() is known not to be recursive and to be called only once.
+static bool isFunctionMain(const Function *F) {
+  return F->getName() == "main" && F->hasExternalLinkage();
+}
+
+bool GlobalOpt::isFunctionKnownNotToRecurse(const Function *F) {
+  if (FunctionsKnownNotToRecurse.empty())
+    for (scc_iterator<CallGraph *> I = scc_begin(CG); !I.isAtEnd(); ++I) {
+      const std::vector<CallGraphNode *> &SCC = *I;
+      assert(!SCC.empty() && "SCC with no functions?");
+      Function *F = SCC[0]->getFunction();
+
+      // For a function to be known not to recurse, it must be in its own SCC,
+      // must not call itself, and must not (conservatively) be callable by
+      // external functions.
+      if (SCC.size() != 1 || !F || !F->hasLocalLinkage())
+        continue;
+
+      FunctionsKnownNotToRecurse[F] = functionDoesNotCallItself(SCC[0]);
+    }
+
+  return FunctionsKnownNotToRecurse[F];
+}
+
+bool GlobalOpt::isPointerValueDeadOnEntryToFunction(const Function *F,
+                                                    GlobalValue *GV) {
+  // Find all uses of GV. We expect them all to be in F, and if we can't
+  // identify any of the uses we bail out.
+  //
+  // On each of these uses, determine whether the memory that GV points to is
+  // used/required/live at the start of the function. If it is not, for
+  // example if the first thing the function does is store to the GV, the GV
+  // can possibly be demoted.
+  //
+  // We don't do an exhaustive search for memory operations - simply look
+  // through bitcasts as they're quite common and benign.
+  const DataLayout &DL = GV->getParent()->getDataLayout();
+  SmallVector<Instruction *, 4> Loads, Stores;
+  for (auto *U : GV->users()) {
+    // FIXME: Deal with constantexpr bitcasts?
+    Instruction *I = dyn_cast<Instruction>(U);
+    if (!I)
+      return false;
+    assert(I->getParent()->getParent() == F);
+
+    if (isa<LoadInst>(I)) {
+      Loads.push_back(I);
+    } else if (I->getOpcode() == Instruction::BitCast) {
+      auto *OrigType = cast<PointerType>(I->getOperand(0)->getType());
+      auto *NewType = cast<PointerType>(I->getType());
+      if (DL.getTypeStoreSize(NewType->getElementType()) >
          DL.getTypeStoreSize(OrigType->getElementType()))
+        // Loads through this bitcast are larger than the GV, so they can only
+        // partially alias (not must alias) any preceding stores.
+        return false;
+
+      for (auto *UU : I->users()) {
+        LoadInst *LI = dyn_cast<LoadInst>(UU);
+        if (!LI)
+          return false;
+        // FIXME: We could catch more cases by inspecting stores to bitcasted
+        // addresses. We'd have to make sure the bitcasted type was >= the
+        // original type though, else we'd end up with a *partial* store which
+        // may leave bits of the value live and break our model.
+        Loads.push_back(LI);
+      }
+    } else if (isa<StoreInst>(I)) {
+      Stores.push_back(I);
+    } else {
+      return false;
+    }
+  }
+
+  // We have now classified all uses of GV as loads or stores. Check that none
+  // of the loads can observe the global's value at the function entry point,
+  // by ensuring that every load is dominated by at least one store.
+  auto &DT = getAnalysis<DominatorTreeWrapperPass>(*const_cast<Function *>(F))
+                 .getDomTree();
+
+  for (auto *L : Loads) {
+    bool Resolved = false;
+    for (auto *S : Stores)
+      if (DT.dominates(S, L)) {
+        Resolved = true;
+        break;
+      }
+    if (!Resolved)
+      return false;
+  }
+
+  // All loads have known dependences inside F, so the global can be localized.
+  return true;
+}
+
 /// ProcessInternalGlobal - Analyze the specified global variable and optimize
 /// it if possible. If we make a change, return true.
 bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
                                       Module::global_iterator &GVI,
                                       const GlobalStatus &GS) {
   auto &DL = GV->getParent()->getDataLayout();
-  // If this is a first class global and has only one accessing function
-  // and this function is main (which we know is not recursive), we replace
-  // the global with a local alloca in this function.
-  //
-  // NOTE: It doesn't make sense to promote non-single-value types since we
-  // are just replacing static memory to stack memory.
-  //
-  // If the global is in different address space, don't bring it to stack.
-  if (!GS.HasMultipleAccessingFunctions &&
-      GS.AccessingFunction && !GS.HasNonInstructionUser &&
-      GV->getType()->getElementType()->isSingleValueType() &&
-      GS.AccessingFunction->getName() == "main" &&
-      GS.AccessingFunction->hasExternalLinkage() &&
-      GV->getType()->getAddressSpace() == 0) {
-    DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
-    Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
-                                                   ->getEntryBlock().begin());
-    Type *ElemTy = GV->getType()->getElementType();
-    // FIXME: Pass Global's alignment when globals have alignment
-    AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr,
-                                        GV->getName(), &FirstI);
-    if (!isa<UndefValue>(GV->getInitializer()))
-      new StoreInst(GV->getInitializer(), Alloca, &FirstI);
-
-    GV->replaceAllUsesWith(Alloca);
-    GV->eraseFromParent();
-    ++NumLocalized;
-    return true;
-  }
-
   // If the global is never loaded (but may be stored to), it is dead.
   // Delete it now.
   if (!GS.IsLoaded) {
-    DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV);
+    DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");
     bool Changed;
     if (isLeakCheckerRoot(GV)) {
@@ -1849,6 +1932,37 @@
       }
     }
   }
+
+  // If this is a first class global and has only one accessing function,
+  // and that function is known not to recurse (it is main, or it provably
+  // does not recurse and the global's value is dead on entry to it), we
+  // replace the global with a local alloca in this function.
+  //
+  // NOTE: It doesn't make sense to promote non-single-value types since we
+  // are just replacing static memory to stack memory.
+  //
+  // If the global is in different address space, don't bring it to stack.
+  if (!GS.HasMultipleAccessingFunctions &&
+      GS.AccessingFunction && !GS.HasNonInstructionUser &&
+      GV->getType()->getElementType()->isSingleValueType() &&
+      GV->getType()->getAddressSpace() == 0 &&
+      !GV->isExternallyInitialized() &&
+      (isFunctionMain(GS.AccessingFunction) ||
+       (isFunctionKnownNotToRecurse(GS.AccessingFunction) &&
+        isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV)))) {
+    DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
+    Instruction &FirstI = const_cast<Instruction &>(*GS.AccessingFunction
+                                                     ->getEntryBlock().begin());
+    Type *ElemTy = GV->getType()->getElementType();
+    // FIXME: Pass Global's alignment when globals have alignment
+    AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr,
+                                        GV->getName(), &FirstI);
+    if (!isa<UndefValue>(GV->getInitializer()))
+      new StoreInst(GV->getInitializer(), Alloca, &FirstI);
+
+    GV->replaceAllUsesWith(Alloca);
+    GV->eraseFromParent();
+    ++NumLocalized;
+    return true;
+  }
+
   return false;
 }
@@ -1910,7 +2024,10 @@
     bool inComdat = C && NotDiscardableComdats.count(C);
     F->removeDeadConstantUsers();
     if ((!inComdat || F->hasLocalLinkage()) && F->isDefTriviallyDead()) {
-      F->eraseFromParent();
+      auto *CGN = (*CG)[F];
+      CG->getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
+      CGN->removeAllCalledFunctions();
+      delete CG->removeFunctionFromModule(CGN);
       Changed = true;
       ++NumFnDeleted;
     } else if (F->hasLocalLinkage()) {
@@ -3042,7 +3159,8 @@
   auto &DL = M.getDataLayout();
   TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-
+  CG = &getAnalysis<CallGraphWrapperPass>().getCallGraph();
+
   bool LocalChange = true;
   while (LocalChange) {
     LocalChange = false;
Index: test/Transforms/GlobalOpt/global-demotion.ll
===================================================================
--- /dev/null
+++ test/Transforms/GlobalOpt/global-demotion.ll
@@ -0,0 +1,80 @@
+; RUN: opt -globalopt -S < %s | FileCheck %s
+
+@G1 = internal global i32 5
+@G2 = internal global i32 5
+@G3 = internal global i32 5
+@G4 = internal global i32 5
+@G5 = internal global i32 5
+
+; CHECK-LABEL: @test1
+define internal i32 @test1() {
+; CHECK-NOT: @G1
+  store i32 4, i32* @G1
+  %a = load i32, i32* @G1
+; CHECK: ret
+  ret i32 %a
+}
+
+; The load comes before the store, so @G2's value is live on entry to the
+; function and it must not be demoted.
+; CHECK-LABEL: @test2
+define internal i32 @test2() {
+; CHECK-NOT: %G2
+  %a = load i32, i32* @G2
+  store i32 4, i32* @G2
+; CHECK: ret
+  ret i32 %a
+}
+
+; This global is accessed through a GEP, which we conservatively treat as a
+; partial alias, so we bail out.
+; FIXME: We don't actually have to bail out in this case.
+
+; CHECK-LABEL: @test3
+define internal i32 @test3() {
+; CHECK-NOT: %G3
+  %x = getelementptr i32, i32* @G3, i32 0
+  %a = load i32, i32* %x
+  store i32 4, i32* @G3
+; CHECK: ret
+  ret i32 %a
+}
+
+; The global is cast to a larger type and then loaded. The store only
+; partially covers the load, so we must not demote.
+
+; CHECK-LABEL: @test4
+define internal i32 @test4() {
+; CHECK-NOT: %G4
+  store i32 4, i32* @G4
+  %x = bitcast i32* @G4 to i64*
+  %a = load i64, i64* %x
+  %b = trunc i64 %a to i32
+; CHECK: ret
+  ret i32 %b
+}
+
+; The global is cast to a smaller type and then loaded. This one is fine.
+
+; CHECK-LABEL: @test5
+define internal i32 @test5() {
+; CHECK-NOT: @G5
+  store i32 4, i32* @G5
+  %x = bitcast i32* @G5 to i16*
+  %a = load i16, i16* %x
+  %b = zext i16 %a to i32
+; CHECK: ret
+  ret i32 %b
+}
+
+define i32 @main() {
+  %a = call i32 @test1()
+  %b = call i32 @test2()
+  %c = call i32 @test3()
+  %d = call i32 @test4()
+  %e = call i32 @test5()
+
+  %x = or i32 %a, %b
+  %y = or i32 %x, %c
+  %z = or i32 %y, %d
+  %w = or i32 %z, %e
+  ret i32 %w
+}
\ No newline at end of file
Index: test/Transforms/GlobalOpt/metadata.ll
===================================================================
--- test/Transforms/GlobalOpt/metadata.ll
+++ test/Transforms/GlobalOpt/metadata.ll
@@ -8,6 +8,7 @@
 define i32 @main(i32 %argc, i8** %argv) {
 ; CHECK-LABEL: @main(
 ; CHECK: %G = alloca
+  %1 = load i8**, i8*** @G
   store i8** %argv, i8*** @G
   ret i32 0
 }
Index: test/Transforms/GlobalOpt/unnamed-addr.ll
===================================================================
--- test/Transforms/GlobalOpt/unnamed-addr.ll
+++ test/Transforms/GlobalOpt/unnamed-addr.ll
@@ -34,6 +34,7 @@
 define void @baz(i32 %x) {
 entry:
+  %0 = load i32, i32* @a, align 4
   store i32 %x, i32* @a, align 4
   store i32 %x, i32* @b, align 4
   store i32 %x, i32* @c, align 4
Index: test/Transforms/MergeFunc/crash2.ll
===================================================================
--- test/Transforms/MergeFunc/crash2.ll
+++ test/Transforms/MergeFunc/crash2.ll
@@ -8,11 +8,11 @@
 ; causes an assert in the ValueHandle call back because we are RAUWing with a
 ; different type (AllocaInst) than its key type (GlobalValue).
 
-@G = internal global i8** null
+@G = private global i8** null
 @G2 = internal global i8** null
 
 define i32 @main(i32 %argc, i8** %argv) {
-; CHECK: alloca
+; CHECK-NOT: @G
   store i8** %argv, i8*** @G
   ret i32 0
 }
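
For illustration, here is roughly what the new demotion does to @test1 from global-demotion.ll above. This is a hand-written sketch, not compiler output: it assumes the alloca inherits the global's name (as in the AllocaInst construction in ProcessInternalGlobal) and that no later pass has cleaned the alloca up yet; the exact value names globalopt emits may differ.

; Before -globalopt: @G1 is internal, accessed only from the internal,
; non-recursive @test1, and is stored to before it is ever loaded, so its
; value is dead on entry to the function.
@G1 = internal global i32 5
define internal i32 @test1() {
  store i32 4, i32* @G1
  %a = load i32, i32* @G1
  ret i32 %a
}

; After -globalopt (sketch): the global is demoted to an alloca, and its
; initializer (i32 5, which is not undef) is stored at function entry.
define internal i32 @test1() {
  %G1 = alloca i32
  store i32 5, i32* %G1
  store i32 4, i32* %G1
  %a = load i32, i32* %G1
  ret i32 %a
}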