Index: lib/Transforms/IPO/GlobalOpt.cpp =================================================================== --- lib/Transforms/IPO/GlobalOpt.cpp +++ lib/Transforms/IPO/GlobalOpt.cpp @@ -28,6 +28,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -69,6 +70,7 @@ struct GlobalOpt : public ModulePass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); } static char ID; // Pass identification, replacement for typeid GlobalOpt() : ModulePass(ID) { @@ -85,7 +87,9 @@ bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI, const GlobalStatus &GS); bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn); - + + bool isPointerValueDeadOnEntryToFunction(const Function *F, GlobalValue *GV); + TargetLibraryInfo *TLI; SmallSet NotDiscardableComdats; }; @@ -95,6 +99,7 @@ INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt", "Global Variable Optimizer", false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(GlobalOpt, "globalopt", "Global Variable Optimizer", false, false) @@ -1725,46 +1730,99 @@ return ProcessInternalGlobal(GV, GVI, GS); } +// main() is known not to be recursive and to only be called once. +// FIXME: This is gross. Fix this properly once and for all by changing clang to +// add norecurse to main(). +static bool isFunctionMain(const Function *F) { + return F->getName() == "main" && F->hasExternalLinkage(); +} + +bool GlobalOpt::isPointerValueDeadOnEntryToFunction(const Function *F, GlobalValue *GV) { + // Find all uses of GV. We expect them all to be in F, and if we can't + // identify any of the uses we bail out. + // + // On each of these uses, identify if the memory that GV points to is + // used/required/live at the start of the function. If it is not, for example + // if the first thing the function does is store to the GV, the GV can + // possibly be demoted. + // + // We don't do an exhaustive search for memory operations - simply look + // through bitcasts as they're quite common and benign. + const DataLayout &DL = GV->getParent()->getDataLayout(); + SmallVector Loads, Stores; + for (auto *U : GV->users()) { + if (Operator::getOpcode(U) == Instruction::BitCast) { + auto *OrigType = cast(U->getOperand(0)->getType()); + auto *NewType = cast(U->getType()); + + for (auto *UU : U->users()) { + if (isa(UU)) { + if (DL.getTypeStoreSize(NewType->getElementType()) > + DL.getTypeStoreSize(OrigType->getElementType())) + // This load is larger than the GV; this means that it may access + // memory past the end of any preceding stores which renders our + // analysis useless. We must bail out here. + return false; + + Loads.push_back(cast(UU)); + } else if (isa(UU)) { + if (DL.getTypeStoreSize(NewType->getElementType()) < + DL.getTypeStoreSize(OrigType->getElementType())) + // This store is smaller than the GV; this means it only partially + // writes memory that subsequent loads may depend on. This may + // cause us to think that a load doesn't depend on memory at the + // start of the function when it actually does. We must bail out. + return false; + + Stores.push_back(cast(UU)); + } else { + return false; + } + } + continue; + } + + Instruction *I = dyn_cast(U); + if (!I) + return false; + assert(I->getParent()->getParent() == F); + + if (isa(I)) { + Loads.push_back(I); + } else if (isa(I)) { + Stores.push_back(I); + } else { + return false; + } + } + + // We have identified all uses of GV into loads and stores. Now check if all + // of them are known not to depend on the value of the global at the function + // entry point. We do this by ensuring that every load is dominated by at + // least one store. + auto &DT = getAnalysis(*const_cast(F)) + .getDomTree(); + + for (auto *L : Loads) { + if (!std::all_of(Stores.begin(), Stores.end(), + [&](Instruction *S) { return DT.dominates(S, L); })) + return false; + } + + // All loads have known dependences inside F, so the global can be localized. + return true; +} + /// ProcessInternalGlobal - Analyze the specified global variable and optimize /// it if possible. If we make a change, return true. bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, Module::global_iterator &GVI, const GlobalStatus &GS) { auto &DL = GV->getParent()->getDataLayout(); - // If this is a first class global and has only one accessing function - // and this function is main (which we know is not recursive), we replace - // the global with a local alloca in this function. - // - // NOTE: It doesn't make sense to promote non-single-value types since we - // are just replacing static memory to stack memory. - // - // If the global is in different address space, don't bring it to stack. - if (!GS.HasMultipleAccessingFunctions && - GS.AccessingFunction && !GS.HasNonInstructionUser && - GV->getType()->getElementType()->isSingleValueType() && - GS.AccessingFunction->getName() == "main" && - GS.AccessingFunction->hasExternalLinkage() && - GV->getType()->getAddressSpace() == 0) { - DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); - Instruction &FirstI = const_cast(*GS.AccessingFunction - ->getEntryBlock().begin()); - Type *ElemTy = GV->getType()->getElementType(); - // FIXME: Pass Global's alignment when globals have alignment - AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr, - GV->getName(), &FirstI); - if (!isa(GV->getInitializer())) - new StoreInst(GV->getInitializer(), Alloca, &FirstI); - - GV->replaceAllUsesWith(Alloca); - GV->eraseFromParent(); - ++NumLocalized; - return true; - } - // If the global is never loaded (but may be stored to), it is dead. // Delete it now. if (!GS.IsLoaded) { - DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); + DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n"); bool Changed; if (isLeakCheckerRoot(GV)) { @@ -1849,6 +1907,37 @@ } } } + // If this is a first class global and has only one accessing function + // and this function is main (which we know is not recursive), we replace + // the global with a local alloca in this function. + // + // NOTE: It doesn't make sense to promote non-single-value types since we + // are just replacing static memory to stack memory. + // + // If the global is in different address space, don't bring it to stack. + if (!GS.HasMultipleAccessingFunctions && + GS.AccessingFunction && !GS.HasNonInstructionUser && + GV->getType()->getElementType()->isSingleValueType() && + GV->getType()->getAddressSpace() == 0 && + !GV->isExternallyInitialized() && + (isFunctionMain(GS.AccessingFunction) || + (GS.AccessingFunction->doesNotRecurse() && + isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV))) ) { + DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n"); + Instruction &FirstI = const_cast(*GS.AccessingFunction + ->getEntryBlock().begin()); + Type *ElemTy = GV->getType()->getElementType(); + // FIXME: Pass Global's alignment when globals have alignment + AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr, + GV->getName(), &FirstI); + if (!isa(GV->getInitializer())) + new StoreInst(GV->getInitializer(), Alloca, &FirstI); + + GV->replaceAllUsesWith(Alloca); + GV->eraseFromParent(); + ++NumLocalized; + return true; + } return false; } @@ -3042,7 +3131,7 @@ auto &DL = M.getDataLayout(); TLI = &getAnalysis().getTLI(); - + bool LocalChange = true; while (LocalChange) { LocalChange = false; Index: test/Transforms/GlobalOpt/global-demotion.ll =================================================================== --- /dev/null +++ test/Transforms/GlobalOpt/global-demotion.ll @@ -0,0 +1,80 @@ +; RUN: opt -functionattrs -globalopt -S < %s | FileCheck %s + +@G1 = internal global i32 5 +@G2 = internal global i32 5 +@G3 = internal global i32 5 +@G4 = internal global i32 5 +@G5 = internal global i32 5 + +; CHECK-LABEL: @test1 +define internal i32 @test1() { +; CHECK-NOT: @G1 + store i32 4, i32* @G1 + %a = load i32, i32* @G1 +; CHECK: ret + ret i32 %a +} + +; The load comes before the store which makes @G2 live before the call. +; CHECK-LABEL: @test2 +define internal i32 @test2() { +; CHECK-NOT: %G2 + %a = load i32, i32* @G2 + store i32 4, i32* @G2 +; CHECK: ret + ret i32 %a +} + +; This global is indexed by a GEP - this makes it partial alias and we bail out. +; FIXME: We don't actually have to bail out in this case. + +; CHECK-LABEL: @test3 +define internal i32 @test3() { +; CHECK-NOT: %G3 + %x = getelementptr i32,i32* @G3, i32 0 + %a = load i32, i32* %x + store i32 4, i32* @G3 +; CHECK: ret + ret i32 %a +} + +; The global is casted away to a larger type then loaded. The store only partially +; covers the load, so we must not demote. + +; CHECK-LABEL: @test4 +define internal i32 @test4() { +; CHECK-NOT: %G4 + store i32 4, i32* @G4 + %x = bitcast i32* @G4 to i64* + %a = load i64, i64* %x + %b = trunc i64 %a to i32 +; CHECK: ret + ret i32 %b +} + +; The global is casted away to a smaller type then loaded. This one is fine. + +; CHECK-LABEL: @test5 +define internal i32 @test5() { +; CHECK-NOT: @G5 + store i32 4, i32* @G5 + %x = bitcast i32* @G5 to i16* + %a = load i16, i16* %x + %b = zext i16 %a to i32 +; CHECK: ret + ret i32 %b +} + +define i32 @main() { + %a = call i32 @test1() + %b = call i32 @test2() + %c = call i32 @test3() + %d = call i32 @test4() + %e = call i32 @test5() + + %x = or i32 %a, %b + %y = or i32 %x, %c + %z = or i32 %y, %d + %w = or i32 %z, %e + ret i32 %w +} Index: test/Transforms/GlobalOpt/metadata.ll =================================================================== --- test/Transforms/GlobalOpt/metadata.ll +++ test/Transforms/GlobalOpt/metadata.ll @@ -8,6 +8,7 @@ define i32 @main(i32 %argc, i8** %argv) { ; CHECK-LABEL: @main( ; CHECK: %G = alloca + %1 = load i8**, i8*** @G store i8** %argv, i8*** @G ret i32 0 } Index: test/Transforms/GlobalOpt/unnamed-addr.ll =================================================================== --- test/Transforms/GlobalOpt/unnamed-addr.ll +++ test/Transforms/GlobalOpt/unnamed-addr.ll @@ -34,6 +34,7 @@ define void @baz(i32 %x) { entry: + %0 = load i32, i32* @a, align 4 store i32 %x, i32* @a, align 4 store i32 %x, i32* @b, align 4 store i32 %x, i32* @c, align 4 Index: test/Transforms/MergeFunc/crash2.ll =================================================================== --- test/Transforms/MergeFunc/crash2.ll +++ test/Transforms/MergeFunc/crash2.ll @@ -8,11 +8,11 @@ ; causes an assert in the ValueHandle call back because we are RAUWing with a ; different type (AllocaInst) than its key type (GlobalValue). -@G = internal global i8** null +@G = private global i8** null @G2 = internal global i8** null define i32 @main(i32 %argc, i8** %argv) { -; CHECK: alloca +; CHECK-NOT: @G store i8** %argv, i8*** @G ret i32 0 }