Index: lib/Transforms/IPO/GlobalOpt.cpp
===================================================================
--- lib/Transforms/IPO/GlobalOpt.cpp
+++ lib/Transforms/IPO/GlobalOpt.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InvariantInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/CallSite.h"
@@ -70,6 +71,7 @@
 struct GlobalOpt : public ModulePass {
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<TargetLibraryInfoWrapperPass>();
+    AU.addRequired<InvariantInfoMarkerPass>();
     AU.addRequired<DominatorTreeWrapperPass>();
   }
   static char ID; // Pass identification, replacement for typeid
@@ -100,6 +102,7 @@
 INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
                       "Global Variable Optimizer", false, false)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(InvariantInfoMarkerPass)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_END(GlobalOpt, "globalopt",
                     "Global Variable Optimizer", false, false)
@@ -2819,10 +2822,64 @@
   }
 }
 
+static void processInvariantIntrinsics(InvariantInfo &InvInfo, BasicBlock *BB);
+
+/// Process invariant intrinsics in the given function.
+static void processInvariantIntrinsics(InvariantInfo &InvInfo, Function *F) {
+  for (Function::iterator BB = F->begin(), BE = F->end(); BB != BE; ++BB)
+    processInvariantIntrinsics(InvInfo, &*BB);
+}
+
+/// Scan the given block to process invariant intrinsics, tracing whatever
+/// call chains can be traced.
+static void processInvariantIntrinsics(InvariantInfo &InvInfo, BasicBlock *BB) {
+  for (BasicBlock::iterator CurInst = BB->begin(), InstE = BB->end();
+       CurInst != InstE; ++CurInst) {
+    if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
+      CallSite CS(&*CurInst);
+
+      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CurInst))
+        processInvariantIntrinsic(II, InvInfo);
+
+      // Ignore debug info, inline asm, intrinsics, ...
+      if (isa<DbgInfoIntrinsic>(CS.getInstruction()) ||
+          isa<InlineAsm>(CS.getCalledValue()) ||
+          isa<IntrinsicInst>(CS.getInstruction()))
+        continue;
+
+      Function *Callee = CS.getCalledFunction();
+      if (Callee && !Callee->isDeclaration())
+        processInvariantIntrinsics(InvInfo, Callee);
+
+      if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
+        for (unsigned i = 0; i < II->getNumSuccessors(); ++i)
+          processInvariantIntrinsics(InvInfo, II->getSuccessor(i));
+        break;
+      }
+    } else if (TerminatorInst *TI = dyn_cast<TerminatorInst>(CurInst)) {
+      for (unsigned i = 0; i < TI->getNumSuccessors(); ++i)
+        processInvariantIntrinsics(InvInfo, TI->getSuccessor(i));
+      break;
+    }
+  }
+}
+
 /// Evaluate static constructors in the function, if we can. Return true if we
 /// can, false otherwise.
 static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
-                                      const TargetLibraryInfo *TLI) {
+                                      const TargetLibraryInfo *TLI,
+                                      InvariantInfo &InvInfo) {
+  // Scan the function's blocks to process invariant (start) intrinsics.
+  // This marks writeonce global variables as written, and must be done here
+  // because EvaluateBlock(), via EvaluateFunction() (below), could exit
+  // before reaching the invariant intrinsic call, e.g., if a call to a
+  // function declaration that we cannot constant fold occurs before the
+  // intrinsic call.
+  processInvariantIntrinsics(InvInfo, F);
+
   // Call the function.
   Evaluator Eval(DL, TLI);
   Constant *RetValDummy;
@@ -3175,6 +3232,7 @@
   auto &DL = M.getDataLayout();
   TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+  auto &InvInfo = getAnalysis<InvariantInfoMarkerPass>().getInvariantInfo();
 
   bool LocalChange = true;
   while (LocalChange) {
@@ -3199,7 +3257,7 @@
 
     // Optimize global_ctors list.
     LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
-      return EvaluateStaticConstructor(F, DL, TLI);
+      return EvaluateStaticConstructor(F, DL, TLI, InvInfo);
     });
 
     // Optimize non-address-taken globals.
Index: test/Transforms/LoadElim/invariant.ll
===================================================================
--- test/Transforms/LoadElim/invariant.ll
+++ test/Transforms/LoadElim/invariant.ll
@@ -1,8 +1,10 @@
-; RUN: opt < %s -gvn -S | FileCheck %s
+; RUN: opt < %s -gvn -S | FileCheck %s --check-prefix=CHECKL --check-prefix=CHECK
+; RUN: opt < %s -globalopt -gvn -S | FileCheck %s --check-prefix=CHECKG --check-prefix=CHECK
 
 ; On a given pointer to allocated memory, invariant_start/end intrinsic
 ; calls indicate that the memory can be considered constant for load
 ; elimination purposes.
+
 define void @example() {
 entry:
   %i = alloca i32
@@ -27,6 +29,48 @@
   ret void
 }
 
+; Example with a global variable instead of an alloca instruction,
+; with an invariant_start call during global construction and
+; with no invariant_end call.
+
+@gi = internal global i32 0
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @global_constructor, i8* null }]
+
+define void @globalex() {
+entry:
+  call void @foo(i32* @gi)   ; #2
+  %0 = load i32, i32* @gi    ; Not clobbered by #2; Unknown dependence; Unchanged.
+  ; CHECK: load i32, i32*
+  call void @bar(i32 %0)
+  call void @foo(i32* @gi)   ; #3
+  %1 = load i32, i32* @gi    ; Not clobbered by #3; Merged into %0.
+  ; CHECKL: load i32, i32*
+  ; CHECKG-NOT: load i32, i32*
+  call void @bar(i32 %1)
+  call void @foo(i32* @gi)   ; #4
+  %2 = load i32, i32* @gi    ; Not clobbered by #4, nor #3; Merged into %0.
+  ; CHECKL: load i32, i32*
+  ; CHECKG-NOT: load i32, i32*
+  call void @bar(i32 %2)
+  ret void
+}
+
+define internal void @construct_gi() {
+entry:
+  call void @foo(i32* @gi)   ; #1 -- preserves the load instructions from over-simplification.
+  %0 = bitcast i32* @gi to i8*
+  %1 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0)
+  ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8*
+  ret void
+}
+
+define internal void @global_constructor() {
+entry:
+  call void @construct_gi()
+  ret void
+}
+
+; Helper function declarations.
 declare void @bar(i32)
 declare void @foo(i32*)
 declare {}* @llvm.invariant.start(i64, i8* nocapture)
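
Note on dependencies: the GlobalOpt changes above rely on the InvariantInfo analysis
(llvm/Analysis/InvariantInfo.h, the InvariantInfoMarkerPass required in getAnalysisUsage(),
and the processInvariantIntrinsic() helper called from the new scan), which comes from an
earlier patch in this series and is not in tree. The sketch below is only a rough guess at
the shape of that interface, written so the new code above can be read on its own; the
class members and the helper body here (WrittenGlobals, setWritten, isWritten) are
assumptions, not taken from the real header.

// Hypothetical sketch only -- the actual InvariantInfo.h from the
// writeonce/invariant patch series may look quite different.
#include "llvm/ADT/DenseSet.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"

namespace llvm {

/// Assumed interface: records which (writeonce) globals have had
/// llvm.invariant.start executed on them, i.e. which globals must now be
/// treated as written.
class InvariantInfo {
  DenseSet<const GlobalVariable *> WrittenGlobals;

public:
  void setWritten(const GlobalVariable *GV) { WrittenGlobals.insert(GV); }
  bool isWritten(const GlobalVariable *GV) const {
    return WrittenGlobals.count(GV);
  }
};

/// Assumed helper: called on every intrinsic the GlobalOpt scan visits.
/// For llvm.invariant.start, mark the underlying global as written.
inline void processInvariantIntrinsic(IntrinsicInst *II, InvariantInfo &Info) {
  if (II->getIntrinsicID() != Intrinsic::invariant_start)
    return;
  // Operand 0 of llvm.invariant.start is the size; operand 1 is the pointer
  // being marked invariant.
  Value *Ptr = II->getArgOperand(1)->stripPointerCasts();
  if (auto *GV = dyn_cast<GlobalVariable>(Ptr))
    Info.setWritten(GV);
}

} // end namespace llvm

Under that assumption, InvariantInfoMarkerPass would presumably be an immutable
module-level pass that owns the InvariantInfo instance and hands it out through
getInvariantInfo(), which is all the runOnModule() change above needs.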