Index: include/llvm/Analysis/Passes.h
===================================================================
--- include/llvm/Analysis/Passes.h
+++ include/llvm/Analysis/Passes.h
@@ -26,6 +26,13 @@
 
   //===--------------------------------------------------------------------===//
   //
+  // createStaticGlobalAliasAnalysisPass - This pass provides alias info for
+  // static global values that do not have their addresses taken.
+  //
+  ImmutablePass *createStaticGlobalAliasAnalysisPass();
+
+  //===--------------------------------------------------------------------===//
+  //
   // createGlobalsModRefPass - This pass provides alias and mod/ref info for
   // global values that do not have their addresses taken.
   //
Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -260,6 +260,7 @@
 void initializeStackProtectorPass(PassRegistry&);
 void initializeStackColoringPass(PassRegistry&);
 void initializeStackSlotColoringPass(PassRegistry&);
+void initializeStaticGlobalAliasAnalysisPass(PassRegistry&);
 void initializeStraightLineStrengthReducePass(PassRegistry &);
 void initializeStripDeadDebugInfoPass(PassRegistry&);
 void initializeStripDeadPrototypesPassPass(PassRegistry&);
Index: include/llvm/LinkAllPasses.h
===================================================================
--- include/llvm/LinkAllPasses.h
+++ include/llvm/LinkAllPasses.h
@@ -133,6 +133,7 @@
       (void) llvm::createSCCPPass();
       (void) llvm::createScalarReplAggregatesPass();
       (void) llvm::createSingleLoopExtractorPass();
+      (void) llvm::createStaticGlobalAliasAnalysisPass();
       (void) llvm::createStripSymbolsPass();
       (void) llvm::createStripNonDebugSymbolsPass();
       (void) llvm::createStripDeadDebugInfoPass();
Index: lib/Analysis/Analysis.cpp
===================================================================
--- lib/Analysis/Analysis.cpp
+++ lib/Analysis/Analysis.cpp
@@ -69,6 +69,7 @@
   initializeTargetTransformInfoWrapperPassPass(Registry);
   initializeTypeBasedAliasAnalysisPass(Registry);
   initializeScopedNoAliasAAPass(Registry);
+  initializeStaticGlobalAliasAnalysisPass(Registry);
 }
 
 void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
Index: lib/Analysis/CMakeLists.txt
===================================================================
--- lib/Analysis/CMakeLists.txt
+++ lib/Analysis/CMakeLists.txt
@@ -61,6 +61,7 @@
   Trace.cpp
   TypeBasedAliasAnalysis.cpp
   ScopedNoAliasAA.cpp
+  StaticGlobalsAA.cpp
   ValueTracking.cpp
 
   ADDITIONAL_HEADER_DIRS
Index: lib/Analysis/StaticGlobalsAA.cpp
===================================================================
--- /dev/null
+++ lib/Analysis/StaticGlobalsAA.cpp
@@ -0,0 +1,187 @@
+//===- StaticGlobalsAA.cpp - Simple Alias Analysis for Static Globals -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple alias analysis for internal (static) global
+// variables: a global whose address is never captured cannot alias a pointer
+// that is not derived from that global.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "staticglobal-aa"
+
+namespace {
+
+  // This global tracker knows that global values are being checked for,
+  // whereas the CaptureTracker APIs are not set up to handle global values.
+  // This means the tracker needs to check whether the User is an Instruction.
+  // If it is not, it is a constant user. ConstantExprs (e.g. GEPs or casts of
+  // the global) that are still pointers are walked recursively with this same
+  // tracker; every other constant user (e.g. the initializer of another
+  // global) is conservatively treated as capturing the global.
+  struct GlobalCaptureTracker : public CaptureTracker {
+    explicit GlobalCaptureTracker() : Captured(false) {}
+
+    void tooManyUses() override { Captured = true; }
+
+    bool shouldExplore(const Use *U) override {
+      const User *Usr = U->getUser();
+      if (isa<Instruction>(Usr))
+        return true;
+
+      // The generic walk casts every explored user to Instruction, so
+      // constant users are handled here and never handed back to it.
+      const auto *CE = dyn_cast<ConstantExpr>(Usr);
+      if (!CE || !CE->getType()->isPointerTy())
+        Captured = true;
+      else if (!Captured)
+        PointerMayBeCaptured(CE, this);
+      return false;
+    }
+
+    bool captured(const Use *U) override {
+      Captured = true;
+      return true;
+    }
+
+    bool Captured;
+  };
+
+  /// Alias analysis for internal global variables whose address is never
+  /// captured.
+  class StaticGlobalAliasAnalysis : public ImmutablePass, public AliasAnalysis {
+
+    /// Internal, non-constant globals whose address is not captured.
+    SmallPtrSet<const GlobalValue *, 8> NonAddressTakenGlobals;
+
+    /// The module being analyzed; set by doInitialization().
+    Module *M;
+
+    void AnalyzeGlobals();
+
+  public:
+    static char ID;
+    StaticGlobalAliasAnalysis() : ImmutablePass(ID), M(nullptr) {
+      initializeStaticGlobalAliasAnalysisPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool doInitialization(Module &M) override {
+      InitializeAliasAnalysis(this, &M.getDataLayout());
+      this->M = &M;
+      return true;
+    }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AliasAnalysis::getAnalysisUsage(AU);
+      AU.setPreservesAll();
+    }
+
+    AliasResult alias(const Location &LocA, const Location &LocB) override;
+
+    /// getAdjustedAnalysisPointer - This method is used when a pass implements
+    /// an analysis interface through multiple inheritance. If needed, it
+    /// should override this to adjust the this pointer as needed for the
+    /// specified pass info.
+    void *getAdjustedAnalysisPointer(AnalysisID ID) override {
+      if (ID == &AliasAnalysis::ID)
+        return (AliasAnalysis*)this;
+      return this;
+    }
+  };
+} // end anonymous namespace
+
+char StaticGlobalAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(StaticGlobalAliasAnalysis, AliasAnalysis,
+                   "staticglobal-aa", "Simple Alias Analysis Of Static Globals",
+                   false, true, false)
+
+ImmutablePass *llvm::createStaticGlobalAliasAnalysisPass() {
+  return new StaticGlobalAliasAnalysis();
+}
+
+/// AnalyzeGlobals - Scan through the users of all of the internal
+/// GlobalVariables in the Module, recording the ones that do not have their
+/// address taken.
+void StaticGlobalAliasAnalysis::AnalyzeGlobals() {
+  DEBUG(dbgs() << "\nSGAA: Analyzing globals\n");
+  NonAddressTakenGlobals.clear();
+  // Nothing to scan until doInitialization() has provided the module.
+  if (!M)
+    return;
+  for (auto &GV : M->globals()) {
+    if (GV.hasLocalLinkage() && !isa<Function>(GV)) {
+      // Read-only static, doesn't matter as read-only memory should be picked
+      // up way earlier.
+      if (GV.isConstant()) {
+        continue;
+      }
+      DEBUG(dbgs() << "\nSGAA: Analyze " << GV.getName() << "\n");
+
+      GlobalCaptureTracker Tracker;
+      PointerMayBeCaptured(&GV, &Tracker);
+
+      if (!Tracker.Captured)
+        NonAddressTakenGlobals.insert(&GV);
+    }
+  }
+}
+
+/// alias - If one of the pointers is to a global that we are tracking, and the
+/// other is some random pointer, we know there cannot be an alias, because the
+/// address of the global isn't taken.
+AliasAnalysis::AliasResult
+StaticGlobalAliasAnalysis::alias(const Location &LocA,
+                                 const Location &LocB) {
+  DEBUG(dbgs() << "SGAA: alias\n");
+  // NOTE(review): the set is rebuilt on every query, presumably to stay in
+  // sync with IR changes; this costs a scan of every internal global's uses
+  // per query.
+  AnalyzeGlobals();
+
+  if (!NonAddressTakenGlobals.empty()) {
+    // Get the base object these pointers point to.
+    const Value *UV1 = GetUnderlyingObject(LocA.Ptr, *DL);
+    const Value *UV2 = GetUnderlyingObject(LocB.Ptr, *DL);
+
+    // If either of the underlying values is a global, they may be
+    // non-addr-taken globals, which we can answer queries about.
+    const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1);
+    const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2);
+    if (GV1 || GV2) {
+      // If a global's address is taken, pretend we don't know it's a pointer
+      // to that global.
+      if (GV1 && !NonAddressTakenGlobals.count(GV1))
+        GV1 = nullptr;
+      if (GV2 && !NonAddressTakenGlobals.count(GV2))
+        GV2 = nullptr;
+
+      // If the two pointers are derived from two different non-addr-taken
+      // globals, or if one is and the other isn't, we know these can't alias.
+      // Two pointers into the same global may well alias, so leave that case
+      // to the rest of the AA chain.
+      if ((GV1 || GV2) && GV1 != GV2)
+        return NoAlias;
+    }
+  }
+  return AliasAnalysis::alias(LocA, LocB);
+}
Index: lib/CodeGen/Passes.cpp
===================================================================
--- lib/CodeGen/Passes.cpp
+++ lib/CodeGen/Passes.cpp
@@ -104,6 +104,12 @@
     cl::init(false), cl::Hidden,
     cl::desc("Enable the new, experimental CFL alias analysis in CodeGen"));
 
+static cl::opt<bool>
+UseSGAA("use-staticglobal-aa-in-codegen",
+  cl::init(false), cl::Hidden,
+  cl::desc("Enable alias analysis of static globals in CodeGen"));
+
+
 /// Allow standard passes to be disabled by command line options. This supports
 /// simple binary flags that either suppress the pass or do nothing.
 /// i.e. -disable-mypass=false has no effect.
@@ -372,6 +378,8 @@
   // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
   // BasicAliasAnalysis wins if they disagree. This is intended to help
   // support "obvious" type-punning idioms.
+  if (UseSGAA)
+    addPass(createStaticGlobalAliasAnalysisPass());
   if (UseCFLAA)
     addPass(createCFLAliasAnalysisPass());
   addPass(createTypeBasedAliasAnalysisPass());
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -78,6 +78,11 @@
   cl::desc("Enable the new, experimental CFL alias analysis"));
 
 static cl::opt<bool>
+UseSGAA("use-staticglobal-aa",
+  cl::init(false), cl::Hidden,
+  cl::desc("Enable alias analysis of static global variables."));
+
+static cl::opt<bool>
 EnableMLSM("mlsm", cl::init(true), cl::Hidden,
            cl::desc("Enable motion of merged load and store"));
 
@@ -198,6 +203,8 @@
   if (LibraryInfo)
     MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
 
+  if (UseSGAA)
+    MPM.add(createStaticGlobalAliasAnalysisPass());
   addInitialAliasAnalysisPasses(MPM);
 
   if (!DisableUnitAtATime) {
Index: lib/Transforms/Scalar/Scalar.cpp
===================================================================
--- lib/Transforms/Scalar/Scalar.cpp
+++ lib/Transforms/Scalar/Scalar.cpp
@@ -81,6 +81,7 @@
   initializePlaceSafepointsPass(Registry);
   initializeFloat2IntPass(Registry);
   initializeLoopDistributePass(Registry);
+  initializeStaticGlobalAliasAnalysisPass(Registry);
 }
 
 void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
@@ -228,6 +229,10 @@
   unwrap(PM)->add(createTypeBasedAliasAnalysisPass());
 }
 
+void LLVMAddStaticGlobalAliasAnalysisPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createStaticGlobalAliasAnalysisPass());
+}
+
 void LLVMAddScopedNoAliasAAPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createScopedNoAliasAAPass());
 }
Index: test/Analysis/StaticGlobalAA/dead-store.ll
===================================================================
--- /dev/null
+++ test/Analysis/StaticGlobalAA/dead-store.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -O3 -S -use-staticglobal-aa 2>&1 | FileCheck %s
+
+; ModuleID = 'basic.c'
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+@a.0 = internal unnamed_addr global i32 0
+
+; CHECK-LABEL: @f(
+; CHECK: load
+; CHECK: load
+; CHECK-NOT: load
+; CHECK: store
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: ret
+
+; Function Attrs: nounwind
+define void @f(i32* nocapture %b) #0 {
+entry:
+  %0 = load i32, i32* @a.0, align 4, !tbaa !1
+  %shl = shl i32 %0, 1
+  store i32 %shl, i32* @a.0, align 4, !tbaa !1
+  %arrayidx = getelementptr inbounds i32, i32* %b, i64 1
+  %1 = load i32, i32* %arrayidx, align 4, !tbaa !1
+  %shl1 = shl i32 %1, 1
+  store i32 %shl1, i32* %arrayidx, align 4, !tbaa !1
+  %2 = load i32, i32* @a.0, align 4, !tbaa !1
+  %add = add nsw i32 %2, 2
+  store i32 %add, i32* @a.0, align 4, !tbaa !1
+  ret void
+}
+
+attributes #0 = { nounwind }
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
Index: test/Analysis/StaticGlobalAA/vectorize-loop.ll
===================================================================
--- /dev/null
+++ test/Analysis/StaticGlobalAA/vectorize-loop.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s -O3 -S -use-staticglobal-aa 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+@a = internal unnamed_addr global [5 x i32] zeroinitializer, align 4
+
+; CHECK-LABEL: @f(
+; CHECK: load <4 x i32>
+; CHECK: load <4 x i32>
+; CHECK-NOT: load <4 x i32>
+; CHECK: store <4 x i32>
+; CHECK: store <4 x i32>
+; CHECK-NOT: store <4 x i32>
+; CHECK: ret
+
+; Function Attrs: nounwind
+define void @f(i32* nocapture %b) #0 {
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds [5 x i32], [5 x i32]* @a, i64 0, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !1
+  %shl = shl i32 %0, 1
+  store i32 %shl, i32* %arrayidx, align 4, !tbaa !1
+  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+  %1 = load i32, i32* %arrayidx2, align 4, !tbaa !1
+  %shl3 = shl i32 %1, 1
+  store i32 %shl3, i32* %arrayidx2, align 4, !tbaa !1
+  %2 = load i32, i32* %arrayidx, align 4, !tbaa !1
+  %add = add nsw i32 %2, 2
+  store i32 %add, i32* %arrayidx, align 4, !tbaa !1
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 5
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+attributes #0 = { nounwind }
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}