diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -383,6 +383,10 @@ bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const; + /// Return true if globals in this address space can have initializers other + /// than `undef`. + bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const; + unsigned getAssumedAddrSpace(const Value *V) const; /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p @@ -1457,6 +1461,8 @@ virtual bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, Intrinsic::ID IID) const = 0; virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0; + virtual bool + canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0; virtual unsigned getAssumedAddrSpace(const Value *V) const = 0; virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, @@ -1782,6 +1788,11 @@ return Impl.isNoopAddrSpaceCast(FromAS, ToAS); } + bool + canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override { + return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS); + } + unsigned getAssumedAddrSpace(const Value *V) const override { return Impl.getAssumedAddrSpace(V); } diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -105,6 +105,9 @@ } bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; } + bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const { + return AS == 0; + }; unsigned getAssumedAddrSpace(const Value *V) const { return -1; } diff --git a/llvm/include/llvm/Transforms/Utils/GlobalStatus.h b/llvm/include/llvm/Transforms/Utils/GlobalStatus.h --- a/llvm/include/llvm/Transforms/Utils/GlobalStatus.h +++ b/llvm/include/llvm/Transforms/Utils/GlobalStatus.h @@ -9,6 +9,7 @@ #ifndef LLVM_TRANSFORMS_UTILS_GLOBALSTATUS_H #define LLVM_TRANSFORMS_UTILS_GLOBALSTATUS_H +#include "llvm/IR/Instructions.h" #include "llvm/Support/AtomicOrdering.h" namespace llvm { @@ -45,7 +46,7 @@ /// This global is stored to, but only its initializer and one other value /// is ever stored to it. If this global isStoredOnce, we track the value - /// stored to it in StoredOnceValue below. This is only tracked for scalar + /// stored to it via StoredOnceStore below. This is only tracked for scalar /// globals. StoredOnce, @@ -55,8 +56,16 @@ } StoredType = NotStored; /// If only one value (besides the initializer constant) is ever stored to - /// this global, keep track of what value it is. - Value *StoredOnceValue = nullptr; + /// this global, keep track of what value it is via the store instruction. + const StoreInst *StoredOnceStore = nullptr; + + /// If only one value (besides the initializer constant) is ever stored to + /// this global return the stored value. + Value *getStoredOnceValue() const { + return (StoredType == StoredOnce && StoredOnceStore) + ? StoredOnceStore->getOperand(0) + : nullptr; + } /// These start out null/false. When the first accessing function is noticed, /// it is recorded. When a second different accessing function is noticed, diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -259,6 +259,11 @@ return TTIImpl->isNoopAddrSpaceCast(FromAS, ToAS); } +bool TargetTransformInfo::canHaveNonUndefGlobalInitializerInAddressSpace( + unsigned AS) const { + return TTIImpl->canHaveNonUndefGlobalInitializerInAddressSpace(AS); +} + unsigned TargetTransformInfo::getAssumedAddrSpace(const Value *V) const { return TTIImpl->getAssumedAddrSpace(V); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -182,6 +182,12 @@ bool collectFlatAddressOperands(SmallVectorImpl &OpIndexes, Intrinsic::ID IID) const; + + bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const { + return AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS && + AS != AMDGPUAS::PRIVATE_ADDRESS; + } + Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const; diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -48,6 +48,11 @@ return AddressSpace::ADDRESS_SPACE_GENERIC; } + bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const { + return AS != AddressSpace::ADDRESS_SPACE_SHARED && + AS != AddressSpace::ADDRESS_SPACE_LOCAL && AS != ADDRESS_SPACE_PARAM; + } + Optional instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const; diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -1507,6 +1507,7 @@ /// it if possible. If we make a change, return true. static bool processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS, + function_ref GetTTI, function_ref GetTLI, function_ref LookupDomTree) { auto &DL = GV->getParent()->getDataLayout(); @@ -1605,18 +1606,26 @@ if (SRAGlobal(GV, DL)) return true; } - if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) { + Value *StoredOnceValue = GS.getStoredOnceValue(); + if (GS.StoredType == GlobalStatus::StoredOnce && StoredOnceValue) { // Avoid speculating constant expressions that might trap (div/rem). - auto *SOVConstant = dyn_cast(GS.StoredOnceValue); + auto *SOVConstant = dyn_cast(StoredOnceValue); if (SOVConstant && SOVConstant->canTrap()) return Changed; + Function &StoreFn = + const_cast(*GS.StoredOnceStore->getFunction()); // If the initial value for the global was an undef value, and if only // one other value was stored into it, we can just change the // initializer to be the stored value, then delete all stores to the // global. This allows us to mark it constant. + // This is restricted to address spaces that allow globals to have + // initializers. NVPTX, for example, does not support initializers for + // shared memory (AS 3). if (SOVConstant && SOVConstant->getType() == GV->getValueType() && - isa(GV->getInitializer())) { + isa(GV->getInitializer()) && + GetTTI(StoreFn).canHaveNonUndefGlobalInitializerInAddressSpace( + GV->getType()->getAddressSpace())) { // Change the initial value here. GV->setInitializer(SOVConstant); @@ -1635,8 +1644,7 @@ // Try to optimize globals based on the knowledge that only one value // (besides its initializer) is ever stored to the global. - if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, - GetTLI)) + if (optimizeOnceStoredGlobal(GV, StoredOnceValue, GS.Ordering, DL, GetTLI)) return true; // Otherwise, if the global was not a boolean, we can shrink it to be a @@ -1656,6 +1664,7 @@ /// make a change, return true. static bool processGlobal(GlobalValue &GV, + function_ref GetTTI, function_ref GetTLI, function_ref LookupDomTree) { if (GV.getName().startswith("llvm.")) @@ -1688,7 +1697,8 @@ if (GVar->isConstant() || !GVar->hasInitializer()) return Changed; - return processInternalGlobal(GVar, GS, GetTLI, LookupDomTree) || Changed; + return processInternalGlobal(GVar, GS, GetTTI, GetTLI, LookupDomTree) || + Changed; } /// Walk all of the direct calls of the specified function, changing them to @@ -1991,7 +2001,7 @@ } } - Changed |= processGlobal(*F, GetTLI, LookupDomTree); + Changed |= processGlobal(*F, GetTTI, GetTLI, LookupDomTree); if (!F->hasLocalLinkage()) continue; @@ -2060,6 +2070,7 @@ static bool OptimizeGlobalVars(Module &M, + function_ref GetTTI, function_ref GetTLI, function_ref LookupDomTree, SmallPtrSetImpl &NotDiscardableComdats) { @@ -2088,7 +2099,7 @@ continue; } - Changed |= processGlobal(*GV, GetTLI, LookupDomTree); + Changed |= processGlobal(*GV, GetTTI, GetTLI, LookupDomTree); } return Changed; } @@ -2666,8 +2677,8 @@ }); // Optimize non-address-taken globals. - LocalChange |= - OptimizeGlobalVars(M, GetTLI, LookupDomTree, NotDiscardableComdats); + LocalChange |= OptimizeGlobalVars(M, GetTTI, GetTLI, LookupDomTree, + NotDiscardableComdats); // Resolve aliases, when possible. LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats); diff --git a/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/llvm/lib/Transforms/Utils/GlobalStatus.cpp --- a/llvm/lib/Transforms/Utils/GlobalStatus.cpp +++ b/llvm/lib/Transforms/Utils/GlobalStatus.cpp @@ -127,9 +127,9 @@ GS.StoredType = GlobalStatus::InitializerStored; } else if (GS.StoredType < GlobalStatus::StoredOnce) { GS.StoredType = GlobalStatus::StoredOnce; - GS.StoredOnceValue = StoredVal; + GS.StoredOnceStore = SI; } else if (GS.StoredType == GlobalStatus::StoredOnce && - GS.StoredOnceValue == StoredVal) { + GS.getStoredOnceValue() == StoredVal) { // noop. } else { GS.StoredType = GlobalStatus::Stored; diff --git a/llvm/test/Transforms/GlobalOpt/address_space_initializer.ll b/llvm/test/Transforms/GlobalOpt/address_space_initializer.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/address_space_initializer.ll @@ -0,0 +1,43 @@ +; RUN: opt -passes=globalopt < %s -S | FileCheck %s +; RUN: opt -passes=globalopt --mtriple=nvptx64 < %s -S | FileCheck %s --check-prefix=GPU +; RUN: opt -passes=globalopt --mtriple=amdgcn < %s -S | FileCheck %s --check-prefix=GPU + +; Check that we don't try to set a global initializer for non AS(0) globals. + +@g0 = internal global i16 undef +@g1 = internal addrspace(3) global i16 undef +@g2 = internal addrspace(1) global i16 undef +; CHECK: internal unnamed_addr constant i16 3 +; CHECK: internal unnamed_addr addrspace(3) global i16 undef +; CHECK: internal unnamed_addr addrspace(1) global i16 undef +; GPU: internal unnamed_addr constant i16 3 +; GPU: internal unnamed_addr addrspace(3) global i16 undef +; GPU: internal unnamed_addr addrspace(1) constant i16 7 + +define void @a() { + store i16 3, i16* @g0, align 8 + store i16 5, i16* addrspacecast (i16 addrspace(3)* @g1 to i16*), align 8 + store i16 7, i16* addrspacecast (i16 addrspace(1)* @g2 to i16*), align 8 + ret void +} + +define i8 @get0() { + %bc = bitcast i16* @g0 to i8* + %gep = getelementptr i8, i8* %bc, i64 1 + %r = load i8, i8* %gep + ret i8 %r +} +define i8 @get1() { + %ac = addrspacecast i16 addrspace(3)* @g1 to i16* + %bc = bitcast i16* %ac to i8* + %gep = getelementptr i8, i8* %bc, i64 1 + %r = load i8, i8* %gep + ret i8 %r +} +define i8 @get2() { + %ac = addrspacecast i16 addrspace(1)* @g2 to i16* + %bc = bitcast i16* %ac to i8* + %gep = getelementptr i8, i8* %bc, i64 1 + %r = load i8, i8* %gep + ret i8 %r +}