GlobalOpt: cache the result of the TTI query
canHaveNonUndefGlobalInitializerInAddressSpace() in a local and also consult
it before shrinking a stored-once global to a boolean, so that an
undef-initialized global in an address space that cannot carry a non-undef
initializer is left alone. Adds an AMDGPU regression test (addrspace(3)).

diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1612,6 +1612,9 @@
 
     Function &StoreFn =
         const_cast<Function &>(*GS.StoredOnceStore->getFunction());
+    bool CanHaveNonUndefGlobalInitializer =
+        GetTTI(StoreFn).canHaveNonUndefGlobalInitializerInAddressSpace(
+            GV->getType()->getAddressSpace());
     // If the initial value for the global was an undef value, and if only
     // one other value was stored into it, we can just change the
     // initializer to be the stored value, then delete all stores to the
@@ -1621,8 +1624,7 @@
     // shared memory (AS 3).
     if (SOVConstant && SOVConstant->getType() == GV->getValueType() &&
         isa<UndefValue>(GV->getInitializer()) &&
-        GetTTI(StoreFn).canHaveNonUndefGlobalInitializerInAddressSpace(
-            GV->getType()->getAddressSpace())) {
+        CanHaveNonUndefGlobalInitializer) {
       // Change the initial value here.
       GV->setInitializer(SOVConstant);
 
@@ -1645,8 +1647,10 @@
       return true;
 
     // Otherwise, if the global was not a boolean, we can shrink it to be a
-    // boolean.
-    if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic) {
+    // boolean. Skip this optimization for AS that doesn't allow an initializer.
+    if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic &&
+        (!isa<UndefValue>(GV->getInitializer()) ||
+         CanHaveNonUndefGlobalInitializer)) {
       if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
         ++NumShrunkToBool;
         return true;
diff --git a/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool.ll b/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool.ll
@@ -0,0 +1,39 @@
+; RUN: opt -passes=globalopt --mtriple=amdgcn-amd-amdhsa < %s -S | FileCheck %s
+; REQUIRES: amdgpu-registered-target
+
+@gvar = internal unnamed_addr global i32 undef
+@lvar = internal unnamed_addr addrspace(3) global i32 undef
+
+; Should optimize @gvar.
+; CHECK-NOT: @gvar
+
+; Negative test for AS(3). Skip shrink global to bool optimization.
+; CHECK: @lvar = internal unnamed_addr addrspace(3) global i32 undef
+
+define void @test_global_var() {
+; CHECK-LABEL: @test_global_var(
+; CHECK: store volatile i32 10, i32* undef, align 4
+;
+entry:
+  store i32 10, i32* @gvar
+  br label %exit
+exit:
+  %ld = load i32, i32* @gvar
+  store volatile i32 %ld, i32* undef
+  ret void
+}
+
+define void @test_lds_var() {
+; CHECK-LABEL: @test_lds_var(
+; CHECK: store i32 10, i32 addrspace(3)* @lvar, align 4
+; CHECK: [[LD:%.*]] = load i32, i32 addrspace(3)* @lvar, align 4
+; CHECK: store volatile i32 [[LD]], i32* undef, align 4
+;
+entry:
+  store i32 10, i32 addrspace(3)* @lvar
+  br label %exit
+exit:
+  %ld = load i32, i32 addrspace(3)* @lvar
+  store volatile i32 %ld, i32* undef
+  ret void
+}