diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1444,6 +1444,50 @@
   }
 }
 
+// For a global variable that is stored to by exactly one store instruction,
+// replace every simple load in the same function that the store dominates
+// with the stored value. Such a load can never observe the initializer.
+//
+// This is only done for constant stored values. A constant is the same on
+// every dynamic execution of the store, so the result holds even with
+// recursion or other threads running the same function. A non-constant
+// value (e.g. an argument) is not safe: if the function is re-entered between
+// the store and a dominated load, the load observes the value written by the
+// inner invocation, and nosync does not rule that out.
+//
+// Returns true if any load was replaced and erased.
+static bool forwardStoredOnceStore(
+    GlobalVariable *GV, const StoreInst *StoredOnceStore,
+    function_ref<DominatorTree &(Function &)> LookupDomTree) {
+  const Value *StoredOnceValue = StoredOnceStore->getValueOperand();
+  if (!isa<Constant>(StoredOnceValue))
+    return false;
+  const Function *F = StoredOnceStore->getFunction();
+  // Collect candidates first so that loads can be erased below without
+  // disturbing the walk over GV's users.
+  SmallVector<LoadInst *> Loads;
+  for (User *U : GV->users()) {
+    if (auto *LI = dyn_cast<LoadInst>(U)) {
+      if (LI->getFunction() == F &&
+          LI->getType() == StoredOnceValue->getType() && LI->isSimple())
+        Loads.push_back(LI);
+    }
+  }
+  // Only compute DT if we have any loads to examine.
+  bool MadeChange = false;
+  if (!Loads.empty()) {
+    auto &DT = LookupDomTree(*const_cast<Function *>(F));
+    for (auto *LI : Loads) {
+      if (DT.dominates(StoredOnceStore, LI)) {
+        LI->replaceAllUsesWith(const_cast<Value *>(StoredOnceValue));
+        LI->eraseFromParent();
+        MadeChange = true;
+      }
+    }
+  }
+  return MadeChange;
+}
+
 /// Analyze the specified global variable and optimize
 /// it if possible. If we make a change, return true.
 static bool
@@ -1603,6 +1647,10 @@
     if (optimizeOnceStoredGlobal(GV, StoredOnceValue, DL, GetTLI))
       return true;
 
+    // Try to forward the store to any loads.
+    if (forwardStoredOnceStore(GV, GS.StoredOnceStore, LookupDomTree))
+      return true;
+
     // Otherwise, if the global was not a boolean, we can shrink it to be a
     // boolean. Skip this optimization for AS that doesn't allow an initializer.
     if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic &&
diff --git a/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool-check-debug.ll b/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool-check-debug.ll
--- a/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool-check-debug.ll
+++ b/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool-check-debug.ll
@@ -2,21 +2,24 @@
 
 @foo = internal global i32 0, align 4
 
-define dso_local i32 @bar() {
+define void @store() {
 entry:
   store i32 5, i32* @foo, align 4
+  ret void
+}
+
+define i32 @bar() {
+entry:
   %0 = load i32, i32* @foo, align 4
   ret i32 %0
 }
 
 ;CHECK: @bar
 ;CHECK-NEXT: entry:
-;CHECK-NEXT: store i1 true, i1* @foo, align 1, !dbg ![[DbgLocStore:[0-9]+]]
 ;CHECK-NEXT: %.b = load i1, i1* @foo, align 1, !dbg ![[DbgLocLoadSel:[0-9]+]]
 ;CHECK-NEXT: %0 = select i1 %.b, i32 5, i32 0, !dbg ![[DbgLocLoadSel]]
 ;CHECK-NEXT: call void @llvm.dbg.value({{.*}}), !dbg ![[DbgLocLoadSel]]
 ;CHECK-NEXT: ret i32 %0, !dbg ![[DbgLocRet:[0-9]+]]
 
-;CHECK: ![[DbgLocStore]] = !DILocation(line: 1,
-;CHECK: ![[DbgLocLoadSel]] = !DILocation(line: 2,
-;CHECK: ![[DbgLocRet]] = !DILocation(line: 3,
+;CHECK: ![[DbgLocLoadSel]] = !DILocation(line: 3,
+;CHECK: ![[DbgLocRet]] = !DILocation(line: 4,
diff --git a/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool.ll b/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool.ll
--- a/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool.ll
+++ b/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool.ll
@@ -10,11 +10,13 @@
 ; Negative test for AS(3). Skip shrink global to bool optimization.
 ; CHECK: @lvar = internal unnamed_addr addrspace(3) global i32 undef
 
-define void @test_global_var() {
+define void @test_global_var(i1 %i) {
 ; CHECK-LABEL: @test_global_var(
 ; CHECK: store volatile i32 10, i32* undef, align 4
 ;
 entry:
+  br i1 %i, label %bb1, label %exit
+bb1:
   store i32 10, i32* @gvar
   br label %exit
 exit:
@@ -23,13 +25,15 @@
   ret void
 }
 
-define void @test_lds_var() {
+define void @test_lds_var(i1 %i) {
 ; CHECK-LABEL: @test_lds_var(
 ; CHECK: store i32 10, i32 addrspace(3)* @lvar, align 4
 ; CHECK: [[LD:%.*]] = load i32, i32 addrspace(3)* @lvar, align 4
 ; CHECK: store volatile i32 [[LD]], i32* undef, align 4
 ;
 entry:
+  br i1 %i, label %bb1, label %exit
+bb1:
   store i32 10, i32 addrspace(3)* @lvar
   br label %exit
 exit:
diff --git a/llvm/test/Transforms/GlobalOpt/stored-once-forward-value.ll b/llvm/test/Transforms/GlobalOpt/stored-once-forward-value.ll
--- a/llvm/test/Transforms/GlobalOpt/stored-once-forward-value.ll
+++ b/llvm/test/Transforms/GlobalOpt/stored-once-forward-value.ll
@@ -7,15 +7,14 @@
 @g4 = internal unnamed_addr global i32 0
 @g5 = internal unnamed_addr global i32 0
 @g6 = internal unnamed_addr global i32 0
+@g7 = internal unnamed_addr global i32 0
 
 declare void @b()
 
 define i1 @dom_const() {
 ; CHECK-LABEL: @dom_const(
-; CHECK-NEXT:    store i1 true, ptr @g1, align 1
 ; CHECK-NEXT:    call void @b()
-; CHECK-NEXT:    [[R:%.*]] = load i1, ptr @g1, align 1
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 true
 ;
   store i1 true, ptr @g1
   call void @b()
@@ -23,6 +22,21 @@
   ret i1 %r
 }
 
+; Negative test: a non-constant value is not forwarded even in a nosync
+; function, since @b may re-enter it and store a different value.
+define i32 @dom_arg_nosync(i32 %a) nosync {
+; CHECK-LABEL: @dom_arg_nosync(
+; CHECK-NEXT:    store i32 [[A:%.*]], ptr @g7, align 4
+; CHECK-NEXT:    call void @b()
+; CHECK-NEXT:    [[R:%.*]] = load i32, ptr @g7, align 4
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  store i32 %a, ptr @g7
+  call void @b()
+  %r = load i32, ptr @g7
+  ret i32 %r
+}
+
 define i32 @dom_arg(i32 %a) {
 ; CHECK-LABEL: @dom_arg(
 ; CHECK-NEXT:    store i32 [[A:%.*]], ptr @g2, align 4
@@ -74,8 +88,7 @@
 ; CHECK-LABEL: @dom_multiple_function_loads(
 ; CHECK-NEXT:    store i1 true, ptr @g5, align 1
 ; CHECK-NEXT:    call void @b()
-; CHECK-NEXT:    [[R:%.*]] = load i1, ptr @g5, align 1
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 true
 ;
   store i1 true, ptr @g5
   call void @b()
diff --git a/llvm/test/Transforms/PhaseOrdering/recompute-globalsaa.ll b/llvm/test/Transforms/PhaseOrdering/recompute-globalsaa.ll
--- a/llvm/test/Transforms/PhaseOrdering/recompute-globalsaa.ll
+++ b/llvm/test/Transforms/PhaseOrdering/recompute-globalsaa.ll
@@ -9,7 +9,6 @@
 define i32 @main() {
 ; CHECK-LABEL: @main(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store i1 true, i1* @a, align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** @e, align 8
 ; CHECK-NEXT:    store i32 0, i32* [[TMP0]], align 4
 ; CHECK-NEXT:    store i32* null, i32** @e, align 8