diff --git a/llvm/include/llvm/Transforms/Utils/GlobalStatus.h b/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
--- a/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
+++ b/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
@@ -35,6 +35,9 @@
   /// can be deleted.
   bool IsLoaded = false;
 
+  /// Number of stores to the global.
+  unsigned NumStores = 0;
+
   /// Keep track of what stores to the global look like.
   enum StoredType {
     /// There is no store to this global. It can thus be marked constant.
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1444,6 +1444,52 @@
   }
 }
 
+/// Forward the value of a global's single store to the loads it dominates.
+///
+/// For a global variable with one store, if the store dominates any loads,
+/// those loads will always load the stored value (as opposed to the
+/// initializer), even in the presence of recursion.
+///
+/// \param GV the global variable whose users are scanned for loads.
+/// \param StoredOnceStore the store whose value operand is forwarded.
+/// \param LookupDomTree callback used to obtain the dominator tree of the
+///        store's function; only invoked if a candidate load exists.
+/// \returns true if at least one load was replaced and erased.
+static bool forwardStoredOnceStore(
+    GlobalVariable *GV, const StoreInst *StoredOnceStore,
+    function_ref<DominatorTree &(Function &)> LookupDomTree) {
+  const Value *StoredOnceValue = StoredOnceStore->getValueOperand();
+  // We can do this optimization for non-constants in nosync + norecurse
+  // functions, but globals used in exactly one norecurse function are already
+  // promoted to an alloca.
+  if (!isa<Constant>(StoredOnceValue))
+    return false;
+  const Function *F = StoredOnceStore->getFunction();
+  SmallVector<LoadInst *> Loads;
+  // Candidates are simple loads of the stored value's exact type that live in
+  // the same function as the store.
+  for (User *U : GV->users()) {
+    if (auto *LI = dyn_cast<LoadInst>(U)) {
+      if (LI->getFunction() == F &&
+          LI->getType() == StoredOnceValue->getType() && LI->isSimple())
+        Loads.push_back(LI);
+    }
+  }
+  // Only compute DT if we have any loads to examine.
+  bool MadeChange = false;
+  if (!Loads.empty()) {
+    auto &DT = LookupDomTree(*const_cast<Function *>(F));
+    for (auto *LI : Loads) {
+      if (DT.dominates(StoredOnceStore, LI)) {
+        LI->replaceAllUsesWith(const_cast<Value *>(StoredOnceValue));
+        LI->eraseFromParent();
+        MadeChange = true;
+      }
+    }
+  }
+  return MadeChange;
+}
+
 /// Analyze the specified global variable and optimize
 /// it if possible. If we make a change, return true.
 static bool
@@ -1603,6 +1649,12 @@
     if (optimizeOnceStoredGlobal(GV, StoredOnceValue, DL, GetTLI))
       return true;
 
+    // Try to forward the store to any loads. If we have more than one store, we
+    // may have a store of the initializer between StoredOnceStore and a load.
+    if (GS.NumStores == 1)
+      if (forwardStoredOnceStore(GV, GS.StoredOnceStore, LookupDomTree))
+        return true;
+
     // Otherwise, if the global was not a boolean, we can shrink it to be a
     // boolean. Skip this optimization for AS that doesn't allow an initializer.
     if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic &&
diff --git a/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
--- a/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -104,6 +104,8 @@
         if (SI->isVolatile())
           return true;
 
+        ++GS.NumStores;
+
         GS.Ordering = strongerOrdering(GS.Ordering, SI->getOrdering());
 
         // If this is a direct store to the global (i.e., the global is a scalar
diff --git a/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool-check-debug.ll b/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool-check-debug.ll
--- a/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool-check-debug.ll
+++ b/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool-check-debug.ll
@@ -2,21 +2,24 @@
 
 @foo = internal global i32 0, align 4
 
-define dso_local i32 @bar() {
+define void @store() {
 entry:
   store i32 5, i32* @foo, align 4
+  ret void
+}
+
+define i32 @bar() {
+entry:
   %0 = load i32, i32* @foo, align 4
   ret i32 %0
 }
 
 ;CHECK: @bar
 ;CHECK-NEXT: entry:
-;CHECK-NEXT: store i1 true, i1* @foo, align 1, !dbg ![[DbgLocStore:[0-9]+]] ;CHECK-NEXT: %.b = load i1, i1* @foo, align 1, !dbg ![[DbgLocLoadSel:[0-9]+]] ;CHECK-NEXT: %0 = select i1 %.b, i32 5, i32 0, !dbg ![[DbgLocLoadSel]] ;CHECK-NEXT: call void @llvm.dbg.value({{.*}}), !dbg ![[DbgLocLoadSel]] ;CHECK-NEXT: ret i32 %0, !dbg ![[DbgLocRet:[0-9]+]] -;CHECK: ![[DbgLocStore]] = !DILocation(line: 1, -;CHECK: ![[DbgLocLoadSel]] = !DILocation(line: 2, -;CHECK: ![[DbgLocRet]] = !DILocation(line: 3, +;CHECK: ![[DbgLocLoadSel]] = !DILocation(line: 3, +;CHECK: ![[DbgLocRet]] = !DILocation(line: 4, diff --git a/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool.ll b/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool.ll --- a/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool.ll +++ b/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool.ll @@ -10,11 +10,13 @@ ; Negative test for AS(3). Skip shrink global to bool optimization. ; CHECK: @lvar = internal unnamed_addr addrspace(3) global i32 undef -define void @test_global_var() { +define void @test_global_var(i1 %i) { ; CHECK-LABEL: @test_global_var( ; CHECK: store volatile i32 10, i32* undef, align 4 ; entry: + br i1 %i, label %bb1, label %exit +bb1: store i32 10, i32* @gvar br label %exit exit: @@ -23,13 +25,15 @@ ret void } -define void @test_lds_var() { +define void @test_lds_var(i1 %i) { ; CHECK-LABEL: @test_lds_var( ; CHECK: store i32 10, i32 addrspace(3)* @lvar, align 4 ; CHECK: [[LD:%.*]] = load i32, i32 addrspace(3)* @lvar, align 4 ; CHECK: store volatile i32 [[LD]], i32* undef, align 4 ; entry: + br i1 %i, label %bb1, label %exit +bb1: store i32 10, i32 addrspace(3)* @lvar br label %exit exit: diff --git a/llvm/test/Transforms/GlobalOpt/stored-once-forward-value.ll b/llvm/test/Transforms/GlobalOpt/stored-once-forward-value.ll --- a/llvm/test/Transforms/GlobalOpt/stored-once-forward-value.ll +++ b/llvm/test/Transforms/GlobalOpt/stored-once-forward-value.ll @@ -7,15 +7,16 @@ @g4 = internal unnamed_addr global i32 0 
@g5 = internal unnamed_addr global i32 0 @g6 = internal unnamed_addr global i32 0 +@g7 = internal unnamed_addr global i32 0 +@g8 = internal unnamed_addr global ptr null +@tl = internal thread_local unnamed_addr global i32 0 declare void @b() define i1 @dom_const() { ; CHECK-LABEL: @dom_const( -; CHECK-NEXT: store i1 true, ptr @g1, align 1 ; CHECK-NEXT: call void @b() -; CHECK-NEXT: [[R:%.*]] = load i1, ptr @g1, align 1 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 true ; store i1 true, ptr @g1 call void @b() @@ -36,16 +37,29 @@ ret i32 %r } +define ptr @dom_thread_local_global() { +; CHECK-LABEL: @dom_thread_local_global( +; CHECK-NEXT: store ptr @tl, ptr @g3, align 8 +; CHECK-NEXT: call void @b() +; CHECK-NEXT: [[R:%.*]] = load ptr, ptr @g3, align 8 +; CHECK-NEXT: ret ptr [[R]] +; + store ptr @tl, ptr @g3 + call void @b() + %r = load ptr, ptr @g3 + ret ptr %r +} + define i32 @dom_different_types() { ; CHECK-LABEL: @dom_different_types( -; CHECK-NEXT: store i1 true, ptr @g3, align 1 +; CHECK-NEXT: store i1 true, ptr @g4, align 1 ; CHECK-NEXT: call void @b() -; CHECK-NEXT: [[R:%.*]] = load i32, ptr @g3, align 4 +; CHECK-NEXT: [[R:%.*]] = load i32, ptr @g4, align 4 ; CHECK-NEXT: ret i32 [[R]] ; - store i1 true, ptr @g3 + store i1 true, ptr @g4 call void @b() - %r = load i32, ptr @g3 + %r = load i32, ptr @g4 ret i32 %r } @@ -53,57 +67,70 @@ ; CHECK-LABEL: @no_dom( ; CHECK-NEXT: br i1 [[I:%.*]], label [[BB1:%.*]], label [[END:%.*]] ; CHECK: bb1: -; CHECK-NEXT: store i1 true, ptr @g4, align 1 +; CHECK-NEXT: store i1 true, ptr @g5, align 1 ; CHECK-NEXT: br label [[END]] ; CHECK: end: ; CHECK-NEXT: call void @b() -; CHECK-NEXT: [[R:%.*]] = load i1, ptr @g4, align 1 +; CHECK-NEXT: [[R:%.*]] = load i1, ptr @g5, align 1 ; CHECK-NEXT: ret i1 [[R]] ; br i1 %i, label %bb1, label %end bb1: - store i1 true, ptr @g4 + store i1 true, ptr @g5 br label %end end: call void @b() - %r = load i1, ptr @g4 + %r = load i1, ptr @g5 ret i1 %r } define i1 @dom_multiple_function_loads() { ; 
CHECK-LABEL: @dom_multiple_function_loads( -; CHECK-NEXT: store i1 true, ptr @g5, align 1 +; CHECK-NEXT: store i1 true, ptr @g6, align 1 ; CHECK-NEXT: call void @b() -; CHECK-NEXT: [[R:%.*]] = load i1, ptr @g5, align 1 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 true ; - store i1 true, ptr @g5 + store i1 true, ptr @g6 call void @b() - %r = load i1, ptr @g5 + %r = load i1, ptr @g6 ret i1 %r } define i1 @other() { ; CHECK-LABEL: @other( ; CHECK-NEXT: call void @b() -; CHECK-NEXT: [[R:%.*]] = load i1, ptr @g5, align 1 +; CHECK-NEXT: [[R:%.*]] = load i1, ptr @g6, align 1 ; CHECK-NEXT: ret i1 [[R]] ; call void @b() - %r = load i1, ptr @g5 + %r = load i1, ptr @g6 ret i1 %r } define i1 @dom_volatile() { ; CHECK-LABEL: @dom_volatile( -; CHECK-NEXT: store i1 true, ptr @g6, align 1 +; CHECK-NEXT: store i1 true, ptr @g7, align 1 ; CHECK-NEXT: call void @b() -; CHECK-NEXT: [[R:%.*]] = load volatile i1, ptr @g6, align 1 +; CHECK-NEXT: [[R:%.*]] = load volatile i1, ptr @g7, align 1 ; CHECK-NEXT: ret i1 [[R]] ; - store i1 true, ptr @g6 + store i1 true, ptr @g7 call void @b() - %r = load volatile i1, ptr @g6 + %r = load volatile i1, ptr @g7 ret i1 %r } +define i1 @dom_store_const_and_initializer() { +; CHECK-LABEL: @dom_store_const_and_initializer( +; CHECK-NEXT: store i1 true, ptr @g8, align 1 +; CHECK-NEXT: store i1 false, ptr @g8, align 1 +; CHECK-NEXT: call void @b() +; CHECK-NEXT: [[R:%.*]] = load i1, ptr @g8, align 1 +; CHECK-NEXT: ret i1 [[R]] +; + store i1 true, ptr @g8 + store i1 false, ptr @g8 + call void @b() + %r = load i1, ptr @g8 + ret i1 %r +} diff --git a/llvm/test/Transforms/PhaseOrdering/recompute-globalsaa.ll b/llvm/test/Transforms/PhaseOrdering/recompute-globalsaa.ll --- a/llvm/test/Transforms/PhaseOrdering/recompute-globalsaa.ll +++ b/llvm/test/Transforms/PhaseOrdering/recompute-globalsaa.ll @@ -9,7 +9,6 @@ define i32 @main() { ; CHECK-LABEL: @main( ; CHECK-NEXT: entry: -; CHECK-NEXT: store i1 true, i1* @a, align 4 ; CHECK-NEXT: [[TMP0:%.*]] = load i32*, 
i32** @e, align 8 ; CHECK-NEXT: store i32 0, i32* [[TMP0]], align 4 ; CHECK-NEXT: store i32* null, i32** @e, align 8