diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -82,6 +82,7 @@ STATISTIC(NumFastOther, "Number of other instrs removed"); STATISTIC(NumCompletePartials, "Number of stores dead by later partials"); STATISTIC(NumModifiedStores, "Number of stores modified"); +STATISTIC(NumNoopStores, "Number of noop stores deleted"); DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa", "Controls which MemoryDefs are eliminated."); @@ -1821,6 +1822,21 @@ } }; +/// \returns true if \p KillingDef stores the result of \p Load to the source of +/// \p Load. +static bool storeIsNoop(MemorySSA &MSSA, LoadInst *Load, + MemoryDef *KillingDef) { + Instruction *Store = KillingDef->getMemoryInst(); + // If the load's operand isn't the destination of the store, bail. + if (Load->getPointerOperand() != Store->getOperand(1)) + return false; + + // Get the defining access for the load. + auto *LoadAccess = MSSA.getMemoryAccess(Load)->getDefiningAccess(); + // The store is dead if the defining accesses are the same. + return LoadAccess == KillingDef->getDefiningAccess(); +} + bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT, PostDominatorTree &PDT, @@ -1835,6 +1851,18 @@ if (State.SkipStores.count(KillingDef)) continue; Instruction *SI = KillingDef->getMemoryInst(); + + // Check if we're storing a value that we just loaded. + if (auto *Load = dyn_cast(SI->getOperand(0))) { + if (storeIsNoop(MSSA, Load, KillingDef)) { + State.deleteDeadInstruction(SI); + LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *SI + << '\n'); + NumNoopStores++; + continue; + } + } + auto MaybeSILoc = State.getLocForWriteEx(SI); if (!MaybeSILoc) { LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for " diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll @@ -98,3 +98,37 @@ store i32 1, i32* @x ret i32 %x } + +; **** Noop load->store tests ************************************************** + +; We can optimize unordered atomic loads or stores. +define void @test_load_atomic(i32* %Q) { +; CHECK-LABEL: @test_load_atomic( +; CHECK-NEXT: ret void +; + %a = load atomic i32, i32* %Q unordered, align 4 + store atomic i32 %a, i32* %Q unordered, align 4 + ret void +} + +; We can optimize unordered atomic loads or stores. +define void @test_store_atomic(i32* %Q) { +; CHECK-LABEL: @test_store_atomic( +; CHECK-NEXT: ret void +; + %a = load i32, i32* %Q + store atomic i32 %a, i32* %Q unordered, align 4 + ret void +} + +; We can NOT optimize release atomic loads or stores. +define void @test_store_atomic_release(i32* %Q) { +; CHECK-LABEL: @test_store_atomic_release( +; CHECK-NEXT: load +; CHECK-NEXT: store atomic +; CHECK-NEXT: ret void +; + %a = load i32, i32* %Q + store atomic i32 %a, i32* %Q release, align 4 + ret void +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/noop-stores.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/noop-stores.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/noop-stores.ll @@ -0,0 +1,171 @@ +; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind +declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind +declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind +declare void @llvm.init.trampoline(i8*, i8*, i8*) + +; **** Noop load->store tests ************************************************** + +; We CAN optimize volatile loads. +define void @test_load_volatile(i32* %Q) { +; CHECK-LABEL: @test_load_volatile( +; CHECK-NEXT: [[A:%.*]] = load volatile i32, i32* [[Q:%.*]] +; CHECK-NEXT: store i32 [[A]], i32* [[Q]] +; CHECK-NEXT: ret void +; + %a = load volatile i32, i32* %Q + store i32 %a, i32* %Q + ret void +} + +; We can NOT optimize volatile stores. +define void @test_store_volatile(i32* %Q) { +; CHECK-LABEL: @test_store_volatile( +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[Q:%.*]] +; CHECK-NEXT: store volatile i32 [[A]] +; CHECK-NEXT: ret void +; + %a = load i32, i32* %Q + store volatile i32 %a, i32* %Q + ret void +} + +; PR2599 - load -> store to same address. +define void @test12({ i32, i32 }* %x) nounwind { +; CHECK-LABEL: @test12( +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[X:%.*]], i32 0, i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = sub i32 0, [[TMP8]] +; CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP7]], align 4 +; CHECK-NEXT: ret void +; + %tmp4 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 0 + %tmp5 = load i32, i32* %tmp4, align 4 + %tmp7 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1 + %tmp8 = load i32, i32* %tmp7, align 4 + %tmp17 = sub i32 0, %tmp8 + store i32 %tmp5, i32* %tmp4, align 4 + store i32 %tmp17, i32* %tmp7, align 4 + ret void +} + +; Remove redundant store if loaded value is in another block. +define i32 @test26(i1 %c, i32* %p) { +; CHECK-LABEL: @test26( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: ret i32 0 +; +entry: + %v = load i32, i32* %p, align 4 + br i1 %c, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + store i32 %v, i32* %p, align 4 + br label %bb3 +bb3: + ret i32 0 +} + +; Remove redundant store if loaded value is in another block. +define i32 @test27(i1 %c, i32* %p) { +; CHECK-LABEL: @test27( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: ret i32 0 +; +entry: + %v = load i32, i32* %p, align 4 + br i1 %c, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + br label %bb3 +bb3: + store i32 %v, i32* %p, align 4 + ret i32 0 +} + +declare void @unknown_func() + +; Remove redundant store, which is in the lame loop as the load. +define i32 @test33(i1 %c, i32* %p, i32 %i) { +; CHECK-LABEL: @test33( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: call void @unknown_func() +; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB3:%.*]] +; CHECK: bb3: +; CHECK-NEXT: ret i32 0 +; +entry: + br label %bb1 +bb1: + %v = load i32, i32* %p, align 4 + br label %bb2 +bb2: + store i32 %v, i32* %p, align 4 + ; Might read and overwrite value at %p, but doesn't matter. + call void @unknown_func() + br i1 undef, label %bb1, label %bb3 +bb3: + ret i32 0 +} + +declare void @unkown_write(i32*) + +; We can't remove the "noop" store around an unkown write. +define void @test43(i32* %Q) { +; CHECK-LABEL: @test43( +; CHECK-NEXT: load +; CHECK-NEXT: call +; CHECK-NEXT: store +; CHECK-NEXT: ret void +; + %a = load i32, i32* %Q + call void @unkown_write(i32* %Q) + store i32 %a, i32* %Q + ret void +} + +; We CAN remove it when the unkown write comes AFTER. +define void @test44(i32* %Q) { +; CHECK-LABEL: @test44( +; CHECK-NEXT: call +; CHECK-NEXT: ret void + %a = load i32, i32* %Q + store i32 %a, i32* %Q + call void @unkown_write(i32* %Q) + ret void +} + +define void @test45(i32* %Q) { +; CHECK-LABEL: @test45( +; CHECK-NEXT: [[A:%.*]] = load +; CHECK-NEXT: store i32 [[A]] +; CHECK-NEXT: ret void + %a = load i32, i32* %Q + store i32 10, i32* %Q + store i32 %a, i32* %Q + ret void +} + + diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll @@ -10,16 +10,6 @@ declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind declare void @llvm.init.trampoline(i8*, i8*, i8*) -define void @test5(i32* %Q) { -; CHECK-LABEL: @test5( -; CHECK-NEXT: [[A:%.*]] = load volatile i32, i32* [[Q:%.*]] -; CHECK-NEXT: ret void -; - %a = load volatile i32, i32* %Q - store i32 %a, i32* %Q - ret void -} - ; Do not delete stores that are only partially killed. define i32 @test8() { ; CHECK-LABEL: @test8( @@ -80,25 +70,6 @@ ret void } -; PR2599 - load -> store to same address. -define void @test12({ i32, i32 }* %x) nounwind { -; CHECK-LABEL: @test12( -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[X:%.*]], i32 0, i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = sub i32 0, [[TMP8]] -; CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP7]], align 4 -; CHECK-NEXT: ret void -; - %tmp4 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 0 - %tmp5 = load i32, i32* %tmp4, align 4 - %tmp7 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1 - %tmp8 = load i32, i32* %tmp7, align 4 - %tmp17 = sub i32 0, %tmp8 - store i32 %tmp5, i32* %tmp4, align 4 - store i32 %tmp17, i32* %tmp7, align 4 - ret void -} - declare noalias i8* @malloc(i32) declare noalias i8* @calloc(i32, i32) @@ -143,54 +114,6 @@ ret void } -; Remove redundant store if loaded value is in another block. -define i32 @test26(i1 %c, i32* %p) { -; CHECK-LABEL: @test26( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] -; CHECK: bb1: -; CHECK-NEXT: br label [[BB3:%.*]] -; CHECK: bb2: -; CHECK-NEXT: br label [[BB3]] -; CHECK: bb3: -; CHECK-NEXT: ret i32 0 -; -entry: - %v = load i32, i32* %p, align 4 - br i1 %c, label %bb1, label %bb2 -bb1: - br label %bb3 -bb2: - store i32 %v, i32* %p, align 4 - br label %bb3 -bb3: - ret i32 0 -} - -; Remove redundant store if loaded value is in another block. -define i32 @test27(i1 %c, i32* %p) { -; CHECK-LABEL: @test27( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] -; CHECK: bb1: -; CHECK-NEXT: br label [[BB3:%.*]] -; CHECK: bb2: -; CHECK-NEXT: br label [[BB3]] -; CHECK: bb3: -; CHECK-NEXT: ret i32 0 -; -entry: - %v = load i32, i32* %p, align 4 - br i1 %c, label %bb1, label %bb2 -bb1: - br label %bb3 -bb2: - br label %bb3 -bb3: - store i32 %v, i32* %p, align 4 - ret i32 0 -} - declare void @unknown_func() ; Remove redundant store if loaded value is in another block inside a loop. @@ -213,48 +136,6 @@ ret i32 0 } -; Remove redundant store, which is in the lame loop as the load. -define i32 @test33(i1 %c, i32* %p, i32 %i) { -; CHECK-LABEL: @test33( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[BB1:%.*]] -; CHECK: bb1: -; CHECK-NEXT: br label [[BB2:%.*]] -; CHECK: bb2: -; CHECK-NEXT: call void @unknown_func() -; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB3:%.*]] -; CHECK: bb3: -; CHECK-NEXT: ret i32 0 -; -entry: - br label %bb1 -bb1: - %v = load i32, i32* %p, align 4 - br label %bb2 -bb2: - store i32 %v, i32* %p, align 4 - ; Might read and overwrite value at %p, but doesn't matter. - call void @unknown_func() - br i1 undef, label %bb1, label %bb3 -bb3: - ret i32 0 -} - -define void @test43(i32* %P, i32* noalias %Q) { -; CHECK-LABEL: @test43( -; CHECK-NEXT: entry: -; CHECK-NEXT: store i32 50331649, i32* [[P:%.*]] -; CHECK-NEXT: store i32 2, i32* [[Q:%.*]] -; CHECK-NEXT: ret void -; -entry: - store i32 1, i32* %P - %P2 = bitcast i32* %P to i8* - store i32 2, i32* %Q - store i8 3, i8* %P2 - ret void -} - define void @test43a(i32* %P, i32* noalias %Q) { ; CHECK-LABEL: @test43a( ; CHECK-NEXT: entry: