diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1835,6 +1835,44 @@
     if (State.SkipStores.count(KillingDef))
       continue;
     Instruction *SI = KillingDef->getMemoryInst();
+
+    // Check if we're storing a value that we just loaded.
+    if (auto *Load = dyn_cast<LoadInst>(SI->getOperand(0))) {
+      auto *LoadAccess = MSSA.getMemoryAccess(Load)->getDefiningAccess();
+
+      bool IsDead = LoadAccess == KillingDef->getDefiningAccess();
+      if (!IsDead) {
+        SetVector<MemoryAccess *> ToCheck;
+        MemoryAccess *Current = KillingDef->getDefiningAccess();
+        ToCheck.insert(KillingDef);
+        ToCheck.insert(Current);
+        for (unsigned I = 1; I < ToCheck.size(); ++I) {
+          auto *Current = ToCheck[I];
+          if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
+            for (unsigned PhiIndex = 0; PhiIndex < PhiAccess->getNumOperands();
+                 ++PhiIndex)
+              ToCheck.insert(PhiAccess->getOperand(PhiIndex));
+            continue;
+          }
+
+          if (isa<MemoryUse>(Current)) {
+            IsDead = LoadAccess == Current;
+            break;
+          }
+
+          if (cast<MemoryDef>(Current)->getDefiningAccess() == LoadAccess)
+            IsDead = true;
+        }
+      }
+
+      if (Load->getPointerOperand() == SI->getOperand(1) && IsDead) {
+        State.deleteDeadInstruction(SI);
+        LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *SI
+                          << '\n');
+        break;
+      }
+    }
+
     auto MaybeSILoc = State.getLocForWriteEx(SI);
     if (!MaybeSILoc) {
       LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
@@ -98,3 +98,37 @@
   store i32 1, i32* @x
   ret i32 %x
 }
+
+; **** Noop load->store tests **************************************************
+
+; We can optimize unordered atomic loads or stores.
+define void @test_load_atomic(i32* %Q) { +; CHECK-LABEL: @test_load_atomic( +; CHECK-NEXT: ret void +; + %a = load atomic i32, i32* %Q unordered, align 4 + store atomic i32 %a, i32* %Q unordered, align 4 + ret void +} + +; We can optimize unordered atomic loads or stores. +define void @test_store_atomic(i32* %Q) { +; CHECK-LABEL: @test_store_atomic( +; CHECK-NEXT: ret void +; + %a = load i32, i32* %Q + store atomic i32 %a, i32* %Q unordered, align 4 + ret void +} + +; We can NOT optimize release atomic loads or stores. +define void @test_store_atomic_release(i32* %Q) { +; CHECK-LABEL: @test_store_atomic_release( +; CHECK-NEXT: load +; CHECK-NEXT: store atomic +; CHECK-NEXT: ret void +; + %a = load i32, i32* %Q + store atomic i32 %a, i32* %Q release, align 4 + ret void +} diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll --- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll @@ -25,15 +25,13 @@ define i32 @test3(i32* %g_addr) nounwind { ; CHECK-LABEL: @test3( -; CHECK-NEXT: [[G_VALUE:%.*]] = load i32, i32* [[G_ADDR:%.*]], align 4 ; CHECK-NEXT: store i32 -1, i32* @g, align 4 -; CHECK-NEXT: store i32 [[G_VALUE]], i32* [[G_ADDR]], align 4 +; CHECK-NEXT: store i32 1, i32* %g_addr, align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* @g, align 4 ; CHECK-NEXT: ret i32 [[TMP3]] ; - %g_value = load i32, i32* %g_addr, align 4 store i32 -1, i32* @g, align 4 - store i32 %g_value, i32* %g_addr, align 4 + store i32 1, i32* %g_addr, align 4 %tmp3 = load i32, i32* @g, align 4 ret i32 %tmp3 } @@ -268,8 +266,8 @@ ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: store i32 [[V]], i32* [[P]], align 4 -; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: store i32 [[I]], i32* [[P]], align 4 +; CHECK-NEXT: ret i32 [[V]] ; entry: %v = load i32, i32* %p, align 4 @@ -282,8 +280,8 @@ bb2: br label %bb3 bb3: - store i32 %v, i32* %p, 
align 4 - ret i32 0 + store i32 %i, i32* %p, align 4 + ret i32 %v } ; Don't remove redundant store because of may-aliased store. @@ -298,8 +296,8 @@ ; CHECK-NEXT: store i32 [[I:%.*]], i32* [[P2:%.*]], align 4 ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: store i32 [[V]], i32* [[P]], align 4 -; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: store i32 [[I]], i32* [[P]], align 4 +; CHECK-NEXT: ret i32 [[V]] ; entry: %v = load i32, i32* %p, align 4 @@ -311,8 +309,8 @@ store i32 %i, i32* %p2, align 4 br label %bb3 bb3: - store i32 %v, i32* %p, align 4 - ret i32 0 + store i32 %i, i32* %p, align 4 + ret i32 %v } declare void @unknown_func() @@ -329,8 +327,8 @@ ; CHECK-NEXT: call void @unknown_func() ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: -; CHECK-NEXT: store i32 [[V]], i32* [[P]], align 4 -; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: store i32 [[I]], i32* [[P]], align 4 +; CHECK-NEXT: ret i32 [[V]] ; entry: %v = load i32, i32* %p, align 4 @@ -342,8 +340,8 @@ call void @unknown_func() br label %bb3 bb3: - store i32 %v, i32* %p, align 4 - ret i32 0 + store i32 %i, i32* %p, align 4 + ret i32 %v } ; Don't remove redundant store in a loop with a may-alias store. 
@@ -353,22 +351,22 @@ ; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: store i32 [[V]], i32* [[P]], align 4 +; CHECK-NEXT: store i32 [[I]], i32* [[P]], align 4 ; CHECK-NEXT: call void @unknown_func() ; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: ret i32 [[V]] ; entry: %v = load i32, i32* %p, align 4 br label %bb1 bb1: - store i32 %v, i32* %p, align 4 + store i32 %i, i32* %p, align 4 ; Might read and overwrite value at %p call void @unknown_func() br i1 undef, label %bb1, label %bb2 bb2: - ret i32 0 + ret i32 %v } ; TODO @@ -579,3 +577,181 @@ store atomic i8 3, i8* %P2 unordered, align 4 ret void } + +; **** Noop load->store tests ************************************************** + +; We CAN optimize volatile loads. +define void @test_load_volatile(i32* %Q) { +; CHECK-LABEL: @test_load_volatile( +; CHECK-NEXT: [[A:%.*]] = load volatile i32, i32* [[Q:%.*]] +; CHECK-NEXT: store i32 [[A]], i32* [[Q]] +; CHECK-NEXT: ret void +; + %a = load volatile i32, i32* %Q + store i32 %a, i32* %Q + ret void +} + +; We can NOT optimize volatile stores. +define void @test_store_volatile(i32* %Q) { +; CHECK-LABEL: @test_store_volatile( +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[Q:%.*]] +; CHECK-NEXT: store volatile i32 [[A]] +; CHECK-NEXT: ret void +; + %a = load i32, i32* %Q + store volatile i32 %a, i32* %Q + ret void +} + +; PR2599 - load -> store to same address. 
+define void @test12({ i32, i32 }* %x) nounwind { +; CHECK-LABEL: @test12( +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[X:%.*]], i32 0, i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = sub i32 0, [[TMP8]] +; CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP7]], align 4 +; CHECK-NEXT: ret void +; + %tmp4 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 0 + %tmp5 = load i32, i32* %tmp4, align 4 + %tmp7 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1 + %tmp8 = load i32, i32* %tmp7, align 4 + %tmp17 = sub i32 0, %tmp8 + store i32 %tmp5, i32* %tmp4, align 4 + store i32 %tmp17, i32* %tmp7, align 4 + ret void +} + +; Remove redundant store if loaded value is in another block. +define i32 @test26(i1 %c, i32* %p) { +; CHECK-LABEL: @test26( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: ret i32 0 +; +entry: + %v = load i32, i32* %p, align 4 + br i1 %c, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + store i32 %v, i32* %p, align 4 + br label %bb3 +bb3: + ret i32 0 +} + +; Remove redundant store if loaded value is in another block. +define i32 @test27(i1 %c, i32* %p) { +; CHECK-LABEL: @test27( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: ret i32 0 +; +entry: + %v = load i32, i32* %p, align 4 + br i1 %c, label %bb1, label %bb2 +bb1: + br label %bb3 +bb2: + br label %bb3 +bb3: + store i32 %v, i32* %p, align 4 + ret i32 0 +} + +; Remove redundant store if loaded value is in another block inside a loop. 
+define i32 @test31(i1 %c, i32* %p, i32 %i) {
+; CHECK-LABEL: @test31(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br i1 undef, label [[BB1]], label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %v = load i32, i32* %p, align 4
+  br label %bb1
+bb1:
+  store i32 %v, i32* %p, align 4
+  br i1 undef, label %bb1, label %bb2
+bb2:
+  ret i32 0
+}
+
+; Remove redundant store, which is in the same loop as the load.
+define i32 @test33(i1 %c, i32* %p, i32 %i) {
+; CHECK-LABEL: @test33(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    call void @unknown_func()
+; CHECK-NEXT:    br i1 undef, label [[BB1]], label [[BB3:%.*]]
+; CHECK:       bb3:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  br label %bb1
+bb1:
+  %v = load i32, i32* %p, align 4
+  br label %bb2
+bb2:
+  store i32 %v, i32* %p, align 4
+  ; Might read and overwrite value at %p, but doesn't matter.
+  call void @unknown_func()
+  br i1 undef, label %bb1, label %bb3
+bb3:
+  ret i32 0
+}
+
+declare void @unknown_write(i32*)
+
+; We can't remove the "noop" store around an unknown write.
+define void @test43(i32* %Q) {
+; CHECK-LABEL: @test43(
+; CHECK-NEXT:    load
+; CHECK-NEXT:    call
+; CHECK-NEXT:    store
+; CHECK-NEXT:    ret void
+;
+  %a = load i32, i32* %Q
+  call void @unknown_write(i32* %Q)
+  store i32 %a, i32* %Q
+  ret void
+}
+
+; We CAN remove it when the unknown write comes AFTER.
+define void @test44(i32* %Q) {
+; CHECK-LABEL: @test44(
+; CHECK-NEXT:    call
+; CHECK-NEXT:    ret void
+  %a = load i32, i32* %Q
+  store i32 %a, i32* %Q
+  call void @unknown_write(i32* %Q)
+  ret void
+}
+
+define void @test45(i32* %Q) {
+; CHECK-LABEL: @test45(
+; CHECK-NEXT:    [[A:%.*]] = load
+; CHECK-NEXT:    store i32 [[A]]
+; CHECK-NEXT:    ret void
+  %a = load i32, i32* %Q
+  store i32 10, i32* %Q
+  store i32 %a, i32* %Q
+  ret void
+}