diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1836,6 +1836,38 @@
     if (State.SkipStores.count(KillingDef))
       continue;
     Instruction *SI = KillingDef->getMemoryInst();
+
+    // Check if we're storing a value that we just loaded.
+    if (auto *Load = dyn_cast<LoadInst>(SI->getOperand(0))) {
+      // Make sure no writes happen before SI.
+      bool OnlyWriteIsSI = llvm::all_of(
+          Load->getPointerOperand()->users(), [&SI, &DT](User *MemUser) {
+            // Special case: a load (even volatile) never writes to memory.
+            if (isa<LoadInst>(MemUser))
+              return true;
+            auto *UseI = dyn_cast<Instruction>(MemUser);
+            if (!UseI)
+              return false;
+            // Ignore when the user is the store.
+            if (UseI == SI)
+              return true;
+            // Make sure either the user doesn't write to memory, or it comes
+            // after this store.
+            return !UseI->mayWriteToMemory() || DT.dominates(SI, UseI);
+          });
+      if (OnlyWriteIsSI) {
+        if (Load->getPointerOperand() == SI->getOperand(1)) {
+          LLVM_DEBUG(dbgs()
+                     << "DSE: Remove Dead Store:\n  DEAD: " << *SI << '\n');
+          State.deleteDeadInstruction(SI);
+          continue;
+        }
+        LLVM_DEBUG(dbgs() << "  skip, load pointer != store pointer\n");
+      } else {
+        LLVM_DEBUG(dbgs() << "  skip, memory already written to\n");
+      }
+    }
+
     auto MaybeSILoc = State.getLocForWriteEx(SI);
     if (!MaybeSILoc) {
       LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
@@ -98,3 +98,37 @@
   store i32 1, i32* @x
   ret i32 %x
 }
+
+; **** Noop load->store tests **************************************************
+
+; We can optimize unordered atomic loads or stores.
+define void @test_load_atomic(i32* %Q) {
+; CHECK-LABEL: @test_load_atomic(
+; CHECK-NEXT: ret void
+;
+  %a = load atomic i32, i32* %Q unordered, align 4
+  store atomic i32 %a, i32* %Q unordered, align 4
+  ret void
+}
+
+; We can optimize unordered atomic loads or stores.
+define void @test_store_atomic(i32* %Q) {
+; CHECK-LABEL: @test_store_atomic(
+; CHECK-NEXT: ret void
+;
+  %a = load i32, i32* %Q
+  store atomic i32 %a, i32* %Q unordered, align 4
+  ret void
+}
+
+; We can NOT optimize release atomic stores.
+define void @test_store_atomic_release(i32* %Q) {
+; CHECK-LABEL: @test_store_atomic_release(
+; CHECK-NEXT: load
+; CHECK-NEXT: store atomic
+; CHECK-NEXT: ret void
+;
+  %a = load i32, i32* %Q
+  store atomic i32 %a, i32* %Q release, align 4
+  ret void
+}
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll
@@ -25,15 +25,13 @@
 define i32 @test3(i32* %g_addr) nounwind {
 ; CHECK-LABEL: @test3(
-; CHECK-NEXT: [[G_VALUE:%.*]] = load i32, i32* [[G_ADDR:%.*]], align 4
 ; CHECK-NEXT: store i32 -1, i32* @g, align 4
-; CHECK-NEXT: store i32 [[G_VALUE]], i32* [[G_ADDR]], align 4
+; CHECK-NEXT: store i32 1, i32* %g_addr, align 4
 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* @g, align 4
 ; CHECK-NEXT: ret i32 [[TMP3]]
 ;
-  %g_value = load i32, i32* %g_addr, align 4
   store i32 -1, i32* @g, align 4
-  store i32 %g_value, i32* %g_addr, align 4
+  store i32 1, i32* %g_addr, align 4
   %tmp3 = load i32, i32* @g, align 4
   ret i32 %tmp3
 }
@@ -242,10 +240,8 @@
 define i8* @test25(i8* %p) nounwind {
 ; CHECK-LABEL: @test25(
 ; CHECK-NEXT: [[P_4:%.*]] = getelementptr i8, i8* [[P:%.*]], i64 4
-; CHECK-NEXT: [[TMP:%.*]] = load i8, i8* [[P_4]], align 1
 ; CHECK-NEXT: store i8 0, i8* [[P_4]], align 1
 ; CHECK-NEXT: [[Q:%.*]] = call i8* @strdup(i8* [[P]]) #6
-; CHECK-NEXT: store i8 [[TMP]], i8* [[P_4]], align 1
 ; CHECK-NEXT: ret i8* [[Q]]
 ;
   %p.4 = getelementptr i8, i8* %p, i64 4
@@ -268,8 +264,8 @@
 ; CHECK: bb2:
 ; CHECK-NEXT: br label [[BB3]]
 ; CHECK: bb3:
-; CHECK-NEXT: store i32 [[V]], i32* [[P]], align 4
-; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: store i32 [[I]], i32* [[P]], align 4
+; CHECK-NEXT: ret i32 [[V]]
 ;
 entry:
   %v = load i32, i32* %p, align 4
@@ -282,8 +278,8 @@
 bb2:
   br label %bb3
 bb3:
-  store i32 %v, i32* %p, align 4
-  ret i32 0
+  store i32 %i, i32* %p, align 4
+  ret i32 %v
 }
 
 ; Don't remove redundant store because of may-aliased store.
@@ -298,8 +294,8 @@
 ; CHECK-NEXT: store i32 [[I:%.*]], i32* [[P2:%.*]], align 4
 ; CHECK-NEXT: br label [[BB3]]
 ; CHECK: bb3:
-; CHECK-NEXT: store i32 [[V]], i32* [[P]], align 4
-; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: store i32 [[I]], i32* [[P]], align 4
+; CHECK-NEXT: ret i32 [[V]]
 ;
 entry:
   %v = load i32, i32* %p, align 4
@@ -311,8 +307,8 @@
   store i32 %i, i32* %p2, align 4
   br label %bb3
 bb3:
-  store i32 %v, i32* %p, align 4
-  ret i32 0
+  store i32 %i, i32* %p, align 4
+  ret i32 %v
 }
 
 declare void @unknown_func()
@@ -329,8 +325,8 @@
 ; CHECK-NEXT: call void @unknown_func()
 ; CHECK-NEXT: br label [[BB3]]
 ; CHECK: bb3:
-; CHECK-NEXT: store i32 [[V]], i32* [[P]], align 4
-; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: store i32 [[I]], i32* [[P]], align 4
+; CHECK-NEXT: ret i32 [[V]]
 ;
 entry:
   %v = load i32, i32* %p, align 4
@@ -342,8 +338,8 @@
   call void @unknown_func()
   br label %bb3
 bb3:
-  store i32 %v, i32* %p, align 4
-  ret i32 0
+  store i32 %i, i32* %p, align 4
+  ret i32 %v
 }
 
 ; Don't remove redundant store in a loop with a may-alias store.
@@ -353,22 +349,22 @@
 ; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
 ; CHECK-NEXT: br label [[BB1:%.*]]
 ; CHECK: bb1:
-; CHECK-NEXT: store i32 [[V]], i32* [[P]], align 4
+; CHECK-NEXT: store i32 [[I]], i32* [[P]], align 4
 ; CHECK-NEXT: call void @unknown_func()
 ; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2:%.*]]
 ; CHECK: bb2:
-; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: ret i32 [[V]]
 ;
 entry:
   %v = load i32, i32* %p, align 4
   br label %bb1
 bb1:
-  store i32 %v, i32* %p, align 4
+  store i32 %i, i32* %p, align 4
   ; Might read and overwrite value at %p
   call void @unknown_func()
   br i1 undef, label %bb1, label %bb2
 bb2:
-  ret i32 0
+  ret i32 %v
 }
 
 ; TODO
@@ -579,3 +575,180 @@
   store atomic i8 3, i8* %P2 unordered, align 4
   ret void
 }
+
+; **** Noop load->store tests **************************************************
+
+; We CAN optimize volatile loads.
+define void @test_load_volatile(i32* %Q) {
+; CHECK-LABEL: @test_load_volatile(
+; CHECK-NEXT: [[A:%.*]] = load volatile i32, i32* [[Q:%.*]]
+; CHECK-NEXT: ret void
+;
+  %a = load volatile i32, i32* %Q
+  store i32 %a, i32* %Q
+  ret void
+}
+
+; We can NOT optimize volatile stores.
+define void @test_store_volatile(i32* %Q) {
+; CHECK-LABEL: @test_store_volatile(
+; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[Q:%.*]]
+; CHECK-NEXT: store volatile i32 [[A]]
+; CHECK-NEXT: ret void
+;
+  %a = load i32, i32* %Q
+  store volatile i32 %a, i32* %Q
+  ret void
+}
+
+; PR2599 - load -> store to same address.
+define void @test12({ i32, i32 }* %x) nounwind {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[X:%.*]], i32 0, i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+; CHECK-NEXT: [[TMP17:%.*]] = sub i32 0, [[TMP8]]
+; CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP7]], align 4
+; CHECK-NEXT: ret void
+;
+  %tmp4 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 0
+  %tmp5 = load i32, i32* %tmp4, align 4
+  %tmp7 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1
+  %tmp8 = load i32, i32* %tmp7, align 4
+  %tmp17 = sub i32 0, %tmp8
+  store i32 %tmp5, i32* %tmp4, align 4
+  store i32 %tmp17, i32* %tmp7, align 4
+  ret void
+}
+
+; Remove redundant store if loaded value is in another block.
+define i32 @test26(i1 %c, i32* %p) {
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+  %v = load i32, i32* %p, align 4
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
+  store i32 %v, i32* %p, align 4
+  br label %bb3
+bb3:
+  ret i32 0
+}
+
+; Remove redundant store if loaded value is in another block.
+define i32 @test27(i1 %c, i32* %p) {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+  %v = load i32, i32* %p, align 4
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
+  br label %bb3
+bb3:
+  store i32 %v, i32* %p, align 4
+  ret i32 0
+}
+
+; Remove redundant store if loaded value is in another block inside a loop.
+define i32 @test31(i1 %c, i32* %p, i32 %i) {
+; CHECK-LABEL: @test31(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+  %v = load i32, i32* %p, align 4
+  br label %bb1
+bb1:
+  store i32 %v, i32* %p, align 4
+  br i1 undef, label %bb1, label %bb2
+bb2:
+  ret i32 0
+}
+
+; Remove redundant store, which is in the same loop as the load.
+define i32 @test33(i1 %c, i32* %p, i32 %i) {
+; CHECK-LABEL: @test33(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[BB2:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: call void @unknown_func()
+; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB3:%.*]]
+; CHECK: bb3:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+  br label %bb1
+bb1:
+  %v = load i32, i32* %p, align 4
+  br label %bb2
+bb2:
+  store i32 %v, i32* %p, align 4
+  ; Might read and overwrite value at %p, but doesn't matter.
+  call void @unknown_func()
+  br i1 undef, label %bb1, label %bb3
+bb3:
+  ret i32 0
+}
+
+declare void @unknown_write(i32*)
+
+; We can't remove the "noop" store around an unknown write.
+define void @test43(i32* %Q) {
+; CHECK-LABEL: @test43(
+; CHECK-NEXT: load
+; CHECK-NEXT: call
+; CHECK-NEXT: store
+; CHECK-NEXT: ret void
+;
+  %a = load i32, i32* %Q
+  call void @unknown_write(i32* %Q)
+  store i32 %a, i32* %Q
+  ret void
+}
+
+; We CAN remove it when the unknown write comes AFTER the store.
+define void @test44(i32* %Q) {
+; CHECK-LABEL: @test44(
+; CHECK-NEXT: call
+; CHECK-NEXT: ret void
+  %a = load i32, i32* %Q
+  store i32 %a, i32* %Q
+  call void @unknown_write(i32* %Q)
+  ret void
+}
+
+define void @test45(i32* %Q) {
+; CHECK-LABEL: @test45(
+; CHECK-NEXT: [[A:%.*]] = load
+; CHECK-NEXT: store i32 [[A]]
+; CHECK-NEXT: ret void
+  %a = load i32, i32* %Q
+  store i32 10, i32* %Q
+  store i32 %a, i32* %Q
+  ret void
+}