Index: llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp
+++ llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -562,6 +562,19 @@
       continue;
     }
 
+    // Skip invariant.start intrinsics: they only read memory, so we can
+    // forward values across them. Also, we don't need to consume the last
+    // store, since the semantics of invariant.start allow us to perform DSE
+    // of the last store when a store follows the invariant.start. Consider:
+    //
+    //   store 30, i8* p
+    //   invariant.start(p)
+    //   store 40, i8* p
+    // We can DSE the store of 30: the store of 40 to the invariant location
+    // p causes undefined behaviour.
+    if (match(Inst, m_Intrinsic<Intrinsic::invariant_start>()))
+      continue;
+
     if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
       if (auto *CondI =
               dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) {
Index: llvm/trunk/test/Transforms/EarlyCSE/invariant.start.ll
===================================================================
--- llvm/trunk/test/Transforms/EarlyCSE/invariant.start.ll
+++ llvm/trunk/test/Transforms/EarlyCSE/invariant.start.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s -S -early-cse | FileCheck %s
+; RUN: opt < %s -S -passes=early-cse | FileCheck %s
+
+declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly
+declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind
+
+; Check that we do load-load forwarding over invariant.start, since it does
+; not clobber memory.
+define i8 @test1(i8* %P) {
+  ; CHECK-LABEL: @test1(
+  ; CHECK-NEXT: %V1 = load i8, i8* %P
+  ; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
+  ; CHECK-NEXT: ret i8 0
+
+
+  %V1 = load i8, i8* %P
+  %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
+  %V2 = load i8, i8* %P
+  %Diff = sub i8 %V1, %V2
+  ret i8 %Diff
+}
+
+
+; Trivial store-to-load forwarding over invariant.start.
+define i8 @test2(i8* %P) {
+  ; CHECK-LABEL: @test2(
+  ; CHECK-NEXT: store i8 42, i8* %P
+  ; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
+  ; CHECK-NEXT: ret i8 42
+
+
+  store i8 42, i8* %P
+  %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
+  %V1 = load i8, i8* %P
+  ret i8 %V1
+}
+
+; We can DSE over invariant.start calls: the second store to the invariant
+; location %P causes undefined behaviour, so EarlyCSE is free to remove the
+; first store.
+define void @test3(i8* %P) {
+
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
+; CHECK-NEXT: store i8 60, i8* %P
+
+
+  store i8 50, i8* %P
+  %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
+  store i8 60, i8* %P
+  ret void
+}
+
+
+; FIXME: The first store can also be eliminated, since there is no read of %P
+; within the invariant region between the start and end calls.
+define void @test4(i8* %P) {
+
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: store i8 50, i8* %P
+; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
+; CHECK-NEXT: call void @llvm.invariant.end.p0i8({}* %i, i64 1, i8* %P)
+; CHECK-NEXT: store i8 60, i8* %P
+
+
+  store i8 50, i8* %P
+  %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %P)
+  call void @llvm.invariant.end.p0i8({}* %i, i64 1, i8* %P)
+  store i8 60, i8* %P
+  ret void
+}
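
Note (not part of the patch): to exercise the new test file locally, the usual
LLVM workflow applies. This assumes an opt and llvm-lit built from a tree that
includes this change, with paths adjusted to your checkout layout.

  # Feed the test through EarlyCSE directly and inspect the resulting IR:
  opt < test/Transforms/EarlyCSE/invariant.start.ll -S -early-cse

  # Or run it under lit so the FileCheck assertions are verified:
  llvm-lit test/Transforms/EarlyCSE/invariant.start.ll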