Index: llvm/trunk/include/llvm/Analysis/MemoryLocation.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/MemoryLocation.h
+++ llvm/trunk/include/llvm/Analysis/MemoryLocation.h
@@ -27,6 +27,10 @@
 class StoreInst;
 class MemTransferInst;
 class MemIntrinsic;
+class AtomicMemTransferInst;
+class AtomicMemIntrinsic;
+class AnyMemTransferInst;
+class AnyMemIntrinsic;
 class TargetLibraryInfo;
 
 /// Representation for a specific memory location.
@@ -90,10 +94,14 @@
 
   /// Return a location representing the source of a memory transfer.
   static MemoryLocation getForSource(const MemTransferInst *MTI);
+  static MemoryLocation getForSource(const AtomicMemTransferInst *MTI);
+  static MemoryLocation getForSource(const AnyMemTransferInst *MTI);
 
   /// Return a location representing the destination of a memory set or
   /// transfer.
   static MemoryLocation getForDest(const MemIntrinsic *MI);
+  static MemoryLocation getForDest(const AtomicMemIntrinsic *MI);
+  static MemoryLocation getForDest(const AnyMemIntrinsic *MI);
 
   /// Return a location representing a particular argument of a call.
   static MemoryLocation getForArgument(ImmutableCallSite CS, unsigned ArgIdx,
Index: llvm/trunk/lib/Analysis/MemoryLocation.cpp
===================================================================
--- llvm/trunk/lib/Analysis/MemoryLocation.cpp
+++ llvm/trunk/lib/Analysis/MemoryLocation.cpp
@@ -65,6 +65,14 @@
 }
 
 MemoryLocation MemoryLocation::getForSource(const MemTransferInst *MTI) {
+  return getForSource(cast<AnyMemTransferInst>(MTI));
+}
+
+MemoryLocation MemoryLocation::getForSource(const AtomicMemTransferInst *MTI) {
+  return getForSource(cast<AnyMemTransferInst>(MTI));
+}
+
+MemoryLocation MemoryLocation::getForSource(const AnyMemTransferInst *MTI) {
   uint64_t Size = UnknownSize;
   if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
     Size = C->getValue().getZExtValue();
@@ -77,17 +85,25 @@
   return MemoryLocation(MTI->getRawSource(), Size, AATags);
 }
 
-MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MTI) {
+MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MI) {
+  return getForDest(cast<AnyMemIntrinsic>(MI));
+}
+
+MemoryLocation MemoryLocation::getForDest(const AtomicMemIntrinsic *MI) {
+  return getForDest(cast<AnyMemIntrinsic>(MI));
+}
+
+MemoryLocation MemoryLocation::getForDest(const AnyMemIntrinsic *MI) {
   uint64_t Size = UnknownSize;
-  if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+  if (ConstantInt *C = dyn_cast<ConstantInt>(MI->getLength()))
     Size = C->getValue().getZExtValue();
 
   // memcpy/memmove can have AA tags. For memcpy, they apply
   // to both the source and the destination.
   AAMDNodes AATags;
-  MTI->getAAMetadata(AATags);
+  MI->getAAMetadata(AATags);
 
-  return MemoryLocation(MTI->getRawDest(), Size, AATags);
+  return MemoryLocation(MI->getRawDest(), Size, AATags);
 }
 
 MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
Index: llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -160,6 +160,9 @@
     case Intrinsic::memset:
     case Intrinsic::memmove:
     case Intrinsic::memcpy:
+    case Intrinsic::memcpy_element_unordered_atomic:
+    case Intrinsic::memmove_element_unordered_atomic:
+    case Intrinsic::memset_element_unordered_atomic:
     case Intrinsic::init_trampoline:
     case Intrinsic::lifetime_end:
       return true;
@@ -189,7 +192,7 @@
   if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
     return MemoryLocation::get(SI);
 
-  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
+  if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst)) {
     // memcpy/memmove/memset.
     MemoryLocation Loc = MemoryLocation::getForDest(MI);
     return Loc;
@@ -222,7 +225,7 @@
 
   // The only instructions that both read and write are the mem transfer
   // instructions (memcpy/memmove).
-  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst))
+  if (auto *MTI = dyn_cast<AnyMemTransferInst>(Inst))
     return MemoryLocation::getForSource(MTI);
   return MemoryLocation();
 }
@@ -249,6 +252,10 @@
     case Intrinsic::memcpy:
       // Don't remove volatile memory intrinsics.
       return !cast<MemIntrinsic>(II)->isVolatile();
+    case Intrinsic::memcpy_element_unordered_atomic:
+    case Intrinsic::memmove_element_unordered_atomic:
+    case Intrinsic::memset_element_unordered_atomic:
+      return true;
     }
   }
@@ -273,6 +280,7 @@
       case Intrinsic::memcpy:
         // Do shorten memory intrinsics.
         // FIXME: Add memmove if it's also safe to transform.
+        // TODO: Add atomic memcpy/memset
        return true;
     }
   }
@@ -287,6 +295,7 @@
 static bool isShortenableAtTheBeginning(Instruction *I) {
   // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
   // easily done by offsetting the source address.
+  // TODO: Handle atomic memory intrinsics
   IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
   return II && II->getIntrinsicID() == Intrinsic::memset;
 }
@@ -534,7 +543,7 @@
   if (AA.isNoAlias(InstReadLoc, InstStoreLoc))
     return false;
 
-  if (isa<MemCpyInst>(Inst)) {
+  if (isa<AnyMemCpyInst>(Inst)) {
     // LLVM's memcpy overlap semantics are not fully fleshed out (see PR11763)
     // but in practice memcpy(A <- B) either means that A and B are disjoint or
     // are equal (i.e. there are not partial overlaps). Given that, if we have:
@@ -856,8 +865,6 @@
       LoadedLoc = MemoryLocation::get(L);
     } else if (VAArgInst *V = dyn_cast<VAArgInst>(BBI)) {
       LoadedLoc = MemoryLocation::get(V);
-    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(BBI)) {
-      LoadedLoc = MemoryLocation::getForSource(MTI);
     } else if (!BBI->mayReadFromMemory()) {
       // Instruction doesn't read memory. Note that stores that weren't removed
       // above will hit this case.
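For reference: the AnyMemIntrinsic/AnyMemTransferInst/AnyMemCpyInst classes used above are declared in llvm/IR/IntrinsicInst.h, and their classof() predicates accept both the plain and the element-unordered-atomic intrinsic IDs. That is what makes cross-casts such as cast<AnyMemTransferInst>(MTI) in MemoryLocation.cpp valid even though MemTransferInst and AnyMemTransferInst are unrelated C++ types; delegating the old overloads through such a cast keeps a single implementation for both intrinsic families. A minimal sketch of a client using the new overloads (the helper names below are illustrative only, not part of this patch):

    // Sketch: query write/read locations uniformly for plain and
    // element-unordered-atomic memory intrinsics.
    #include "llvm/Analysis/MemoryLocation.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/IntrinsicInst.h"

    using namespace llvm;

    // Location written by I: matches llvm.memset/memcpy/memmove as well
    // as the llvm.*.element.unordered.atomic variants.
    static MemoryLocation getWrittenLoc(Instruction *I) {
      if (auto *MI = dyn_cast<AnyMemIntrinsic>(I))
        return MemoryLocation::getForDest(MI);
      return MemoryLocation();
    }

    // Location read by I: only the transfer intrinsics (memcpy/memmove,
    // atomic or not) also read memory.
    static MemoryLocation getReadLoc(Instruction *I) {
      if (auto *MTI = dyn_cast<AnyMemTransferInst>(I))
        return MemoryLocation::getForSource(MTI);
      return MemoryLocation();
    }

This mirrors the getLocForWrite/getLocForRead changes in DeadStoreElimination.cpp above.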
Index: llvm/trunk/test/Transforms/DeadStoreElimination/memintrinsics.ll
===================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/memintrinsics.ll
+++ llvm/trunk/test/Transforms/DeadStoreElimination/memintrinsics.ll
@@ -54,10 +54,6 @@
 
 define void @test4() {
 ; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[A:%.*]] = alloca i16, i16 1024, align 2
-; CHECK-NEXT:    [[B:%.*]] = alloca i16, i16 1024, align 2
-; CHECK-NEXT:    store atomic i16 0, i16* [[B]] unordered, align 2
-; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i16.p0i16.i16(i16* align 2 [[A]], i16* align 2 [[B]], i16 1024, i32 2)
 ; CHECK-NEXT:    ret void
 ;
   %A = alloca i16, i16 1024, align 2
@@ -73,10 +69,6 @@
 
 define void @test5() {
 ; CHECK-LABEL: @test5(
-; CHECK-NEXT:    [[A:%.*]] = alloca i16, i16 1024, align 2
-; CHECK-NEXT:    [[B:%.*]] = alloca i16, i16 1024, align 2
-; CHECK-NEXT:    store atomic i16 0, i16* [[B]] unordered, align 2
-; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0i16.p0i16.i16(i16* align 2 [[A]], i16* align 2 [[B]], i16 1024, i32 2)
 ; CHECK-NEXT:    ret void
 ;
   %A = alloca i16, i16 1024, align 2
@@ -92,8 +84,6 @@
 
 define void @test6() {
 ; CHECK-LABEL: @test6(
-; CHECK-NEXT:    [[A:%.*]] = alloca i16, i16 1024, align 2
-; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i16.i16(i16* align 2 [[A]], i8 0, i16 1024, i32 2)
 ; CHECK-NEXT:    ret void
 ;
   %A = alloca i16, i16 1024, align 2
Index: llvm/trunk/test/Transforms/DeadStoreElimination/simple.ll
===================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/simple.ll
+++ llvm/trunk/test/Transforms/DeadStoreElimination/simple.ll
@@ -92,9 +92,8 @@
 ; alias).
 define void @test6_atomic(i32* align 4 %p, i8* align 4 %q) {
 ; CHECK-LABEL: @test6_atomic(
-; CHECK-NEXT:    store atomic i32 10, i32* [[P:%.*]] unordered, align 4
 ; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[Q:%.*]], i8 42, i64 900, i32 4)
-; CHECK-NEXT:    store atomic i32 30, i32* [[P]] unordered, align 4
+; CHECK-NEXT:    store atomic i32 30, i32* [[P:%.*]] unordered, align 4
 ; CHECK-NEXT:    ret void
 ;
   store atomic i32 10, i32* %p unordered, align 4 ;; dead.
@@ -121,9 +120,8 @@
 ; alias).
 define void @test7_atomic(i32* align 4 %p, i8* align 4 %q, i8* noalias align 4 %r) {
 ; CHECK-LABEL: @test7_atomic(
-; CHECK-NEXT:    store atomic i32 10, i32* [[P:%.*]] unordered, align 4
 ; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 [[Q:%.*]], i8* align 4 [[R:%.*]], i64 900, i32 4)
-; CHECK-NEXT:    store atomic i32 30, i32* [[P]] unordered, align 4
+; CHECK-NEXT:    store atomic i32 30, i32* [[P:%.*]] unordered, align 4
 ; CHECK-NEXT:    ret void
 ;
   store atomic i32 10, i32* %p unordered, align 4 ;; dead.
@@ -292,7 +290,6 @@
 define void @test15_atomic(i8* %P, i8* %Q) nounwind ssp {
 ; CHECK-LABEL: @test15_atomic(
 ; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
-; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
@@ -300,11 +297,10 @@
   ret void
 }
 
-; It would only be valid to remove the non-atomic memcpy
+;; Fully dead overwrite of memcpy.
 define void @test15_atomic_weaker(i8* %P, i8* %Q) nounwind ssp {
 ; CHECK-LABEL: @test15_atomic_weaker(
-; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
-; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i1 false)
@@ -312,11 +308,10 @@
   ret void
 }
 
-; It would only be valid to remove the non-atomic memcpy
+;; Fully dead overwrite of memcpy.
 define void @test15_atomic_weaker_2(i8* %P, i8* %Q) nounwind ssp {
 ; CHECK-LABEL: @test15_atomic_weaker_2(
-; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
-; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
@@ -338,8 +333,7 @@
 ;; Full overwrite of smaller memcpy.
 define void @test16_atomic(i8* %P, i8* %Q) nounwind ssp {
 ; CHECK-LABEL: @test16_atomic(
-; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 8, i32 1)
-; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i32 1)
@@ -350,8 +344,7 @@
 ;; Full overwrite of smaller memory where overwrite has stronger atomicity
 define void @test16_atomic_weaker(i8* %P, i8* %Q) nounwind ssp {
 ; CHECK-LABEL: @test16_atomic_weaker(
-; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 8, i1 false)
-; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i1 false)
@@ -362,8 +355,7 @@
 ;; Full overwrite of smaller memory where overwrite has weaker atomicity.
 define void @test16_atomic_weaker_2(i8* %P, i8* %Q) nounwind ssp {
 ; CHECK-LABEL: @test16_atomic_weaker_2(
-; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 8, i32 1)
-; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 8, i32 1)
@@ -385,8 +377,7 @@
 ;; Overwrite of memset by memcpy.
 define void @test17_atomic(i8* %P, i8* noalias %Q) nounwind ssp {
 ; CHECK-LABEL: @test17_atomic(
-; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 [[P:%.*]], i8 42, i64 8, i32 1)
-; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i32 1)
@@ -398,8 +389,7 @@
 ;; remove the memset.
 define void @test17_atomic_weaker(i8* %P, i8* noalias %Q) nounwind ssp {
 ; CHECK-LABEL: @test17_atomic_weaker(
-; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* align 1 [[P:%.*]], i8 42, i64 8, i1 false)
-; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.memset.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i1 false)
@@ -411,8 +401,7 @@
 ;; the memset.
 define void @test17_atomic_weaker_2(i8* %P, i8* noalias %Q) nounwind ssp {
 ; CHECK-LABEL: @test17_atomic_weaker_2(
-; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 [[P:%.*]], i8 42, i64 8, i32 1)
-; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %P, i8 42, i64 8, i32 1)