diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -166,6 +166,15 @@
       // These intrinsics don't really modify the memory, but returning Mod
       // will allow them to be handled conservatively.
       return ModRefInfo::Mod;
+    case Intrinsic::masked_load:
+      // The pointer is argument 0; a masked load only reads through it.
+      Loc = MemoryLocation::getForArgument(II, 0, TLI);
+      return ModRefInfo::Ref;
+    case Intrinsic::masked_store:
+      // The pointer is argument 1 (argument 0 is the value being stored); a
+      // masked store only writes through it.
+      Loc = MemoryLocation::getForArgument(II, 1, TLI);
+      return ModRefInfo::Mod;
     default:
       break;
     }
@@ -442,7 +451,9 @@
     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
       // If we reach a lifetime begin or end marker, then the query ends here
       // because the value is undefined.
-      if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+      Intrinsic::ID ID = II->getIntrinsicID();
+      switch (ID) {
+      case Intrinsic::lifetime_start:
         // FIXME: This only considers queries directly on the invariant-tagged
         // pointer, not on query pointers that are indexed off of them. It'd
         // be nice to handle that at some point (the right approach is to use
@@ -450,6 +461,23 @@
         if (BatchAA.isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc))
           return MemDepResult::getDef(II);
         continue;
+      case Intrinsic::masked_load:
+      case Intrinsic::masked_store: {
+        // Only the accessed location is needed here; whether the intrinsic
+        // reads or writes is decided from ID below.
+        MemoryLocation Loc;
+        /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI);
+        AliasResult R = BatchAA.alias(Loc, MemLoc);
+        if (R == NoAlias)
+          continue;
+        if (R == MustAlias)
+          return MemDepResult::getDef(II);
+        // Any other alias result: a masked load does not write memory, so
+        // keep scanning; a masked store has to be reported as a clobber.
+        if (ID == Intrinsic::masked_load)
+          continue;
+        return MemDepResult::getClobber(II);
+      }
       }
     }
 
diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp
--- a/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/llvm/lib/Analysis/MemoryLocation.cpp
@@ -176,6 +176,25 @@
               cast<ConstantInt>(II->getArgOperand(0))->getZExtValue()),
           AATags);
 
+    case Intrinsic::masked_load:
+      // The mask can disable lanes, so the store size of the full result
+      // vector is only an upper bound on what is read.
+      assert(ArgIdx == 0 && "Invalid argument index");
+      return MemoryLocation(
+          Arg,
+          LocationSize::upperBound(DL.getTypeStoreSize(II->getType())),
+          AATags);
+
+    case Intrinsic::masked_store:
+      // Likewise, the store size of the stored vector (argument 0) is only an
+      // upper bound on what is written.
+      assert(ArgIdx == 1 && "Invalid argument index");
+      return MemoryLocation(
+          Arg,
+          LocationSize::upperBound(
+              DL.getTypeStoreSize(II->getArgOperand(0)->getType())),
+          AATags);
+
     case Intrinsic::invariant_end:
       // The first argument to an invariant.end is a "descriptor" type (e.g. a
       // pointer to a empty struct) which is never actually dereferenced.
diff --git a/llvm/test/Transforms/GVN/masked-load-store.ll b/llvm/test/Transforms/GVN/masked-load-store.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/GVN/masked-load-store.ll
@@ -0,0 +1,35 @@
+; RUN: opt -gvn -S < %s | FileCheck %s
+
+; Check that in both cases the second load is recognized as redundant
+; and is removed.
+
+; CHECK-LABEL: define <128 x i8> @f0
+; CHECK: call <128 x i8> @llvm.masked.load.v128i8.p0v128i8
+; CHECK-NOT: llvm.masked.load
+define <128 x i8> @f0(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) {
+  %mask = icmp eq <128 x i8> %a1, %a2
+  %first = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %mask, <128 x i8> undef)
+  %second = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %mask, <128 x i8> undef)
+  %sum = add <128 x i8> %first, %second
+  ret <128 x i8> %sum
+}
+
+; CHECK-LABEL: define <128 x i8> @f1
+; CHECK: call <128 x i8> @llvm.masked.load.v128i8.p0v128i8
+; CHECK: call void @llvm.masked.store.v128i8.p0v128i8
+; CHECK-NOT: llvm.masked.load
+define <128 x i8> @f1(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) {
+  %next = getelementptr <128 x i8>, <128 x i8>* %a0, i32 1
+  %mask = icmp eq <128 x i8> %a1, %a2
+  %first = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %mask, <128 x i8> undef)
+  call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> %a2, <128 x i8>* %next, i32 4, <128 x i1> %mask)
+  %second = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %mask, <128 x i8> undef)
+  %sum = add <128 x i8> %first, %second
+  ret <128 x i8> %sum
+}
+
+; End the preceding CHECK-NOT region before the intrinsic prototypes below.
+; CHECK-LABEL: declare
+declare <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>*, i32, <128 x i1>, <128 x i8>)
+declare void @llvm.masked.store.v128i8.p0v128i8(<128 x i8>, <128 x i8>*, i32, <128 x i1>)
+