Index: llvm/include/llvm/IR/IRBuilder.h
===================================================================
--- llvm/include/llvm/IR/IRBuilder.h
+++ llvm/include/llvm/IR/IRBuilder.h
@@ -578,10 +578,14 @@
                          bool isVolatile = false, MDNode *TBAATag = nullptr,
                          MDNode *TBAAStructTag = nullptr,
                          MDNode *ScopeTag = nullptr,
-                         MDNode *NoAliasTag = nullptr);
+                         MDNode *NoAliasTag = nullptr,
+                         bool Inline = false);
 
   CallInst *CreateMemCpyInline(Value *Dst, MaybeAlign DstAlign, Value *Src,
-                               MaybeAlign SrcAlign, Value *Size);
+                               MaybeAlign SrcAlign, Value *Size) {
+    return CreateMemCpy(Dst, DstAlign, Src, SrcAlign, Size, false, nullptr,
+                        nullptr, nullptr, nullptr, true);
+  }
 
   /// Create and insert an element unordered-atomic memcpy between the
   /// specified pointers.
Index: llvm/include/llvm/IR/IntrinsicInst.h
===================================================================
--- llvm/include/llvm/IR/IntrinsicInst.h
+++ llvm/include/llvm/IR/IntrinsicInst.h
@@ -678,7 +678,8 @@
 public:
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static bool classof(const IntrinsicInst *I) {
-    return I->getIntrinsicID() == Intrinsic::memcpy;
+    auto IID = I->getIntrinsicID();
+    return IID == Intrinsic::memcpy || IID == Intrinsic::memcpy_inline;
   }
   static bool classof(const Value *V) {
     return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
@@ -697,21 +698,6 @@
   }
 };
 
-/// This class wraps the llvm.memcpy.inline intrinsic.
-class MemCpyInlineInst : public MemTransferInst {
-public:
-  ConstantInt *getLength() const {
-    return cast<ConstantInt>(MemTransferInst::getLength());
-  }
-  // Methods for support type inquiry through isa, cast, and dyn_cast:
-  static bool classof(const IntrinsicInst *I) {
-    return I->getIntrinsicID() == Intrinsic::memcpy_inline;
-  }
-  static bool classof(const Value *V) {
-    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
-  }
-};
-
 // The common base class for any memset/memmove/memcpy intrinsics;
 // whether they be atomic or non-atomic.
 // i.e. llvm.element.unordered.atomic.memset/memcpy/memmove
Index: llvm/lib/Analysis/Lint.cpp
===================================================================
--- llvm/lib/Analysis/Lint.cpp
+++ llvm/lib/Analysis/Lint.cpp
@@ -316,9 +316,9 @@
 
     // TODO: Check more intrinsics
 
-  case Intrinsic::memcpy: {
+  case Intrinsic::memcpy:
+  case Intrinsic::memcpy_inline: {
     MemCpyInst *MCI = cast<MemCpyInst>(&I);
-    // TODO: If the size is known, use it.
     visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize,
                          MCI->getDestAlign(), nullptr, MemRef::Write);
     visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize,
@@ -338,22 +338,6 @@
            "Undefined behavior: memcpy source and destination overlap", &I);
     break;
   }
-  case Intrinsic::memcpy_inline: {
-    MemCpyInlineInst *MCII = cast<MemCpyInlineInst>(&I);
-    const uint64_t Size = MCII->getLength()->getValue().getLimitedValue();
-    visitMemoryReference(I, MCII->getDest(), Size, MCII->getDestAlign(),
-                         nullptr, MemRef::Write);
-    visitMemoryReference(I, MCII->getSource(), Size, MCII->getSourceAlign(),
-                         nullptr, MemRef::Read);
-
-    // Check that the memcpy arguments don't overlap. The AliasAnalysis API
-    // isn't expressive enough for what we really want to do. Known partial
-    // overlap is not distinguished from the case where nothing is known.
-    const LocationSize LS = LocationSize::precise(Size);
-    Assert(AA->alias(MCII->getSource(), LS, MCII->getDest(), LS) != MustAlias,
-           "Undefined behavior: memcpy source and destination overlap", &I);
-    break;
-  }
   case Intrinsic::memmove: {
     MemMoveInst *MMI = cast<MemMoveInst>(&I);
     // TODO: If the size is known, use it.
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5716,7 +5716,7 @@
     return;
   }
   case Intrinsic::memcpy_inline: {
-    const auto &MCI = cast<MemCpyInlineInst>(I);
+    const auto &MCI = cast<MemCpyInst>(I);
     SDValue Dst = getValue(I.getArgOperand(0));
     SDValue Src = getValue(I.getArgOperand(1));
     SDValue Size = getValue(I.getArgOperand(2));
Index: llvm/lib/IR/IRBuilder.cpp
===================================================================
--- llvm/lib/IR/IRBuilder.cpp
+++ llvm/lib/IR/IRBuilder.cpp
@@ -139,14 +139,17 @@
                                       Value *Src, MaybeAlign SrcAlign,
                                       Value *Size, bool isVolatile,
                                       MDNode *TBAATag, MDNode *TBAAStructTag,
-                                      MDNode *ScopeTag, MDNode *NoAliasTag) {
+                                      MDNode *ScopeTag, MDNode *NoAliasTag,
+                                      bool Inline) {
   Dst = getCastedInt8PtrValue(Dst);
   Src = getCastedInt8PtrValue(Src);
 
   Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
   Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
   Module *M = BB->getParent()->getParent();
-  Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
+
+  Intrinsic::ID MemCpy = Inline ? Intrinsic::memcpy_inline : Intrinsic::memcpy;
+  Function *TheFn = Intrinsic::getDeclaration(M, MemCpy, Tys);
 
   CallInst *CI = createCallHelper(TheFn, Ops, this);
 
@@ -173,30 +176,6 @@
   return CI;
 }
 
-CallInst *IRBuilderBase::CreateMemCpyInline(Value *Dst, MaybeAlign DstAlign,
-                                            Value *Src, MaybeAlign SrcAlign,
-                                            Value *Size) {
-  Dst = getCastedInt8PtrValue(Dst);
-  Src = getCastedInt8PtrValue(Src);
-  Value *IsVolatile = getInt1(false);
-
-  Value *Ops[] = {Dst, Src, Size, IsVolatile};
-  Type *Tys[] = {Dst->getType(), Src->getType(), Size->getType()};
-  Function *F = BB->getParent();
-  Module *M = F->getParent();
-  Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy_inline, Tys);
-
-  CallInst *CI = createCallHelper(TheFn, Ops, this);
-
-  auto *MCI = cast<MemCpyInlineInst>(CI);
-  if (DstAlign)
-    MCI->setDestAlignment(*DstAlign);
-  if (SrcAlign)
-    MCI->setSourceAlignment(*SrcAlign);
-
-  return CI;
-}
-
 CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemCpy(
     Value *Dst, Align DstAlign, Value *Src, Align SrcAlign, Value *Size,
     uint32_t ElementSize, MDNode *TBAATag, MDNode *TBAAStructTag,
Index: llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -930,7 +930,8 @@
       B.CreateMemCpy(Dest, MTI->getDestAlign(), Src, MTI->getSourceAlign(),
                      MTI->getLength(),
                      false, // isVolatile
-                     TBAA, TBAAStruct, ScopeMD, NoAliasMD);
+                     TBAA, TBAAStruct, ScopeMD, NoAliasMD,
+                     MTI->getIntrinsicID() == Intrinsic::memcpy_inline);
     } else {
       assert(isa<MemMoveInst>(MTI));
       B.CreateMemMove(Dest, MTI->getDestAlign(), Src, MTI->getSourceAlign(),
Index: llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
===================================================================
--- llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
+++ llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
@@ -116,10 +116,27 @@
   ret void
 }
 
+; CHECK-LABEL: @memcpy_inline_flat_to_flat_replace_src_with_group(
+; CHECK: call void @llvm.memcpy.inline.p0i8.p3i8.i64(i8* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 24, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(i8* %dest, i8 addrspace(3)* %src.group.ptr) #0 {
+  %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+  call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %dest, i8* align 4 %cast.src, i64 24, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+  ret void
+}
+
+; CHECK-LABEL: @memcpy_inline_flat_to_flat_replace_dest_with_group(
+; CHECK: call void @llvm.memcpy.inline.p3i8.p0i8.i64(i8 addrspace(3)* align 4 %dest.group.ptr, i8* align 4 %src.ptr, i64 24, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define amdgpu_kernel void @memcpy_inline_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest.group.ptr, i8* %src.ptr, i64 %size) #0 {
+  %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8*
+  call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %cast.dest, i8* align 4 %src.ptr, i64 24, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+  ret void
+}
+
 declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
 declare void @llvm.memcpy.p0i8.p3i8.i32(i8* nocapture writeonly, i8 addrspace(3)* nocapture readonly, i32, i1) #1
 declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
+declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
 
 attributes #0 = { nounwind }
 attributes #1 = { argmemonly nounwind }
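
For reference, a minimal usage sketch (not part of the patch) of the API after this change; the helper name emitBothCopies and the chosen alignments are hypothetical:

// Sketch only: assumes the diff above is applied.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include <cassert>

using namespace llvm;

static void emitBothCopies(IRBuilder<> &B, Value *Dst, Value *Src,
                           Value *Size) {
  // Regular copy: still lowers to @llvm.memcpy.*.
  B.CreateMemCpy(Dst, MaybeAlign(4), Src, MaybeAlign(4), Size);

  // Inline copy: same entry point, selected by the new trailing Inline flag;
  // CreateMemCpyInline is now just sugar for this call.
  CallInst *CI =
      B.CreateMemCpy(Dst, MaybeAlign(4), Src, MaybeAlign(4), Size,
                     /*isVolatile=*/false, /*TBAATag=*/nullptr,
                     /*TBAAStructTag=*/nullptr, /*ScopeTag=*/nullptr,
                     /*NoAliasTag=*/nullptr, /*Inline=*/true);

  // With the widened classof, @llvm.memcpy.inline.* calls are MemCpyInst
  // too, so a single dyn_cast now covers both intrinsics.
  if (auto *MCI = dyn_cast<MemCpyInst>(CI))
    assert(MCI->getIntrinsicID() == Intrinsic::memcpy_inline);
}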