diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -11695,6 +11695,65 @@ If "len" is 0, the pointers may be NULL or dangling. However, they must still be appropriately aligned. +.. _int_memcpy_inline: + +'``llvm.memcpy.inline``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.memcpy.inline`` on any +integer bit width and for different address spaces. Not all targets +support all bit widths, however. + +:: + + declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* <dest>, i8* <src>, + i32 <len>, i1 <isvolatile>) + declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* <dest>, i8* <src>, + i64 <len>, i1 <isvolatile>) + +Overview: +""""""""" + +The '``llvm.memcpy.inline.*``' intrinsics copy a block of memory from the +source location to the destination location and guarantee that no external +functions are called. + +Note that, unlike the standard libc function, the ``llvm.memcpy.inline.*`` +intrinsics do not return a value, take an extra isvolatile +argument, and the pointers can be in specified address spaces. + +Arguments: +"""""""""" + +The first argument is a pointer to the destination, the second is a +pointer to the source. The third argument is a constant integer argument +specifying the number of bytes to copy, and the fourth is a +boolean indicating a volatile access. + +The :ref:`align <attr_align>` parameter attribute can be provided +for the first and second arguments. + +If the ``isvolatile`` parameter is ``true``, the ``llvm.memcpy.inline`` call is +a :ref:`volatile operation <volatile>`. The detailed access behavior is not +very cleanly specified and it is unwise to depend on it. + +Semantics: +"""""""""" + +The '``llvm.memcpy.inline.*``' intrinsics copy a block of memory from the +source location to the destination location, which are not allowed to +overlap. They copy "len" bytes of memory over. 
If the argument is known +to be aligned to some boundary, this can be specified as an attribute on +the argument. + +If "len" is 0, the pointers may be NULL or dangling. However, they must still +be appropriately aligned. + +The generated code is guaranteed not to call any external functions. + .. _int_memmove: '``llvm.memmove``' Intrinsic diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -582,6 +582,7 @@ case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: + case Intrinsic::memcpy_inline: return true; default: return false; } @@ -608,8 +609,14 @@ public: // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const IntrinsicInst *I) { - return I->getIntrinsicID() == Intrinsic::memcpy || - I->getIntrinsicID() == Intrinsic::memmove; + switch (I->getIntrinsicID()) { + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memcpy_inline: + return true; + default: + return false; + } } static bool classof(const Value *V) { return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); @@ -640,6 +647,21 @@ } }; + /// This class wraps the llvm.memcpy.inline intrinsic. + class MemCpyInlineInst : public MemTransferInst { + public: + ConstantInt *getLength() const { + return cast<ConstantInt>(MemTransferInst::getLength()); + } + // Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::memcpy_inline; + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } + }; + // The common base class for any memset/memmove/memcpy intrinsics; // whether they be atomic or non-atomic. // i.e. 
llvm.element.unordered.atomic.memset/memcpy/memmove diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -511,6 +511,17 @@ llvm_i1_ty], [IntrArgMemOnly, IntrWillReturn, NoCapture<0>, NoCapture<1>, NoAlias<0>, NoAlias<1>, WriteOnly<0>, ReadOnly<1>, ImmArg<3>]>; + +// Memcpy semantic that is guaranteed to be inlined. +// In particular this means that the generated code is not allowed to call any +// external function. +// The third argument (specifying the size) must be a constant. +def int_memcpy_inline + : Intrinsic<[], + [ llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty ], + [ IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>, ReadOnly<1>, + ImmArg<2>, ImmArg<3> ]>; + def int_memmove : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty], diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp --- a/llvm/lib/Analysis/Lint.cpp +++ b/llvm/lib/Analysis/Lint.cpp @@ -345,6 +345,24 @@ "Undefined behavior: memcpy source and destination overlap", &I); break; } + case Intrinsic::memcpy_inline: { + MemCpyInlineInst *MCII = cast<MemCpyInlineInst>(&I); + const APInt &APLen = MCII->getLength()->getValue(); + assert(APLen.isIntN(64) && "@llvm.memcpy.inline size must be uint64"); + const uint64_t Size = APLen.getZExtValue(); + visitMemoryReference(I, MCII->getDest(), Size, MCII->getDestAlignment(), + nullptr, MemRef::Write); + visitMemoryReference(I, MCII->getSource(), Size, + MCII->getSourceAlignment(), nullptr, MemRef::Read); + + // Check that the memcpy arguments don't overlap. The AliasAnalysis API + // isn't expressive enough for what we really want to do. Known partial + // overlap is not distinguished from the case where nothing is known. 
+ const LocationSize LS = LocationSize::precise(Size); + Assert(AA->alias(MCII->getSource(), LS, MCII->getDest(), LS) != MustAlias, + "Undefined behavior: memcpy source and destination overlap", &I); + break; + } case Intrinsic::memmove: { MemMoveInst *MMI = cast<MemMoveInst>(&I); // TODO: If the size is known, use it. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5872,12 +5872,33 @@ // node. SDValue Root = isVol ? getRoot() : getMemoryRoot(); SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Align, isVol, - false, isTC, + /* AlwaysInline */ false, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); updateDAGForMaybeTailCall(MC); return; } + case Intrinsic::memcpy_inline: { + const auto &MCI = cast<MemCpyInlineInst>(I); + SDValue Dst = getValue(I.getArgOperand(0)); + SDValue Src = getValue(I.getArgOperand(1)); + SDValue Size = getValue(I.getArgOperand(2)); + assert(isa<ConstantSDNode>(Size) && "memcpy_inline needs constant size"); + // @llvm.memcpy.inline defines 0 and 1 to both mean no alignment. + Align DstAlign = MCI.getDestAlign().valueOrOne(); + Align SrcAlign = MCI.getSourceAlign().valueOrOne(); + Align Alignment = commonAlignment(DstAlign, SrcAlign); + bool isVol = MCI.isVolatile(); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + // FIXME: Support passing different dest/src alignments to the memcpy DAG + // node. 
+ SDValue MC = DAG.getMemcpy( + getRoot(), sdl, Dst, Src, Size, Alignment.value(), isVol, + /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + updateDAGForMaybeTailCall(MC); + return; + } case Intrinsic::memset: { const auto &MSI = cast<MemSetInst>(I); SDValue Op1 = getValue(I.getArgOperand(0)); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4338,6 +4338,7 @@ visitDbgLabelIntrinsic("label", cast<DbgLabelInst>(Call)); break; case Intrinsic::memcpy: + case Intrinsic::memcpy_inline: case Intrinsic::memmove: case Intrinsic::memset: { const auto *MI = cast<MemIntrinsic>(&Call); diff --git a/llvm/test/CodeGen/X86/memcpy-inline.ll b/llvm/test/CodeGen/X86/memcpy-inline.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/memcpy-inline.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck %s + +; NOTE: This fails to compile on platform without memcpy +; XFAIL: llc < %s -mtriple=r600-unknown-linux-gnu + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind +declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind + +define void @test1(i8* %a, i8* %b) nounwind { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: +; CHECK-NEXT: movq (%rsi), %rax +; CHECK-NEXT: movq %rax, (%rdi) +; CHECK-NEXT: retq + tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %a, i8* %b, i64 8, i1 0 ) + ret void +} + +define void @regular_memcpy_calls_external_function(i8* %a, i8* %b) nounwind { +; CHECK-LABEL: regular_memcpy_calls_external_function: +; CHECK: # %bb.0: +; CHECK-NEXT: movl $128, %edx +; CHECK-NEXT: jmp memcpy # TAILCALL + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i1 0 ) + ret void +} + +define void @inlined_copy_doesnt_call_external_function(i8* %a, i8* %b) 
nounwind { +; CHECK-LABEL: inlined_copy_doesnt_call_external_function: +; CHECK: # %bb.0: +; CHECK-NEXT: movl $128, %ecx +; CHECK-NEXT: rep;movsb (%rsi), %es:(%rdi) +; CHECK-NEXT: retq + tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i1 0 ) + ret void +} diff --git a/llvm/test/Other/lint.ll b/llvm/test/Other/lint.ll --- a/llvm/test/Other/lint.ll +++ b/llvm/test/Other/lint.ll @@ -4,6 +4,7 @@ declare fastcc void @bar() declare void @llvm.stackrestore(i8*) declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind +declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind declare void @has_sret(i8* sret %p) declare void @has_noaliases(i32* noalias %p, i32* %q) declare void @one_arg(i32) @@ -80,6 +81,8 @@ ; CHECK: Write to read-only memory call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG2 to i8*), i64 1, i1 0) +; CHECK: Write to read-only memory +call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG2 to i8*), i64 1, i1 0) ; CHECK: Unusual: noalias argument aliases another argument call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG to i8*), i64 1, i1 0) @@ -189,3 +192,11 @@ ; CHECK: Undefined behavior: indirectbr with no destinations indirectbr i8* null, [] } + +define i32 @memcpy_inline_same_address() noreturn { + %buf = alloca i64, align 1 + %ptr = bitcast i64* %buf to i8* + ; CHECK: Undefined behavior: memcpy source and destination overlap + call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %ptr, i8* %ptr, i64 1, i1 false) + unreachable +} diff --git a/llvm/test/Verifier/intrinsic-immarg.ll b/llvm/test/Verifier/intrinsic-immarg.ll --- a/llvm/test/Verifier/intrinsic-immarg.ll +++ b/llvm/test/Verifier/intrinsic-immarg.ll @@ -27,6 +27,23 @@ ret void } +declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) +define void 
@memcpy_inline_is_volatile(i8* %dest, i8* %src, i1 %is.volatile) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %is.volatile + ; CHECK-NEXT: call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile) + call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile) + ret void +} + +define void @memcpy_inline_variable_size(i8* %dest, i8* %src, i32 %size) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %size + ; CHECK-NEXT: call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %size, i1 true) + call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %size, i1 true) + ret void +} + declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) define void @memmove(i8* %dest, i8* %src, i1 %is.volatile) { ; CHECK: immarg operand has non-immediate parameter diff --git a/llvm/test/Verifier/memcpy-inline.ll b/llvm/test/Verifier/memcpy-inline.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Verifier/memcpy-inline.ll @@ -0,0 +1,9 @@ +; RUN: not opt -verify < %s 2>&1 | FileCheck %s + +; CHECK: alignment is not a power of two + +define void @foo(i8* %P, i8* %Q) { + call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* align 3 %P, i8* %Q, i32 4, i1 false) + ret void +} +declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind