diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -582,6 +582,7 @@
       case Intrinsic::memcpy:
       case Intrinsic::memmove:
       case Intrinsic::memset:
+      case Intrinsic::memcpy_inline:
         return true;
       default: return false;
       }
@@ -608,8 +609,14 @@
   public:
     // Methods for support type inquiry through isa, cast, and dyn_cast:
     static bool classof(const IntrinsicInst *I) {
-      return I->getIntrinsicID() == Intrinsic::memcpy ||
-             I->getIntrinsicID() == Intrinsic::memmove;
+      switch (I->getIntrinsicID()) {
+      case Intrinsic::memcpy:
+      case Intrinsic::memmove:
+      case Intrinsic::memcpy_inline:
+        return true;
+      default:
+        return false;
+      }
     }
     static bool classof(const Value *V) {
       return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
@@ -640,6 +647,19 @@
     }
   };
 
+  /// This class wraps the llvm.memcpy.inline intrinsic: a memcpy that is
+  /// guaranteed to be inlined.
+  class MemCpyInlineInst : public MemTransferInst {
+  public:
+    // Methods for support type inquiry through isa, cast, and dyn_cast:
+    static bool classof(const IntrinsicInst *I) {
+      return I->getIntrinsicID() == Intrinsic::memcpy_inline;
+    }
+    static bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+  };
+
   // The common base class for any memset/memmove/memcpy intrinsics;
   // whether they be atomic or non-atomic.
   // i.e. llvm.element.unordered.atomic.memset/memcpy/memmove
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -503,6 +503,13 @@
                  llvm_i32_ty],
                 []>;
 
+// Memcpy semantic that is guaranteed to be inlined.
+def int_memcpy_inline
+    : Intrinsic<[],
+      [ llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty ],
+      [ IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>, ReadOnly<1>,
+        ImmArg<2>, ImmArg<3> ]>;
+
 //===------------------- Standard C Library Intrinsics --------------------===//
 //
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5787,12 +5787,35 @@
     // FIXME: Support passing different dest/src alignments to the memcpy DAG
     // node.
     SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
-                               false, isTC,
+                               /* AlwaysInline */ false, isTC,
                                MachinePointerInfo(I.getArgOperand(0)),
                                MachinePointerInfo(I.getArgOperand(1)));
     updateDAGForMaybeTailCall(MC);
     return;
   }
+  case Intrinsic::memcpy_inline: {
+    const auto &MCI = cast<MemCpyInlineInst>(I);
+    SDValue Dst = getValue(I.getArgOperand(0));
+    SDValue Src = getValue(I.getArgOperand(1));
+    SDValue Size = getValue(I.getArgOperand(2));
+    // The length operand is declared ImmArg in Intrinsics.td, so it is expected
+    // to reach this point as a constant.
+    assert(isa<ConstantSDNode>(Size) && "memcpy_inline needs constant size");
+    // @llvm.memcpy.inline defines 0 and 1 to both mean no alignment.
+    Align DstAlign = MCI.getDestAlign().valueOrOne();
+    Align SrcAlign = MCI.getSourceAlign().valueOrOne();
+    Align Alignment = commonAlignment(DstAlign, SrcAlign);
+    bool isVol = MCI.isVolatile();
+    bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
+    // FIXME: Support passing different dest/src alignments to the memcpy DAG
+    // node.
+    SDValue MC = DAG.getMemcpy(
+        getRoot(), sdl, Dst, Src, Size, Alignment.value(), isVol,
+        /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)),
+        MachinePointerInfo(I.getArgOperand(1)));
+    updateDAGForMaybeTailCall(MC);
+    return;
+  }
   case Intrinsic::memset: {
     const auto &MSI = cast<MemSetInst>(I);
     SDValue Op1 = getValue(I.getArgOperand(0));
diff --git a/llvm/test/CodeGen/X86/memcpy-inline.ll b/llvm/test/CodeGen/X86/memcpy-inline.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/memcpy-inline.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck %s
+
+declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+
+; An 8-byte llvm.memcpy.inline is expected to lower to one load/store pair
+; with no call emitted.
+define void @test1(i8* %a, i8* %b) nounwind {
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq (%rsi), %rax
+; CHECK-NEXT:    movq %rax, (%rdi)
+; CHECK-NEXT:    retq
+  tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %a, i8* %b, i64 8, i1 0 )
+  ret void
+}