diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -304,9 +304,6 @@
 def ObjCNonFragileRuntime
     : LangOpt<"ObjCNonFragileRuntime", "LangOpts.ObjCRuntime.allowsClassStubs()">;
 
-// Language option for CMSE extensions
-def Cmse : LangOpt<"Cmse">;
-
 // Defines targets for target-specific attributes. Empty lists are unchecked.
 class TargetSpec {
   // Specifies Architectures for which the target applies, based off the
@@ -3252,3 +3249,10 @@
   let Subjects = SubjectList<[NonParmVar, Function, Block, ObjCMethod]>;
   let Documentation = [ObjCExternallyRetainedDocs];
 }
+
+def NoBuiltin : InheritableAttr {
+  let Spellings = [Clang<"no_builtin">];
+  let Args = [VariadicStringArgument<"FunctionNames">];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [NoBuiltinDocs];
+}
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -3757,7 +3757,7 @@
 def WebAssemblyImportModuleDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
-Clang supports the ``__attribute__((import_module(<module_name>)))`` 
+Clang supports the ``__attribute__((import_module(<module_name>)))``
 attribute for the WebAssembly target. This attribute may be attached to a
 function declaration, where it modifies how the symbol is to be imported
 within the WebAssembly linking environment.
@@ -3774,7 +3774,7 @@
 def WebAssemblyImportNameDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
-Clang supports the ``__attribute__((import_name(<import_name>)))`` 
+Clang supports the ``__attribute__((import_name(<import_name>)))``
 attribute for the WebAssembly target. This attribute may be attached to a
 function declaration, where it modifies how the symbol is to be imported
 within the WebAssembly linking environment.
@@ -3978,7 +3978,7 @@
 (`start_routine`) is called zero or more times by the `pthread_create` function,
 and that the fourth parameter (`arg`) is passed along. Note that the callback
 behavior of `pthread_create` is automatically recognized by Clang. In addition,
-the declarations of `__kmpc_fork_teams` and `__kmpc_fork_call`, generated for 
+the declarations of `__kmpc_fork_teams` and `__kmpc_fork_call`, generated for
 `#pragma omp target teams` and `#pragma omp parallel`, respectively, are also
 automatically recognized as broker functions. Further functions might be added
 in the future.
@@ -4157,3 +4157,29 @@
 ``__attribute__((malloc))``.
 }];
 }
+
+def NoBuiltinDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+The ``__attribute__((no_builtin("function_name")))`` attribute is similar to
+the ``-fno-builtin`` flag, but is specific to the annotated function. It takes
+one or more library function names and prevents the compiler from treating
+calls to those functions inside this function as builtins; each listed name
+is lowered to a ``"no-builtin-<function_name>"`` attribute on the generated
+IR function.
+
+For example, this keeps the compiler from turning the copy loop below back
+into a call to the ``memcpy`` library function:
+
+.. code-block:: c
+
+  __attribute__((no_builtin("memcpy")))
+  void *my_memcpy(void *dst, const void *src, unsigned long count) {
+    char *d = (char *)dst;
+    const char *s = (const char *)src;
+    for (unsigned long i = 0; i < count; ++i)
+      d[i] = s[i];
+    return dst;
+  }
+  }];
+}
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -504,6 +504,7 @@
 BUILTIN(__builtin_vsnprintf, "ic*zcC*a", "nFP:2:")
 BUILTIN(__builtin_thread_pointer, "v*", "nc")
 BUILTIN(__builtin_launder, "v*v*", "nt")
+BUILTIN(__builtin_memcpy_inline, "vv*vC*z", "n")
 LANGBUILTIN(__builtin_is_constant_evaluated, "b", "n", CXX_LANG)
 
 // GCC exception builtins
diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h
--- a/clang/lib/CodeGen/CGBuilder.h
+++ b/clang/lib/CodeGen/CGBuilder.h
@@ -284,6 +284,14 @@
                                  Size, IsVolatile);
   }
 
+  using CGBuilderBaseTy::CreateMemCpyInline;
+  llvm::CallInst *CreateMemCpyInline(Address Dest, Address Src,
+                                     uint64_t Size) {
+    return CreateMemCpyInline(
+        Dest.getPointer(), Dest.getAlignment().getQuantity(), Src.getPointer(),
+        Src.getAlignment().getQuantity(), getInt64(Size));
+  }
+
   using CGBuilderBaseTy::CreateMemMove;
   llvm::CallInst *CreateMemMove(Address Dest, Address Src, llvm::Value *Size,
                                 bool IsVolatile = false) {
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2362,7 +2362,18 @@
     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
     return RValue::get(Dest.getPointer());
   }
-
+  case Builtin::BI__builtin_memcpy_inline: {
+    Address Dest = EmitPointerWithAlignment(E->getArg(0));
+    Address Src = EmitPointerWithAlignment(E->getArg(1));
+    uint64_t Size =
+        E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();
+    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
+                        E->getArg(0)->getExprLoc(), FD, 0);
+    EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
+                        E->getArg(1)->getExprLoc(), FD, 1);
+    Builder.CreateMemCpyInline(Dest, Src, Size);
+    return RValue::get(nullptr);
+  }
   case Builtin::BI__builtin_char_memchr:
     BuiltinID = Builtin::BI__builtin_memchr;
     break;
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1846,6 +1846,18 @@
       FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate);
     if (TargetDecl->hasAttr<ConvergentAttr>())
      FuncAttrs.addAttribute(llvm::Attribute::Convergent);
+    if (const auto *Attr = TargetDecl->getAttr<NoBuiltinAttr>()) {
+      // TODO: check that function names are valid for the TargetLibraryInfo.
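+      // Each listed name is emitted as a string function attribute of the
+      // form "no-builtin-<name>" (e.g. "no-builtin-memcpy"), which IRBuilder
+      // and the backends can then key off of when lowering calls.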
+      for (const auto &FunctionName : Attr->functionNames()) {
+        SmallString<32> AttributeName;
+        AttributeName += "no-builtin-";
+        AttributeName += FunctionName;
+        FuncAttrs.addAttribute(AttributeName);
+      }
+    }
 
     if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
       AddAttributesFromFunctionProtoType(
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -1104,6 +1104,30 @@
       cast<NamedDecl>(D), AL.getAttributeSpellingListIndex()));
 }
 
+static void handleNoBuiltin(Sema &S, Decl *D, const ParsedAttr &AL) {
+  if (D->hasAttr<NoBuiltinAttr>()) {
+    S.Diag(D->getBeginLoc(), diag::err_attribute_only_once_per_parameter) << AL;
+    return;
+  }
+
+  if (!checkAttributeAtLeastNumArgs(S, AL, 1))
+    return;
+
+  std::vector<StringRef> FunctionNames;
+  for (unsigned I = 0, E = AL.getNumArgs(); I != E; ++I) {
+    StringRef FunctionName;
+    SourceLocation LiteralLoc;
+    if (!S.checkStringLiteralArgumentAttr(AL, I, FunctionName, &LiteralLoc))
+      return;
+    // TODO: check that FunctionName names a known library function.
+    FunctionNames.push_back(FunctionName);
+  }
+
+  D->addAttr(::new (S.Context) NoBuiltinAttr(
+      AL.getRange(), S.Context, FunctionNames.data(), FunctionNames.size(),
+      AL.getAttributeSpellingListIndex()));
+}
+
 static void handlePassObjectSizeAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   if (D->hasAttr<PassObjectSizeAttr>()) {
     S.Diag(D->getBeginLoc(), diag::err_attribute_only_once_per_parameter) << AL;
@@ -6746,6 +6770,9 @@
   case ParsedAttr::AT_DiagnoseIf:
     handleDiagnoseIfAttr(S, D, AL);
     break;
+  case ParsedAttr::AT_NoBuiltin:
+    handleNoBuiltin(S, D, AL);
+    break;
   case ParsedAttr::AT_ExtVectorType:
     handleExtVectorTypeAttr(S, D, AL);
     break;
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -11347,6 +11347,27 @@
 other operations necessary to locate the TLS area. Not all targets
 support this intrinsic.
 
+'``llvm.memcpy.inline``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
+                                                     i32 <len>, i1 <isvolatile>)
+
+Overview:
+"""""""""
+
+The '``llvm.memcpy.inline``' intrinsic copies a block of memory from the
+source location to the destination location. It differs from
+'``llvm.memcpy``' in that ``len`` must be a constant expression and in
+that the lowering is guaranteed not to call an external function.
+
+Semantics:
+""""""""""
+
+The '``llvm.memcpy.inline``' intrinsic copies ``len`` bytes from ``src``
+to ``dest``, which must not overlap, with the same semantics as
+'``llvm.memcpy``'. The copy is always expanded inline: no call to a
+library ``memcpy`` or any other external function is ever emitted.
+
 Standard C Library Intrinsics
 -----------------------------
 
@@ -15154,7 +15175,7 @@
 <t_vector>` of floating point values. This argument must be larger in size
 than the result.
 
-The second and third arguments specify the rounding mode and exception 
+The second and third arguments specify the rounding mode and exception
 behavior as described above.
 
 Semantics:
@@ -15178,7 +15199,7 @@
 Overview:
 """""""""
 
-The '``llvm.experimental.constrained.fpext``' intrinsic extends a 
+The '``llvm.experimental.constrained.fpext``' intrinsic extends a
 floating-point ``value`` to a larger floating-point value.
 
 Arguments:
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -460,6 +460,9 @@
                          MDNode *ScopeTag = nullptr,
                          MDNode *NoAliasTag = nullptr);
 
+  CallInst *CreateMemCpyInline(Value *Dst, unsigned DstAlign, Value *Src,
+                               unsigned SrcAlign, Value *Size);
+
   /// Create and insert an element unordered-atomic memcpy between the
   /// specified pointers.
   ///
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -578,6 +578,7 @@
     static bool classof(const IntrinsicInst *I) {
       switch (I->getIntrinsicID()) {
       case Intrinsic::memcpy:
+      case Intrinsic::memcpy_inline:
       case Intrinsic::memmove:
       case Intrinsic::memset:
         return true;
@@ -606,8 +607,14 @@
   public:
     // Methods for support type inquiry through isa, cast, and dyn_cast:
     static bool classof(const IntrinsicInst *I) {
-      return I->getIntrinsicID() == Intrinsic::memcpy ||
-             I->getIntrinsicID() == Intrinsic::memmove;
+      switch (I->getIntrinsicID()) {
+      case Intrinsic::memcpy:
+      case Intrinsic::memcpy_inline:
+      case Intrinsic::memmove:
+        return true;
+      default:
+        return false;
+      }
     }
     static bool classof(const Value *V) {
       return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
@@ -626,6 +633,18 @@
     }
   };
 
+  /// This class wraps the llvm.memcpy.inline intrinsic.
+  class MemCpyInlineInst : public MemTransferInst {
+  public:
+    // Methods for support type inquiry through isa, cast, and dyn_cast:
+    static bool classof(const IntrinsicInst *I) {
+      return I->getIntrinsicID() == Intrinsic::memcpy_inline;
+    }
+    static bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+  };
+
   /// This class wraps the llvm.memmove intrinsic.
   class MemMoveInst : public MemTransferInst {
   public:
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -483,6 +483,13 @@
                                 llvm_i32_ty],
                                []>;
 
+// Memcpy semantics, with a guarantee that the copy is always expanded inline.
+def int_memcpy_inline
+    : Intrinsic<[],
+      [ llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty ],
+      [ IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>, ReadOnly<1>,
+        ImmArg<2>, ImmArg<3> ]>;
+
 //===------------------- Standard C Library Intrinsics --------------------===//
 //
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5615,8 +5615,11 @@
   case Intrinsic::longjmp:
     lowerCallToExternalSymbol(I, &"_longjmp"[!TLI.usesUnderscoreLongJmp()]);
     return;
+  case Intrinsic::memcpy_inline:
   case Intrinsic::memcpy: {
-    const auto &MCI = cast<MemCpyInst>(I);
+    const auto &MCI = cast<MemTransferInst>(I);
+    assert((isa<MemCpyInst>(I) || isa<MemCpyInlineInst>(I)) &&
+           "must be a memcpy");
     SDValue Op1 = getValue(I.getArgOperand(0));
     SDValue Op2 = getValue(I.getArgOperand(1));
     SDValue Op3 = getValue(I.getArgOperand(2));
@@ -5628,8 +5631,12 @@
     bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
     // FIXME: Support passing different dest/src alignments to the memcpy DAG
    // node.
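+    // A memcpy.inline call must always be expanded inline; the same holds
+    // when the caller disables the memcpy builtin via "no-builtin-memcpy".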
+    bool isAlwaysInline =
+        isa<MemCpyInlineInst>(I) || I.hasFnAttr("no-builtin-memcpy");
     SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
-                               false, isTC,
+                               isAlwaysInline, isTC,
                                MachinePointerInfo(I.getArgOperand(0)),
                                MachinePointerInfo(I.getArgOperand(1)));
     updateDAGForMaybeTailCall(MC);
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -96,6 +96,16 @@
   return II;
 }
 
+/// If the parent function carries the given string attribute (e.g.
+/// "no-builtin-memcpy"), forward it to the newly created call site.
+static void ForwardAttribute(const Function *F, StringRef Attribute,
+                             CallInst *CI) {
+  if (F->hasFnAttribute(Attribute)) {
+    CI->addAttribute(AttributeList::FunctionIndex,
+                     F->getFnAttribute(Attribute));
+  }
+}
+
 CallInst *IRBuilderBase::
 CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
              bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag,
@@ -103,7 +113,8 @@
   Ptr = getCastedInt8PtrValue(Ptr);
   Value *Ops[] = {Ptr, Val, Size, getInt1(isVolatile)};
   Type *Tys[] = { Ptr->getType(), Size->getType() };
-  Module *M = BB->getParent()->getParent();
+  Function *F = BB->getParent();
+  Module *M = F->getParent();
   Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
 
   CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -121,6 +132,8 @@
   if (NoAliasTag)
     CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
 
+  ForwardAttribute(F, "no-builtin-memset", CI);
+
   return CI;
 }
 
@@ -165,7 +178,8 @@
 
   Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
   Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
-  Module *M = BB->getParent()->getParent();
+  Function *F = BB->getParent();
+  Module *M = F->getParent();
   Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
 
   CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -190,6 +204,36 @@
   if (NoAliasTag)
     CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
 
+  ForwardAttribute(F, "no-builtin-memcpy", CI);
+
+  return CI;
+}
+
+CallInst *IRBuilderBase::CreateMemCpyInline(Value *Dst, unsigned DstAlign,
+                                            Value *Src, unsigned SrcAlign,
+                                            Value *Size) {
+  assert((DstAlign == 0 || isPowerOf2_32(DstAlign)) &&
+         "Must be 0 or a power of 2");
+  assert((SrcAlign == 0 || isPowerOf2_32(SrcAlign)) &&
+         "Must be 0 or a power of 2");
+  Dst = getCastedInt8PtrValue(Dst);
+  Src = getCastedInt8PtrValue(Src);
+  Value *IsVolatile = getInt1(false);
+
+  Value *Ops[] = {Dst, Src, Size, IsVolatile};
+  Type *Tys[] = {Dst->getType(), Src->getType(), Size->getType()};
+  Function *F = BB->getParent();
+  Module *M = F->getParent();
+  Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy_inline, Tys);
+
+  CallInst *CI = createCallHelper(TheFn, Ops, this);
+
+  auto *MCI = cast<MemCpyInlineInst>(CI);
+  if (DstAlign > 0)
+    MCI->setDestAlignment(DstAlign);
+  if (SrcAlign > 0)
+    MCI->setSourceAlignment(SrcAlign);
+
   return CI;
 }
 
@@ -245,7 +289,8 @@
 
   Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
   Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
-  Module *M = BB->getParent()->getParent();
+  Function *F = BB->getParent();
+  Module *M = F->getParent();
   Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys);
 
   CallInst *CI = createCallHelper(TheFn, Ops, this);
@@ -266,6 +311,8 @@
   if (NoAliasTag)
     CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag);
 
+  ForwardAttribute(F, "no-builtin-memmove", CI);
+
   return CI;
 }
 
diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -314,5 +314,9 @@
                                 Size.getValueType(), Align,
                                isVolatile, AlwaysInline, DstPtrInfo, SrcPtrInfo);
+
+  // When inlining is mandatory, lower runtime-sized copies with "rep movsb".
+  if (AlwaysInline)
+    return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, MVT::i8);
 
   return SDValue();
 }
diff --git a/llvm/test/CodeGen/X86/memcpy-inline.ll b/llvm/test/CodeGen/X86/memcpy-inline.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/memcpy-inline.ll
@@ -0,0 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck %s
+
+declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+
+define void @test1(i8* %a, i8* %b) nounwind {
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq (%rsi), %rax
+; CHECK-NEXT:    movq %rax, (%rdi)
+; CHECK-NEXT:    retq
+  tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %a, i8* %b, i64 8, i1 0)
+  ret void
+}
diff --git a/llvm/test/CodeGen/X86/memcpy.ll b/llvm/test/CodeGen/X86/memcpy.ll
--- a/llvm/test/CodeGen/X86/memcpy.ll
+++ b/llvm/test/CodeGen/X86/memcpy.ll
@@ -7,7 +7,7 @@
 
 ; Variable memcpy's should lower to calls.
-define i8* @test1(i8* %a, i8* %b, i64 %n) nounwind {
+define void @test1(i8* %a, i8* %b, i64 %n) nounwind {
 ; LINUX-LABEL: test1:
 ; LINUX:       # %bb.0: # %entry
 ; LINUX-NEXT:    jmp memcpy # TAILCALL
@@ -17,11 +17,11 @@
 ; DARWIN-NEXT:    jmp _memcpy ## TAILCALL
 entry:
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 %n, i1 0)
-  ret i8* %a
+  ret void
 }
 
 ; Variable memcpy's should lower to calls.
-define i8* @test2(i64* %a, i64* %b, i64 %n) nounwind {
+define void @test2(i64* %a, i64* %b, i64 %n) nounwind {
 ; LINUX-LABEL: test2:
 ; LINUX:       # %bb.0: # %entry
 ; LINUX-NEXT:    jmp memcpy # TAILCALL
@@ -33,7 +33,25 @@
   %tmp14 = bitcast i64* %a to i8*
   %tmp25 = bitcast i64* %b to i8*
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %tmp14, i8* align 8 %tmp25, i64 %n, i1 0)
-  ret i8* %tmp14
+  ret void
+}
+
+; Variable-length memcpys with "no-builtin-memcpy" should lower to "rep movsb".
+define void @memcpy_no_runtime(i8* %a, i8* %b, i64 %n) nounwind {
+; LINUX-LABEL: memcpy_no_runtime:
+; LINUX:       # %bb.0: # %entry
+; LINUX-NEXT:    movq %rdx, %rcx
+; LINUX-NEXT:    rep;movsb (%rsi), %es:(%rdi)
+; LINUX-NEXT:    retq
+;
+; DARWIN-LABEL: memcpy_no_runtime:
+; DARWIN:       ## %bb.0: ## %entry
+; DARWIN-NEXT:    movq %rdx, %rcx
+; DARWIN-NEXT:    rep;movsb (%rsi), %es:(%rdi)
+; DARWIN-NEXT:    retq
+entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 %n, i1 0) "no-builtin-memcpy"
+  ret void
 }
 
 ; Large constant memcpy's should lower to a call when optimizing for size.
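diff --git a/clang/test/CodeGen/builtin-memcpy-inline.c b/clang/test/CodeGen/builtin-memcpy-inline.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/builtin-memcpy-inline.c
@@ -0,0 +1,11 @@
+// Illustrative sketch (not part of the original patch): how the new
+// __builtin_memcpy_inline surfaces in IR. The file name and CHECK lines are
+// assumptions; the builtin requires a constant size and lowers to the
+// llvm.memcpy.inline intrinsic exercised by the llc test above.
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s
+
+// CHECK-LABEL: @copy8(
+void copy8(char *dst, const char *src) {
+  // CHECK: call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 8, i1 false)
+  __builtin_memcpy_inline(dst, src, 8);
+}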