diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -532,16 +532,23 @@
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);
 
+  /// Emit loads and stores that perform the given memcpy.
+  /// Assumes \p MI is a G_MEMCPY_INLINE
+  /// TODO: implement dynamically sized inline memcpy, and rename: s/bool tryEmit/void emit/
+  bool tryEmitMemcpyInline(MachineInstr &MI);
 private:
   // Memcpy family optimization helpers.
+  bool tryEmitMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
+                           uint64_t KnownLen, Align DstAlign, Align SrcAlign,
+                           bool IsVolatile);
   bool optimizeMemcpy(MachineInstr &MI, Register Dst, Register Src,
-                      unsigned KnownLen, Align DstAlign, Align SrcAlign,
+                      uint64_t KnownLen, uint64_t Limit, Align DstAlign, Align SrcAlign,
                       bool IsVolatile);
   bool optimizeMemmove(MachineInstr &MI, Register Dst, Register Src,
-                       unsigned KnownLen, Align DstAlign, Align SrcAlign,
+                       uint64_t KnownLen, Align DstAlign, Align SrcAlign,
                        bool IsVolatile);
   bool optimizeMemset(MachineInstr &MI, Register Dst, Register Val,
-                      unsigned KnownLen, Align DstAlign, bool IsVolatile);
+                      uint64_t KnownLen, Align DstAlign, bool IsVolatile);
 
   /// Given a non-indexed load or store instruction \p MI, find an offset that
   /// can be usefully and legally folded into it as a post-indexing operation.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1844,6 +1844,15 @@
                                 DstMMO, SrcMMO);
   }
 
+  MachineInstrBuilder buildMemCpyInline(const SrcOp &DstPtr, const SrcOp &SrcPtr,
+                                  const SrcOp &Size, MachineMemOperand &DstMMO,
+                                  MachineMemOperand &SrcMMO) {
+    auto MIB = buildInstr(TargetOpcode::G_MEMCPY_INLINE, {}, {DstPtr, SrcPtr, Size});
+    MIB.addMemOperand(&DstMMO);
+    MIB.addMemOperand(&SrcMMO);
+    return MIB;
+  }
+
   /// Build and insert \p Dst = G_SBFX \p Src, \p LSB, \p Width.
   MachineInstrBuilder buildSbfx(const DstOp &Dst, const SrcOp &Src,
                                 const SrcOp &LSB, const SrcOp &Width) {
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -739,6 +739,9 @@
 /// llvm.memcpy intrinsic
 HANDLE_TARGET_OPCODE(G_MEMCPY)
 
+/// llvm.memcpy.inline intrinsic
+HANDLE_TARGET_OPCODE(G_MEMCPY_INLINE)
+
 /// llvm.memmove intrinsic
 HANDLE_TARGET_OPCODE(G_MEMMOVE)
 
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -1353,6 +1353,14 @@
   let mayStore = true;
 }
 
+def G_MEMCPY_INLINE : GenericInstruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size);
+  let hasSideEffects = false;
+  let mayLoad = true;
+  let mayStore = true;
+}
+
 def G_MEMMOVE : GenericInstruction {
   let OutOperandList = (outs);
   let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size, untyped_imm_0:$tailcall);
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1218,7 +1218,7 @@
 }
 
 bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
-                                    Register Val, unsigned KnownLen,
+                                    Register Val, uint64_t KnownLen,
                                     Align Alignment, bool IsVolatile) {
   auto &MF = *MI.getParent()->getParent();
   const auto &TLI = *MF.getSubtarget().getTargetLowering();
@@ -1330,8 +1330,46 @@
   return true;
 }
 
+bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
+
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  Register Len = MI.getOperand(2).getReg();
+
+  auto MMOIt = MI.memoperands_begin();
+  const MachineMemOperand *MemOp = *MMOIt;
+  bool IsVolatile = MemOp->isVolatile();
+
+  // See if this is a constant length copy
+  auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
+  if (!LenVRegAndVal)
+    return false; // FIXME: support dynamically sized G_MEMCPY_INLINE
+
+  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
+  if (KnownLen == 0) {
+    MI.eraseFromParent();
+    return true;
+  }
+
+  const auto &DstMMO = **MI.memoperands_begin();
+  const auto &SrcMMO = **std::next(MI.memoperands_begin());
+  Align DstAlign = DstMMO.getBaseAlign();
+  Align SrcAlign = SrcMMO.getBaseAlign();
+
+  return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
+}
+
+bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI, Register Dst,
+                                         Register Src, uint64_t KnownLen,
+                                         Align DstAlign, Align SrcAlign,
+                                         bool IsVolatile) {
+  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
+  return optimizeMemcpy(MI, Dst, Src, KnownLen, std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign, IsVolatile);
+}
+
 bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
-                                    Register Src, unsigned KnownLen,
+                                    Register Src, uint64_t KnownLen, uint64_t Limit,
                                     Align DstAlign, Align SrcAlign,
                                     bool IsVolatile) {
   auto &MF = *MI.getParent()->getParent();
@@ -1343,7 +1381,6 @@
 
   bool DstAlignCanChange = false;
   MachineFrameInfo &MFI = MF.getFrameInfo();
-  bool OptSize = shouldLowerMemFuncForSize(MF);
   Align Alignment = commonAlignment(DstAlign, SrcAlign);
 
   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
@@ -1354,7 +1391,6 @@
   // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
   // if the memcpy is in a tail call position.
 
-  unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
   std::vector<LLT> MemOps;
 
   const auto &DstMMO = **MI.memoperands_begin();
@@ -1437,7 +1473,7 @@
 }
 
 bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
-                                     Register Src, unsigned KnownLen,
+                                     Register Src, uint64_t KnownLen,
                                      Align DstAlign, Align SrcAlign,
                                      bool IsVolatile) {
   auto &MF = *MI.getParent()->getParent();
@@ -1550,10 +1586,6 @@
 
   auto MMOIt = MI.memoperands_begin();
   const MachineMemOperand *MemOp = *MMOIt;
-  bool IsVolatile = MemOp->isVolatile();
-  // Don't try to optimize volatile.
-  if (IsVolatile)
-    return false;
 
   Align DstAlign = MemOp->getBaseAlign();
   Align SrcAlign;
@@ -1571,18 +1603,31 @@
   auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
   if (!LenVRegAndVal)
     return false; // Leave it to the legalizer to lower it to a libcall.
-  unsigned KnownLen = LenVRegAndVal->Value.getZExtValue();
+  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
 
   if (KnownLen == 0) {
     MI.eraseFromParent();
     return true;
   }
 
+  bool IsVolatile = MemOp->isVolatile();
+  if (Opc == TargetOpcode::G_MEMCPY_INLINE)
+    return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
+
+  // Don't try to optimize volatile.
+  if (IsVolatile)
+    return false;
+
   if (MaxLen && KnownLen > MaxLen)
     return false;
 
-  if (Opc == TargetOpcode::G_MEMCPY)
-    return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
+  if (Opc == TargetOpcode::G_MEMCPY) {
+    auto &MF = *MI.getParent()->getParent();
+    const auto &TLI = *MF.getSubtarget().getTargetLowering();
+    bool OptSize = shouldLowerMemFuncForSize(MF);
+    uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
+    return optimizeMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign, IsVolatile);
+  }
   if (Opc == TargetOpcode::G_MEMMOVE)
     return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
   if (Opc == TargetOpcode::G_MEMSET)
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1589,6 +1589,9 @@
   if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {
     DstAlign = MCI->getDestAlign().valueOrOne();
     SrcAlign = MCI->getSourceAlign().valueOrOne();
+  } else if (auto *MCI = dyn_cast<MemCpyInlineInst>(&CI)) {
+    DstAlign = MCI->getDestAlign().valueOrOne();
+    SrcAlign = MCI->getSourceAlign().valueOrOne();
   } else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) {
     DstAlign = MMI->getDestAlign().valueOrOne();
     SrcAlign = MMI->getSourceAlign().valueOrOne();
@@ -1597,10 +1600,12 @@
     DstAlign = MSI->getDestAlign().valueOrOne();
   }
 
-  // We need to propagate the tail call flag from the IR inst as an argument.
-  // Otherwise, we have to pessimize and assume later that we cannot tail call
-  // any memory intrinsics.
-  ICall.addImm(CI.isTailCall() ? 1 : 0);
+  if (Opcode != TargetOpcode::G_MEMCPY_INLINE) {
+    // We need to propagate the tail call flag from the IR inst as an argument.
+    // Otherwise, we have to pessimize and assume later that we cannot tail call
+    // any memory intrinsics.
+    ICall.addImm(CI.isTailCall() ? 1 : 0);
+  }
 
   // Create mem operands to store the alignment and volatile info.
   auto VolFlag = IsVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
@@ -2031,6 +2036,8 @@
                             getOrCreateVReg(*CI.getArgOperand(0)),
                             MachineInstr::copyFlagsFromInstruction(CI));
     return true;
+  case Intrinsic::memcpy_inline:
+    return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE);
   case Intrinsic::memcpy:
     return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY);
   case Intrinsic::memmove:
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1477,6 +1477,7 @@
     }
     break;
   }
+  case TargetOpcode::G_MEMCPY_INLINE:
   case TargetOpcode::G_MEMCPY:
   case TargetOpcode::G_MEMMOVE: {
     ArrayRef<MachineMemOperand *> MMOs = MI->memoperands();
@@ -1507,6 +1508,10 @@
     if (SrcPtrTy.getAddressSpace() != MMOs[1]->getAddrSpace())
       report("inconsistent load address space", MI);
 
+    if (Opc != TargetOpcode::G_MEMCPY_INLINE)
+      if (!MI->getOperand(3).isImm() || (MI->getOperand(3).getImm() & ~1LL))
+        report("'tail' flag (operand 3) must be an immediate 0 or 1", MI);
+
     break;
   }
   case TargetOpcode::G_BZERO:
@@ -1532,6 +1537,11 @@
     if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace())
       report("inconsistent " + Twine(Name, " address space"), MI);
 
+    if (!MI->getOperand(MI->getNumOperands() - 1).isImm() ||
+        (MI->getOperand(MI->getNumOperands() - 1).getImm() & ~1LL))
+      report("'tail' flag (last operand) must be an immediate 0 or 1",
+             MI);
+
     break;
   }
   case TargetOpcode::G_VECREDUCE_SEQ_FADD:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
@@ -85,6 +85,8 @@
     return Helper.tryCombineConcatVectors(MI);
   case TargetOpcode::G_SHUFFLE_VECTOR:
     return Helper.tryCombineShuffleVector(MI);
+  case TargetOpcode::G_MEMCPY_INLINE:
+    return Helper.tryEmitMemcpyInline(MI);
   case TargetOpcode::G_MEMCPY:
   case TargetOpcode::G_MEMMOVE:
   case TargetOpcode::G_MEMSET: {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -272,11 +272,13 @@
     return Helper.tryCombineConcatVectors(MI);
   case TargetOpcode::G_SHUFFLE_VECTOR:
     return Helper.tryCombineShuffleVector(MI);
+  case TargetOpcode::G_MEMCPY_INLINE:
+    return Helper.tryEmitMemcpyInline(MI);
   case TargetOpcode::G_MEMCPY:
   case TargetOpcode::G_MEMMOVE:
   case TargetOpcode::G_MEMSET: {
     // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other
-    // heuristics decide.
+    // heuristics decide (unless it is an llvm.memcpy.inline, of course).
     unsigned MaxLen = EnableOpt ? 0 : 32;
     // Try to inline memcpy type calls if optimizations are enabled.
     if (Helper.tryCombineMemCpyFamily(MI, MaxLen))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -205,6 +205,8 @@
     return true;
 
   switch (MI.getOpcode()) {
+  case TargetOpcode::G_MEMCPY_INLINE:
+    return Helper.tryEmitMemcpyInline(MI);
   case TargetOpcode::G_CONCAT_VECTORS:
     return Helper.tryCombineConcatVectors(MI);
   case TargetOpcode::G_SHUFFLE_VECTOR:
diff --git a/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp b/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
--- a/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp
@@ -42,6 +42,8 @@
   switch (MI.getOpcode()) {
   default:
     return false;
+  case TargetOpcode::G_MEMCPY_INLINE:
+    return Helper.tryEmitMemcpyInline(MI);
   case TargetOpcode::G_LOAD:
   case TargetOpcode::G_SEXTLOAD:
   case TargetOpcode::G_ZEXTLOAD: {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy-opt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy-opt.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy-opt.mir
@@ -0,0 +1,269 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "arm64-apple-darwin"
+
+  define void @test_memcpy1(i32* nocapture %dst, i32* nocapture readonly %src, i64 %len) local_unnamed_addr #0 {
+  entry:
+    %0 = bitcast i32* %dst to i8*
+    %1 = bitcast i32* %src to i8*
+    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 %len, i1 false)
+    ret void
+  }
+
+  declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #1
+
+  define void @test_memcpy2_const(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #0 {
+  entry:
+    %0 = bitcast i32* %dst to i8*
+    %1 = bitcast i32* %src to i8*
+    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false)
+    ret void
+  }
+
+  define void @test_memcpy2_const_optsize(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #2 {
+  entry:
+    %0 = bitcast i32* %dst to i8*
+    %1 = bitcast i32* %src to i8*
+    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false)
+    ret void
+  }
+
+  define void @test_memcpy2_const_minsize(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #3 {
+  entry:
+    %0 = bitcast i32* %dst to i8*
+    %1 = bitcast i32* %src to i8*
+    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false)
+    ret void
+  }
+
+  define void @test_memcpy3_const_arrays_unaligned(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #0 {
+  entry:
+    %0 = bitcast i32* %dst to i8*
+    %1 = bitcast i32* %src to i8*
+    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 143, i1 false)
+    ret void
+  }
+
+  attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cyclone" "target-features"="+aes,+crypto,+fp-armv8,+neon,+sha2,+zcm,+zcz" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #1 = { argmemonly nounwind }
+  attributes #2 = { optsize }
+  attributes #3 = { minsize }
+
+...
+---
+name:            test_memcpy1
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+machineFunctionInfo: {}
+body:             |
+  bb.1.entry:
+    liveins: $x0, $x1, $x2
+
+    ; CHECK-LABEL: name: test_memcpy1
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
+    ; CHECK: RET_ReallyLR
+    %0:_(p0) = COPY $x0
+    %1:_(p0) = COPY $x1
+    %2:_(s64) = COPY $x2
+    G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
+    RET_ReallyLR
+
+...
+---
+name:            test_memcpy2_const
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+machineFunctionInfo: {}
+body:             |
+  bb.1.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: test_memcpy2_const
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4)
+    ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
+    ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4)
+    ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+    ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load 16 from %ir.1 + 32, align 4)
+    ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 32, align 4)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+    ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+    ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load 16 from %ir.1 + 48, align 4)
+    ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store 16 into %ir.0 + 48, align 4)
+    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64)
+    ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP6]](p0) :: (load 8 from %ir.1 + 64, align 4)
+    ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CHECK: G_STORE [[LOAD4]](s64), [[GEP7]](p0) :: (store 8 into %ir.0 + 64, align 4)
+    ; CHECK: RET_ReallyLR
+    %0:_(p0) = COPY $x0
+    %1:_(p0) = COPY $x1
+    %2:_(s64) = G_CONSTANT i64 72
+    G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
+    RET_ReallyLR
+
+...
+---
+name:            test_memcpy2_const_optsize
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+machineFunctionInfo: {}
+body:             |
+  bb.1.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: test_memcpy2_const_optsize
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4)
+    ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
+    ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4)
+    ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+    ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load 16 from %ir.1 + 32, align 4)
+    ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 32, align 4)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+    ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+    ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load 16 from %ir.1 + 48, align 4)
+    ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store 16 into %ir.0 + 48, align 4)
+    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64)
+    ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP6]](p0) :: (load 8 from %ir.1 + 64, align 4)
+    ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CHECK: G_STORE [[LOAD4]](s64), [[GEP7]](p0) :: (store 8 into %ir.0 + 64, align 4)
+    ; CHECK: RET_ReallyLR
+    %0:_(p0) = COPY $x0
+    %1:_(p0) = COPY $x1
+    %2:_(s64) = G_CONSTANT i64 72
+    G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
+    RET_ReallyLR
+
+...
+---
+name:            test_memcpy2_const_minsize
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+machineFunctionInfo: {}
+body:             |
+  bb.1.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: test_memcpy2_const_minsize
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 72
+    ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[C]](s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
+    ; CHECK: RET_ReallyLR
+    %0:_(p0) = COPY $x0
+    %1:_(p0) = COPY $x1
+    %2:_(s64) = G_CONSTANT i64 72
+    G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
+    RET_ReallyLR
+
+...
+---
+name:            test_memcpy3_const_arrays_unaligned
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+machineFunctionInfo: {}
+body:             |
+  bb.1.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: test_memcpy3_const_arrays_unaligned
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4)
+    ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
+    ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4)
+    ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+    ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load 16 from %ir.1 + 32, align 4)
+    ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 32, align 4)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+    ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+    ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load 16 from %ir.1 + 48, align 4)
+    ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store 16 into %ir.0 + 48, align 4)
+    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64)
+    ; CHECK: [[LOAD4:%[0-9]+]]:_(s128) = G_LOAD [[GEP6]](p0) :: (load 16 from %ir.1 + 64, align 4)
+    ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CHECK: G_STORE [[LOAD4]](s128), [[GEP7]](p0) :: (store 16 into %ir.0 + 64, align 4)
+    ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80
+    ; CHECK: [[GEP8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64)
+    ; CHECK: [[LOAD5:%[0-9]+]]:_(s128) = G_LOAD [[GEP8]](p0) :: (load 16 from %ir.1 + 80, align 4)
+    ; CHECK: [[GEP9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CHECK: G_STORE [[LOAD5]](s128), [[GEP9]](p0) :: (store 16 into %ir.0 + 80, align 4)
+    ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 96
+    ; CHECK: [[GEP10:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C5]](s64)
+    ; CHECK: [[LOAD6:%[0-9]+]]:_(s128) = G_LOAD [[GEP10]](p0) :: (load 16 from %ir.1 + 96, align 4)
+    ; CHECK: [[GEP11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CHECK: G_STORE [[LOAD6]](s128), [[GEP11]](p0) :: (store 16 into %ir.0 + 96, align 4)
+    ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 112
+    ; CHECK: [[GEP12:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C6]](s64)
+    ; CHECK: [[LOAD7:%[0-9]+]]:_(s128) = G_LOAD [[GEP12]](p0) :: (load 16 from %ir.1 + 112, align 4)
+    ; CHECK: [[GEP13:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CHECK: G_STORE [[LOAD7]](s128), [[GEP13]](p0) :: (store 16 into %ir.0 + 112, align 4)
+    ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 127
+    ; CHECK: [[GEP14:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C7]](s64)
+    ; CHECK: [[LOAD8:%[0-9]+]]:_(s128) = G_LOAD [[GEP14]](p0) :: (load 16 from %ir.1 + 127, align 1, basealign 4)
+    ; CHECK: [[GEP15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CHECK: G_STORE [[LOAD8]](s128), [[GEP15]](p0) :: (store 16 into %ir.0 + 127, align 1, basealign 4)
+    ; CHECK: RET_ReallyLR
+    %0:_(p0) = COPY $x0
+    %1:_(p0) = COPY $x1
+    %2:_(s64) = G_CONSTANT i64 143
+    G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
+    RET_ReallyLR
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir
@@ -4,21 +4,21 @@
   target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
   target triple = "arm64-apple-darwin"
 
-  define void @test_memcpy1(i32* nocapture %dst, i32* nocapture readonly %src, i64 %len) local_unnamed_addr #0 {
+  define void @test_memcpy1(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #0 {
   entry:
     %0 = bitcast i32* %dst to i8*
     %1 = bitcast i32* %src to i8*
-    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 %len, i1 false)
+    tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 13, i1 false)
     ret void
   }
 
-  declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #1
+  declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #1
 
   define void @test_memcpy2_const(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #0 {
   entry:
     %0 = bitcast i32* %dst to i8*
     %1 = bitcast i32* %src to i8*
-    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false)
+    tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false)
     ret void
   }
 
@@ -26,7 +26,7 @@
   entry:
     %0 = bitcast i32* %dst to i8*
     %1 = bitcast i32* %src to i8*
-    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false)
+    tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false)
     ret void
   }
 
@@ -34,7 +34,7 @@
   entry:
     %0 = bitcast i32* %dst to i8*
     %1 = bitcast i32* %src to i8*
-    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false)
+    tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false)
     ret void
   }
 
@@ -42,7 +42,7 @@
   entry:
     %0 = bitcast i32* %dst to i8*
     %1 = bitcast i32* %src to i8*
-    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 143, i1 false)
+    tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 143, i1 false)
     ret void
   }
 
@@ -65,17 +65,19 @@
   bb.1.entry:
     liveins: $x0, $x1, $x2
 
+    ; FIXME: This test should be adjusted when we get support for dynamically-sized G_MEMCPY_INLINE
+    ;
     ; CHECK-LABEL: name: test_memcpy1
     ; CHECK: liveins: $x0, $x1, $x2
     ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
     ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
     ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
-    ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
+    ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
     ; CHECK: RET_ReallyLR
     %0:_(p0) = COPY $x0
     %1:_(p0) = COPY $x1
     %2:_(s64) = COPY $x2
-    G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
+    G_MEMCPY_INLINE %0(p0), %1(p0), %2(s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
     RET_ReallyLR
 
 ...
@@ -99,30 +101,30 @@
     ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4)
     ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4)
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
-    ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4)
-    ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4)
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
+    ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from %ir.1 + 16, align 4)
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store 16 into %ir.0 + 16, align 4)
     ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
-    ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load 16 from %ir.1 + 32, align 4)
-    ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 32, align 4)
+    ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+    ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load 16 from %ir.1 + 32, align 4)
+    ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store 16 into %ir.0 + 32, align 4)
     ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
-    ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
-    ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load 16 from %ir.1 + 48, align 4)
-    ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store 16 into %ir.0 + 48, align 4)
+    ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+    ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load 16 from %ir.1 + 48, align 4)
+    ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p0) :: (store 16 into %ir.0 + 48, align 4)
     ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
-    ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64)
-    ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP6]](p0) :: (load 8 from %ir.1 + 64, align 4)
-    ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CHECK: G_STORE [[LOAD4]](s64), [[GEP7]](p0) :: (store 8 into %ir.0 + 64, align 4)
+    ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64)
+    ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD6]](p0) :: (load 8 from %ir.1 + 64, align 4)
+    ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CHECK: G_STORE [[LOAD4]](s64), [[PTR_ADD7]](p0) :: (store 8 into %ir.0 + 64, align 4)
     ; CHECK: RET_ReallyLR
     %0:_(p0) = COPY $x0
     %1:_(p0) = COPY $x1
     %2:_(s64) = G_CONSTANT i64 72
-    G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
+    G_MEMCPY_INLINE %0(p0), %1(p0), %2(s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
     RET_ReallyLR
 
 ...
@@ -146,30 +148,30 @@
     ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4)
     ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4)
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
-    ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4)
-    ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4)
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
+    ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from %ir.1 + 16, align 4)
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store 16 into %ir.0 + 16, align 4)
     ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
-    ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load 16 from %ir.1 + 32, align 4)
-    ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 32, align 4)
+    ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+    ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load 16 from %ir.1 + 32, align 4)
+    ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store 16 into %ir.0 + 32, align 4)
     ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
-    ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
-    ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load 16 from %ir.1 + 48, align 4)
-    ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store 16 into %ir.0 + 48, align 4)
+    ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+    ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load 16 from %ir.1 + 48, align 4)
+    ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p0) :: (store 16 into %ir.0 + 48, align 4)
     ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
-    ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64)
-    ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP6]](p0) :: (load 8 from %ir.1 + 64, align 4)
-    ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CHECK: G_STORE [[LOAD4]](s64), [[GEP7]](p0) :: (store 8 into %ir.0 + 64, align 4)
+    ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64)
+    ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD6]](p0) :: (load 8 from %ir.1 + 64, align 4)
+    ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CHECK: G_STORE [[LOAD4]](s64), [[PTR_ADD7]](p0) :: (store 8 into %ir.0 + 64, align 4)
     ; CHECK: RET_ReallyLR
     %0:_(p0) = COPY $x0
     %1:_(p0) = COPY $x1
     %2:_(s64) = G_CONSTANT i64 72
-    G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
+    G_MEMCPY_INLINE %0(p0), %1(p0), %2(s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
     RET_ReallyLR
 
 ...
@@ -190,13 +192,33 @@
     ; CHECK: liveins: $x0, $x1
     ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
     ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 72
-    ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[C]](s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4)
+    ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
+    ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from %ir.1 + 16, align 4)
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store 16 into %ir.0 + 16, align 4)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+    ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load 16 from %ir.1 + 32, align 4)
+    ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store 16 into %ir.0 + 32, align 4)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+    ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+    ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load 16 from %ir.1 + 48, align 4)
+    ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p0) :: (store 16 into %ir.0 + 48, align 4)
+    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64)
+    ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD6]](p0) :: (load 8 from %ir.1 + 64, align 4)
+    ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CHECK: G_STORE [[LOAD4]](s64), [[PTR_ADD7]](p0) :: (store 8 into %ir.0 + 64, align 4)
     ; CHECK: RET_ReallyLR
     %0:_(p0) = COPY $x0
     %1:_(p0) = COPY $x1
     %2:_(s64) = G_CONSTANT i64 72
-    G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
+    G_MEMCPY_INLINE %0(p0), %1(p0), %2(s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
     RET_ReallyLR
 
 ...
@@ -220,50 +242,50 @@
     ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4)
     ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4)
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
-    ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4)
-    ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4)
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
+    ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from %ir.1 + 16, align 4)
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store 16 into %ir.0 + 16, align 4)
     ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
-    ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load 16 from %ir.1 + 32, align 4)
-    ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 32, align 4)
+    ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+    ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load 16 from %ir.1 + 32, align 4)
+    ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store 16 into %ir.0 + 32, align 4)
     ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
-    ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
-    ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load 16 from %ir.1 + 48, align 4)
-    ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store 16 into %ir.0 + 48, align 4)
+    ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+    ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load 16 from %ir.1 + 48, align 4)
+    ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p0) :: (store 16 into %ir.0 + 48, align 4)
     ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
-    ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64)
-    ; CHECK: [[LOAD4:%[0-9]+]]:_(s128) = G_LOAD [[GEP6]](p0) :: (load 16 from %ir.1 + 64, align 4)
-    ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CHECK: G_STORE [[LOAD4]](s128), [[GEP7]](p0) :: (store 16 into %ir.0 + 64, align 4)
+    ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64)
+    ; CHECK: [[LOAD4:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD6]](p0) :: (load 16 from %ir.1 + 64, align 4)
+    ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CHECK: G_STORE [[LOAD4]](s128), [[PTR_ADD7]](p0) :: (store 16 into %ir.0 + 64, align 4)
     ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80
-    ; CHECK: [[GEP8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64)
-    ; CHECK: [[LOAD5:%[0-9]+]]:_(s128) = G_LOAD [[GEP8]](p0) :: (load 16 from %ir.1 + 80, align 4)
-    ; CHECK: [[GEP9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CHECK: G_STORE [[LOAD5]](s128), [[GEP9]](p0) :: (store 16 into %ir.0 + 80, align 4)
+    ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64)
+    ; CHECK: [[LOAD5:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD8]](p0) :: (load 16 from %ir.1 + 80, align 4)
+    ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
+    ; CHECK: G_STORE [[LOAD5]](s128), [[PTR_ADD9]](p0) :: (store 16 into %ir.0 + 80, align 4)
     ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 96
-    ; CHECK: [[GEP10:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C5]](s64)
-    ; CHECK: [[LOAD6:%[0-9]+]]:_(s128) = G_LOAD [[GEP10]](p0) :: (load 16 from %ir.1 + 96, align 4)
-    ; CHECK: [[GEP11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CHECK: G_STORE [[LOAD6]](s128), [[GEP11]](p0) :: (store 16 into %ir.0 + 96, align 4)
+    ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C5]](s64)
+    ; CHECK: [[LOAD6:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD10]](p0) :: (load 16 from %ir.1 + 96, align 4)
+    ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
+    ; CHECK: G_STORE [[LOAD6]](s128), [[PTR_ADD11]](p0) :: (store 16 into %ir.0 + 96, align 4)
     ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 112
-    ; CHECK: [[GEP12:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C6]](s64)
-    ; CHECK: [[LOAD7:%[0-9]+]]:_(s128) = G_LOAD [[GEP12]](p0) :: (load 16 from %ir.1 + 112, align 4)
-    ; CHECK: [[GEP13:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CHECK: G_STORE [[LOAD7]](s128), [[GEP13]](p0) :: (store 16 into %ir.0 + 112, align 4)
+    ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C6]](s64)
+    ; CHECK: [[LOAD7:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD12]](p0) :: (load 16 from %ir.1 + 112, align 4)
+    ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
+    ; CHECK: G_STORE [[LOAD7]](s128), [[PTR_ADD13]](p0) :: (store 16 into %ir.0 + 112, align 4)
     ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 127
-    ; CHECK: [[GEP14:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C7]](s64)
-    ; CHECK: [[LOAD8:%[0-9]+]]:_(s128) = G_LOAD [[GEP14]](p0) :: (load 16 from %ir.1 + 127, align 1, basealign 4)
-    ; CHECK: [[GEP15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CHECK: G_STORE [[LOAD8]](s128), [[GEP15]](p0) :: (store 16 into %ir.0 + 127, align 1, basealign 4)
+    ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C7]](s64)
+    ; CHECK: [[LOAD8:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD14]](p0) :: (load 16 from %ir.1 + 127, align 1, basealign 4)
+    ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
+    ; CHECK: G_STORE [[LOAD8]](s128), [[PTR_ADD15]](p0) :: (store 16 into %ir.0 + 127, align 1, basealign 4)
     ; CHECK: RET_ReallyLR
     %0:_(p0) = COPY $x0
     %1:_(p0) = COPY $x1
     %2:_(s64) = G_CONSTANT i64 143
-    G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
+    G_MEMCPY_INLINE %0(p0), %1(p0), %2(s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
     RET_ReallyLR
 
 ...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memcpy-inline.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memcpy-inline.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memcpy-inline.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -mtriple=aarch64-unknown-unknown -global-isel -global-isel-abort=1 -verify-machineinstrs -stop-after=irtranslator %s -o - | FileCheck %s
+
+define void @copy(i8* %dst, i8* %src) {
+  ; CHECK-LABEL: name: copy
+  ; CHECK: bb.1.entry:
+  ; CHECK:   liveins: $x0, $x1
+  ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32)
+  ; CHECK:   G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 0 :: (store 1 into %ir.dst), (load 1 from %ir.src)
+  ; CHECK:   RET_ReallyLR
+entry:
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false)
+  ret void
+}
+
+define void @inline_copy(i8* %dst, i8* %src) {
+  ; CHECK-LABEL: name: inline_copy
+  ; CHECK: bb.1.entry:
+  ; CHECK:   liveins: $x0, $x1
+  ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32)
+  ; CHECK:   G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (store 1 into %ir.dst), (load 1 from %ir.src)
+  ; CHECK:   RET_ReallyLR
+entry:
+  call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false)
+  ret void
+}
+
+define void @copy_volatile(i8* %dst, i8* %src) {
+  ; CHECK-LABEL: name: copy_volatile
+  ; CHECK: bb.1.entry:
+  ; CHECK:   liveins: $x0, $x1
+  ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32)
+  ; CHECK:   G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 0 :: (volatile store 1 into %ir.dst), (volatile load 1 from %ir.src)
+  ; CHECK:   RET_ReallyLR
+entry:
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true)
+  ret void
+}
+
+define void @inline_copy_volatile(i8* %dst, i8* %src) {
+  ; CHECK-LABEL: name: inline_copy_volatile
+  ; CHECK: bb.1.entry:
+  ; CHECK:   liveins: $x0, $x1
+  ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32)
+  ; CHECK:   G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (volatile store 1 into %ir.dst), (volatile load 1 from %ir.src)
+  ; CHECK:   RET_ReallyLR
+entry:
+  call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true)
+  ret void
+}
+
+define void @tail_copy(i8* %dst, i8* %src) {
+  ; CHECK-LABEL: name: tail_copy
+  ; CHECK: bb.1.entry:
+  ; CHECK:   liveins: $x0, $x1
+  ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32)
+  ; CHECK:   G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 1 :: (store 1 into %ir.dst), (load 1 from %ir.src)
+  ; CHECK:   RET_ReallyLR
+entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false)
+  ret void
+}
+
+define void @tail_inline_copy(i8* %dst, i8* %src) {
+  ; CHECK-LABEL: name: tail_inline_copy
+  ; CHECK: bb.1.entry:
+  ; CHECK:   liveins: $x0, $x1
+  ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32)
+  ; CHECK:   G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (store 1 into %ir.dst), (load 1 from %ir.src)
+  ; CHECK:   RET_ReallyLR
+entry:
+  tail call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false)
+  ret void
+}
+
+define void @tail_copy_volatile(i8* %dst, i8* %src) {
+  ; CHECK-LABEL: name: tail_copy_volatile
+  ; CHECK: bb.1.entry:
+  ; CHECK:   liveins: $x0, $x1
+  ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32)
+  ; CHECK:   G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 1 :: (volatile store 1 into %ir.dst), (volatile load 1 from %ir.src)
+  ; CHECK:   RET_ReallyLR
+entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true)
+  ret void
+}
+
+define void @tail_inline_copy_volatile(i8* %dst, i8* %src) {
+  ; CHECK-LABEL: name: tail_inline_copy_volatile
+  ; CHECK: bb.1.entry:
+  ; CHECK:   liveins: $x0, $x1
+  ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32)
+  ; CHECK:   G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (volatile store 1 into %ir.dst), (volatile load 1 from %ir.src)
+  ; CHECK:   RET_ReallyLR
+entry:
+  tail call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) nounwind
+declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) nounwind
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -629,6 +629,9 @@
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_MEMCPY_INLINE (opcode 219): 3 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: G_MEMMOVE (opcode {{[0-9]+}}): 3 type indices, 1 imm index
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-memcpy-inline.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-memcpy-inline.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-memcpy-inline.mir
@@ -0,0 +1,81 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s
+--- |
+  target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
+  target triple = "amdgcn-amd-amdhsa"
+
+  declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64 immarg, i1 immarg) #0
+
+  define void @test_memcpy_inline(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #1 {
+  entry:
+    %0 = bitcast i32* %dst to i8*
+    %1 = bitcast i32* %src to i8*
+    tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 13, i1 false)
+    ret void
+  }
+
+  attributes #0 = { argmemonly nofree nounwind willreturn "target-cpu"="gfx900" }
+  attributes #1 = { "target-cpu"="gfx900" }
+
+...
+---
+name:            test_memcpy_inline
+alignment:       1
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: sgpr_64 }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+  - { id: 6, class: _ }
+  - { id: 7, class: _ }
+  - { id: 8, class: ccr_sgpr_64 }
+liveins:
+  - { reg: '$sgpr30_sgpr31', virtual-reg: '%2' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo:
+  maxKernArgAlign: 1
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+  argumentInfo:
+    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+  occupancy:       10
+body:             |
+  bb.1.entry:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+
+    ; CHECK-LABEL: name: test_memcpy_inline
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+    ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+    ; CHECK: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+    ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[MV1]](p0) :: (load 8 from %ir.1, align 4)
+    ; CHECK: G_STORE [[LOAD]](s64), [[MV]](p0) :: (store 8 into %ir.0, align 4)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[MV1]], [[C]](s64)
+    ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load 8 from %ir.1 + 5, align 1, basealign 4)
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[MV]], [[C]](s64)
+    ; CHECK: G_STORE [[LOAD1]](s64), [[PTR_ADD1]](p0) :: (store 8 into %ir.0 + 5, align 1, basealign 4)
+    ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]]
+    ; CHECK: S_SETPC_B64_return [[COPY5]]
+    %3:_(s32) = COPY $vgpr0
+    %4:_(s32) = COPY $vgpr1
+    %0:_(p0) = G_MERGE_VALUES %3(s32), %4(s32)
+    %5:_(s32) = COPY $vgpr2
+    %6:_(s32) = COPY $vgpr3
+    %1:_(p0) = G_MERGE_VALUES %5(s32), %6(s32)
+    %2:sgpr_64 = COPY $sgpr30_sgpr31
+    %7:_(s64) = G_CONSTANT i64 13
+    G_MEMCPY_INLINE %0(p0), %1(p0), %7(s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
+    %8:ccr_sgpr_64 = COPY %2
+    S_SETPC_B64_return %8
+
+...
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.mir b/llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.mir
@@ -0,0 +1,60 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=mips-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32
+--- |
+  ; ModuleID = '../llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.ll'
+  source_filename = "../llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.ll"
+  target datalayout = "e-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64"
+  target triple = "mipsel-pc-linux-gnu"
+
+  declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64 immarg, i1 immarg) #0
+
+  define void @test_memcpy_inline(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr {
+  entry:
+    %0 = bitcast i32* %dst to i8*
+    %1 = bitcast i32* %src to i8*
+    tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 2, i1 false)
+    ret void
+  }
+
+  attributes #0 = { argmemonly nofree nounwind willreturn }
+
+...
+---
+name:            test_memcpy_inline
+alignment:       4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+liveins:
+  - { reg: '$a0' }
+  - { reg: '$a1' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1
+
+    ; MIPS32-LABEL: name: test_memcpy_inline
+    ; MIPS32: liveins: $a0, $a1
+    ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
+    ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
+    ; MIPS32: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p0) :: (load 1 from %ir.1, align 4)
+    ; MIPS32: G_STORE [[LOAD]](s8), [[COPY]](p0) :: (store 1 into %ir.0, align 4)
+    ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s32)
+    ; MIPS32: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from %ir.1 + 1, basealign 4)
+    ; MIPS32: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
+    ; MIPS32: G_STORE [[LOAD1]](s8), [[PTR_ADD1]](p0) :: (store 1 into %ir.0 + 1, basealign 4)
+    ; MIPS32: RetRA
+    %0:_(p0) = COPY $a0
+    %1:_(p0) = COPY $a1
+    %2:_(s64) = G_CONSTANT i64 2
+    %3:_(s32) = G_TRUNC %2(s64)
+    G_MEMCPY_INLINE %0(p0), %1(p0), %3(s32) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
+    RetRA
+
+...
diff --git a/llvm/test/MachineVerifier/test_g_bzero.mir b/llvm/test/MachineVerifier/test_g_bzero.mir
--- a/llvm/test/MachineVerifier/test_g_bzero.mir
+++ b/llvm/test/MachineVerifier/test_g_bzero.mir
@@ -27,7 +27,9 @@
     ; CHECK: *** Bad machine code: inconsistent bzero address space ***
     G_BZERO %ptr, %cst2, 0 :: (store 4, addrspace 1)
 
-   ; CHECK: *** Bad machine code: bzero operand must be a pointer ***
+    ; CHECK: *** Bad machine code: bzero operand must be a pointer ***
     G_BZERO %cst1, %cst2, 0 :: (store 4)
 
+    ; CHECK: *** Bad machine code: 'tail' flag (last operand) must be an immediate 0 or 1 ***
+    G_BZERO %ptr, %cst2, 2 :: (store 4)
 ...
diff --git a/llvm/test/MachineVerifier/test_g_memcpy.mir b/llvm/test/MachineVerifier/test_g_memcpy.mir
--- a/llvm/test/MachineVerifier/test_g_memcpy.mir
+++ b/llvm/test/MachineVerifier/test_g_memcpy.mir
@@ -47,4 +47,9 @@
     ; CHECK: *** Bad machine code: memory instruction operand must be a pointer ***
     G_MEMCPY %0, %2, %2, 0 :: (store 4), (load 4)
 
+    ; CHECK: *** Bad machine code: 'tail' flag (operand 3) must be an immediate 0 or 1 ***
+    G_MEMCPY %0, %0, %2, %0 :: (store 4), (load 4)
+
+    ; CHECK: *** Bad machine code: 'tail' flag (operand 3) must be an immediate 0 or 1 ***
+    G_MEMCPY %0, %0, %2, 2 :: (store 4), (load 4)
 ...
diff --git a/llvm/test/MachineVerifier/test_g_memcpy_inline.mir b/llvm/test/MachineVerifier/test_g_memcpy_inline.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/MachineVerifier/test_g_memcpy_inline.mir
@@ -0,0 +1,49 @@
+#RUN: not --crash llc -o - -march=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s
+# REQUIRES: aarch64-registered-target
+---
+name:            test_memcpy_inline
+legalized:       true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+liveins:
+body:             |
+  bb.0:
+
+    %0:_(p0) = G_CONSTANT i64 0
+    %1:_(p0) = G_CONSTANT i64 4
+    %2:_(s64) = G_CONSTANT i64 4
+
+    ; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands ***
+    G_MEMCPY_INLINE %0, %1, %2
+
+    ; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands ***
+    G_MEMCPY_INLINE %0, %1, %2 :: (load 4)
+
+    ; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands ***
+    G_MEMCPY_INLINE %0, %1, %2 :: (store 4)
+
+    ; CHECK: *** Bad machine code: wrong memory operand types ***
+    G_MEMCPY_INLINE %0, %1, %2 :: (load 4), (store 4)
+
+    ; CHECK: *** Bad machine code: inconsistent memory operand sizes ***
+    G_MEMCPY_INLINE %0, %1, %2 :: (store 8), (load 4)
+
+    ; CHECK: *** Bad machine code: inconsistent memory operand sizes ***
+    G_MEMCPY_INLINE %0, %1, %2 :: (store unknown-size), (load 4)
+
+    ; CHECK: *** Bad machine code: inconsistent memory operand sizes ***
+    G_MEMCPY_INLINE %0, %1, %2 :: (store 8), (load unknown-size)
+
+    ; CHECK: *** Bad machine code: inconsistent store address space ***
+    G_MEMCPY_INLINE %0, %1, %2 :: (store 4, addrspace 1), (load 4)
+
+    ; CHECK: *** Bad machine code: inconsistent load address space ***
+    G_MEMCPY_INLINE %0, %1, %2 :: (store 4), (load 4, addrspace 1)
+
+    ; CHECK: *** Bad machine code: memory instruction operand must be a pointer ***
+    G_MEMCPY_INLINE %2, %0, %2 :: (store 4), (load 4)
+
+    ; CHECK: *** Bad machine code: memory instruction operand must be a pointer ***
+    G_MEMCPY_INLINE %0, %2, %2 :: (store 4), (load 4)
+...
diff --git a/llvm/test/MachineVerifier/test_g_memmove.mir b/llvm/test/MachineVerifier/test_g_memmove.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/MachineVerifier/test_g_memmove.mir
@@ -0,0 +1,55 @@
+#RUN: not --crash llc -o - -march=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s
+# REQUIRES: aarch64-registered-target
+---
+name:            test_memmove
+legalized:       true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+liveins:
+body:             |
+  bb.0:
+
+    %0:_(p0) = G_CONSTANT i64 0
+    %1:_(p0) = G_CONSTANT i64 4
+    %2:_(s64) = G_CONSTANT i64 4
+
+    ; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands ***
+    G_MEMMOVE %0, %1, %2, 0
+
+    ; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands ***
+    G_MEMMOVE %0, %1, %2, 0 :: (load 4)
+
+    ; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands ***
+    G_MEMMOVE %0, %1, %2, 0 :: (store 4)
+
+    ; CHECK: *** Bad machine code: wrong memory operand types ***
+    G_MEMMOVE %0, %1, %2, 0 :: (load 4), (store 4)
+
+    ; CHECK: *** Bad machine code: inconsistent memory operand sizes ***
+    G_MEMMOVE %0, %1, %2, 0 :: (store 8), (load 4)
+
+    ; CHECK: *** Bad machine code: inconsistent memory operand sizes ***
+    G_MEMMOVE %0, %1, %2, 0 :: (store unknown-size), (load 4)
+
+    ; CHECK: *** Bad machine code: inconsistent memory operand sizes ***
+    G_MEMMOVE %0, %1, %2, 0 :: (store 8), (load unknown-size)
+
+    ; CHECK: *** Bad machine code: inconsistent store address space ***
+    G_MEMMOVE %0, %1, %2, 0 :: (store 4, addrspace 1), (load 4)
+
+    ; CHECK: *** Bad machine code: inconsistent load address space ***
+    G_MEMMOVE %0, %1, %2, 0 :: (store 4), (load 4, addrspace 1)
+
+    ; CHECK: *** Bad machine code: memory instruction operand must be a pointer ***
+    G_MEMMOVE %2, %0, %2, 0 :: (store 4), (load 4)
+
+    ; CHECK: *** Bad machine code: memory instruction operand must be a pointer ***
+    G_MEMMOVE %0, %2, %2, 0 :: (store 4), (load 4)
+
+    ; CHECK: *** Bad machine code: 'tail' flag (operand 3) must be an immediate 0 or 1 ***
+    G_MEMMOVE %0, %0, %2, %0 :: (store 4), (load 4)
+
+    ; CHECK: *** Bad machine code: 'tail' flag (operand 3) must be an immediate 0 or 1 ***
+    G_MEMMOVE %0, %0, %2, 2 :: (store 4), (load 4)
+...