diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -534,7 +534,7 @@
   // Memcpy family optimization helpers.
   bool optimizeMemcpy(MachineInstr &MI, Register Dst, Register Src,
                       unsigned KnownLen, Align DstAlign, Align SrcAlign,
-                      bool IsVolatile);
+                      bool IsVolatile, bool IsInlined);
   bool optimizeMemmove(MachineInstr &MI, Register Dst, Register Src,
                        unsigned KnownLen, Align DstAlign, Align SrcAlign,
                        bool IsVolatile);
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1831,7 +1831,8 @@
                                   MachineMemOperand &DstMMO,
                                   MachineMemOperand &SrcMMO) {
     auto MIB = buildInstr(
-        Opcode, {}, {DstPtr, SrcPtr, Size, SrcOp(INT64_C(0) /*isTailCall*/)});
+        Opcode, {}, {DstPtr, SrcPtr, Size, SrcOp(INT64_C(0) /*isTailCall*/),
+                     SrcOp(INT64_C(0) /*isInlined*/)});
     MIB.addMemOperand(&DstMMO);
     MIB.addMemOperand(&SrcMMO);
     return MIB;
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -892,12 +892,13 @@
   }
 };
-/// This class wraps the llvm.memcpy intrinsic.
+/// This class wraps the llvm.memcpy and llvm.memcpy.inline intrinsics.
 class MemCpyInst : public MemTransferInst {
 public:
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static bool classof(const IntrinsicInst *I) {
-    return I->getIntrinsicID() == Intrinsic::memcpy;
+    return I->getIntrinsicID() == Intrinsic::memcpy ||
+           I->getIntrinsicID() == Intrinsic::memcpy_inline;
   }
   static bool classof(const Value *V) {
     return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
   }
@@ -917,10 +918,10 @@
 };
 /// This class wraps the llvm.memcpy.inline intrinsic.
-class MemCpyInlineInst : public MemTransferInst {
+class MemCpyInlineInst : public MemCpyInst {
 public:
   ConstantInt *getLength() const {
-    return cast<ConstantInt>(MemTransferInst::getLength());
+    return cast<ConstantInt>(MemCpyInst::getLength());
   }
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static bool classof(const IntrinsicInst *I) {
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -1347,7 +1347,7 @@
 def G_MEMCPY : GenericInstruction {
   let OutOperandList = (outs);
-  let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size, untyped_imm_0:$tailcall);
+  let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size, untyped_imm_0:$tailcall, untyped_imm_0:$inlined);
   let hasSideEffects = false;
   let mayLoad = true;
   let mayStore = true;
@@ -1355,7 +1355,7 @@
 def G_MEMMOVE : GenericInstruction {
   let OutOperandList = (outs);
-  let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size, untyped_imm_0:$tailcall);
+  let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size, untyped_imm_0:$tailcall, untyped_imm_0:$inlined);
   let hasSideEffects = false;
   let mayLoad = true;
   let mayStore = true;
@@ -1363,14 +1363,14 @@
 def G_MEMSET : GenericInstruction {
   let OutOperandList = (outs);
-  let InOperandList = (ins ptype0:$dst_addr, type1:$value, type2:$size, untyped_imm_0:$tailcall);
+  let InOperandList = (ins ptype0:$dst_addr, type1:$value, type2:$size, untyped_imm_0:$tailcall, untyped_imm_0:$inlined);
   let hasSideEffects = false;
   let mayStore = true;
 }
 def G_BZERO : GenericInstruction {
   let OutOperandList = (outs);
-  let InOperandList = (ins ptype0:$dst_addr, type1:$size, untyped_imm_0:$tailcall);
+  let InOperandList = (ins ptype0:$dst_addr, type1:$size, untyped_imm_0:$tailcall, untyped_imm_0:$inlined);
   let hasSideEffects = false;
   let mayStore = true;
 }
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1333,7 +1333,7 @@
 bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
                                     Register Src, unsigned KnownLen,
                                     Align DstAlign, Align SrcAlign,
-                                    bool IsVolatile) {
+                                    bool IsVolatile, bool IsInlined) {
   auto &MF = *MI.getParent()->getParent();
   const auto &TLI = *MF.getSubtarget().getTargetLowering();
   auto &DL = MF.getDataLayout();
@@ -1354,7 +1354,9 @@
   // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
   // if the memcpy is in a tail call position.
-  unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
+  unsigned Limit = IsInlined
+                       ? std::numeric_limits<unsigned>::max()
+                       : TLI.getMaxStoresPerMemcpy(OptSize);
   std::vector<LLT> MemOps;
   const auto &DstMMO = **MI.memoperands_begin();
@@ -1560,6 +1562,9 @@
   Register Dst = MI.getOperand(0).getReg();
   Register Src = MI.getOperand(1).getReg();
   Register Len = MI.getOperand(2).getReg();
+  bool IsInlined = MI.getOperand(4).getImm();
+  assert((!IsInlined || Opc == TargetOpcode::G_MEMCPY) &&
+         "currently, only memcpy supports inlining");
   if (Opc != TargetOpcode::G_MEMSET) {
     assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
@@ -1578,11 +1583,11 @@
     return true;
   }
-  if (MaxLen && KnownLen > MaxLen)
+  if (!IsInlined && MaxLen && KnownLen > MaxLen)
     return false;
   if (Opc == TargetOpcode::G_MEMCPY)
-    return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
+    return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile, IsInlined);
   if (Opc == TargetOpcode::G_MEMMOVE)
     return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
   if (Opc == TargetOpcode::G_MEMSET)
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1601,6 +1601,9 @@
   // any memory intrinsics.
   ICall.addImm(CI.isTailCall() ? 1 : 0);
+  // The "inline" part of llvm.memcpy.inline is also propagated as an argument.
+  ICall.addImm(isa<MemCpyInlineInst>(&CI) ? 1 : 0);
+
   // Create mem operands to store the alignment and volatile info.
   auto VolFlag = IsVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
   ICall.addMemOperand(MF->getMachineMemOperand(
@@ -2030,6 +2033,7 @@
                            getOrCreateVReg(*CI.getArgOperand(0)),
                            MachineInstr::copyFlagsFromInstruction(CI));
     return true;
+  case Intrinsic::memcpy_inline:
   case Intrinsic::memcpy:
     return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY);
   case Intrinsic::memmove:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -557,8 +557,9 @@
   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
   SmallVector<CallLowering::ArgInfo, 3> Args;
-  // Add all the args, except for the last which is an imm denoting 'tail'.
-  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
+  // Add all the args, except for the last two, which are imms denoting 'tail'
+  // and 'inlined' respectively.
+  for (unsigned i = 0; i < MI.getNumOperands() - 2; ++i) {
     Register Reg = MI.getOperand(i).getReg();
     // Need derive an IR type for call lowering.
@@ -604,8 +605,12 @@
   Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
   Info.Callee = MachineOperand::CreateES(Name);
   Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
-  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
+  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 2).getImm() &&
                     isLibCallInTailPosition(MIRBuilder.getTII(), MI);
+#ifndef NDEBUG
+  bool Inlined = MI.getOperand(MI.getNumOperands() - 1).getImm();
+  assert(!Inlined && "inlined memory op should already have been lowered");
+#endif
   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
   if (!CLI.lowerCall(MIRBuilder, Info))
@@ -763,6 +768,8 @@
   case TargetOpcode::G_MEMCPY:
   case TargetOpcode::G_MEMMOVE:
   case TargetOpcode::G_MEMSET: {
+    assert((MI.getOpcode() != TargetOpcode::G_MEMCPY ||
+            MI.getOperand(4).getImm() == 0) && "llvm.memcpy.inline should already have been expanded before legalization");
     LegalizeResult Result =
         createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
     if (Result != Legalized)
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1507,6 +1507,12 @@
     if (SrcPtrTy.getAddressSpace() != MMOs[1]->getAddrSpace())
       report("inconsistent load address space", MI);
+    if (!MI->getOperand(3).isImm())
+      report("'tail' flag (operand 3) must be an immediate type", MI);
+
+    if (!MI->getOperand(4).isImm())
+      report("'inlined' flag (operand 4) must be an immediate type", MI);
+
     break;
   }
   case TargetOpcode::G_BZERO:
@@ -1532,6 +1538,12 @@
     if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace())
      report("inconsistent " + Twine(Name, " address space"), MI);
+    if (!MI->getOperand(MI->getNumOperands() - 2).isImm())
+      report("'tail' flag (second to last operand) must be an immediate type", MI);
+
+    if (!MI->getOperand(MI->getNumOperands() - 1).isImm())
+      report("'inlined' flag (last operand) must be an immediate type", MI);
+
     break;
   }
   case TargetOpcode::G_VECREDUCE_SEQ_FADD:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
@@ -90,6 +90,7 @@
       .buildInstr(TargetOpcode::G_BZERO, {},
                   {MI.getOperand(0), MI.getOperand(2)})
       .addImm(MI.getOperand(3).getImm())
+      .addImm(MI.getOperand(4).getImm())
      .addMemOperand(*MI.memoperands_begin());
   MI.eraseFromParent();
   return true;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -276,7 +276,7 @@
   case TargetOpcode::G_MEMMOVE:
   case TargetOpcode::G_MEMSET: {
     // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other
-    // heuristics decide.
+    // heuristics decide (unless it is an llvm.memcpy.inline, of course).
     unsigned MaxLen = EnableOpt ? 0 : 32;
     // Try to inline memcpy type calls if optimizations are enabled.
if (Helper.tryCombineMemCpyFamily(MI, MaxLen)) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -1138,7 +1138,7 @@ ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[SRC:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2 -; CHECK: G_MEMCPY [[DST]](p0), [[SRC]](p0), [[SIZE]](s64), 0 :: (store 1 into %ir.dst), (load 1 from %ir.src) +; CHECK: G_MEMCPY [[DST]](p0), [[SRC]](p0), [[SIZE]](s64), 0, 0 :: (store 1 into %ir.dst), (load 1 from %ir.src) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i1 0) ret void } @@ -1148,7 +1148,7 @@ ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[SRC:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2 -; CHECK: G_MEMCPY [[DST]](p0), [[SRC]](p0), [[SIZE]](s64), 1 :: (store 1 into %ir.dst), (load 1 from %ir.src) +; CHECK: G_MEMCPY [[DST]](p0), [[SRC]](p0), [[SIZE]](s64), 1, 0 :: (store 1 into %ir.dst), (load 1 from %ir.src) tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i1 0) ret void } @@ -1159,7 +1159,7 @@ ; CHECK: [[DST:%[0-9]+]]:_(p1) = COPY $x0 ; CHECK: [[SRC:%[0-9]+]]:_(p1) = COPY $x1 ; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2 -; CHECK: G_MEMCPY [[DST]](p1), [[SRC]](p1), [[SIZE]](s64), 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 1) +; CHECK: G_MEMCPY [[DST]](p1), [[SRC]](p1), [[SIZE]](s64), 0, 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 1) call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %size, i1 0) ret void } @@ -1170,7 +1170,7 @@ ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[SRC:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2 -; CHECK: G_MEMMOVE [[DST]](p0), [[SRC]](p0), [[SIZE]](s64), 0 :: (store 1 into %ir.dst), (load 1 from %ir.src) +; CHECK: G_MEMMOVE [[DST]](p0), [[SRC]](p0), [[SIZE]](s64), 0, 0 :: (store 1 into %ir.dst), (load 1 from %ir.src) call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i1 0) ret void } @@ -1182,7 +1182,7 @@ ; CHECK: [[SRC_C:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK: [[SRC:%[0-9]+]]:_(s8) = G_TRUNC [[SRC_C]] ; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY $x2 -; CHECK: G_MEMSET [[DST]](p0), [[SRC]](s8), [[SIZE]](s64), 0 :: (store 1 into %ir.dst) +; CHECK: G_MEMSET [[DST]](p0), [[SRC]](s8), [[SIZE]](s64), 0, 0 :: (store 1 into %ir.dst) call void @llvm.memset.p0i8.i64(i8* %dst, i8 %val, i64 %size, i1 0) ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/debug-loc-legalize-tail-call.mir b/llvm/test/CodeGen/AArch64/GlobalISel/debug-loc-legalize-tail-call.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/debug-loc-legalize-tail-call.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/debug-loc-legalize-tail-call.mir @@ -36,7 +36,7 @@ %0:_(p0) = G_IMPLICIT_DEF debug-location !DILocation(line: 0, scope: !6) %1:_(s8) = G_CONSTANT i8 0 %2:_(s64) = G_IMPLICIT_DEF debug-location !DILocation(line: 0, scope: !6) - G_MEMSET %0(p0), %1(s8), %2(s64), 1, debug-location !11 :: (store 1) + G_MEMSET %0(p0), %1(s8), %2(s64), 1, 0, debug-location !11 :: (store 1) DBG_VALUE 0, 0, !9, !DIExpression(), debug-location !12 RET_ReallyLR debug-location !12 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir --- 
a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir @@ -70,12 +70,12 @@ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 - ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 1, 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = COPY $x2 - G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY %0(p0), %1(p0), %2(s64), 1, 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) RET_ReallyLR ... @@ -99,30 +99,30 @@ ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4) ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4) - ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4) + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from %ir.1 + 16, align 4) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store 16 into %ir.0 + 16, align 4) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load 16 from %ir.1 + 32, align 4) - ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 32, align 4) + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load 16 from %ir.1 + 32, align 4) + ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store 16 into %ir.0 + 32, align 4) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load 16 from %ir.1 + 48, align 4) - ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store 16 into %ir.0 + 48, align 4) + ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load 16 from %ir.1 + 48, align 4) + ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p0) :: (store 16 into %ir.0 + 48, align 4) ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) - ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP6]](p0) :: (load 8 from %ir.1 + 64, align 4) - ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CHECK: G_STORE [[LOAD4]](s64), [[GEP7]](p0) :: (store 8 into %ir.0 + 64, align 4) + ; CHECK: 
[[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD6]](p0) :: (load 8 from %ir.1 + 64, align 4) + ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK: G_STORE [[LOAD4]](s64), [[PTR_ADD7]](p0) :: (store 8 into %ir.0 + 64, align 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 72 - G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY %0(p0), %1(p0), %2(s64), 1, 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) RET_ReallyLR ... @@ -146,30 +146,30 @@ ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4) ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4) - ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4) + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from %ir.1 + 16, align 4) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store 16 into %ir.0 + 16, align 4) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load 16 from %ir.1 + 32, align 4) - ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 32, align 4) + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load 16 from %ir.1 + 32, align 4) + ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store 16 into %ir.0 + 32, align 4) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load 16 from %ir.1 + 48, align 4) - ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store 16 into %ir.0 + 48, align 4) + ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load 16 from %ir.1 + 48, align 4) + ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p0) :: (store 16 into %ir.0 + 48, align 4) ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) - ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP6]](p0) :: (load 8 from %ir.1 + 64, align 4) - ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CHECK: G_STORE [[LOAD4]](s64), [[GEP7]](p0) :: (store 8 into %ir.0 + 64, align 4) + ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD6]](p0) :: (load 8 from %ir.1 + 64, align 4) + ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], 
[[C3]](s64) + ; CHECK: G_STORE [[LOAD4]](s64), [[PTR_ADD7]](p0) :: (store 8 into %ir.0 + 64, align 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 72 - G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY %0(p0), %1(p0), %2(s64), 1, 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) RET_ReallyLR ... @@ -191,15 +191,14 @@ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 72 - ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[C]](s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[C]](s64), 1, 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 72 - G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY %0(p0), %1(p0), %2(s64), 1, 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) RET_ReallyLR -... --- name: test_memcpy3_const_arrays_unaligned alignment: 4 @@ -220,50 +219,50 @@ ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4) ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4) - ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4) + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from %ir.1 + 16, align 4) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store 16 into %ir.0 + 16, align 4) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load 16 from %ir.1 + 32, align 4) - ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 32, align 4) + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load 16 from %ir.1 + 32, align 4) + ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store 16 into %ir.0 + 32, align 4) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load 16 from %ir.1 + 48, align 4) - ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store 16 into %ir.0 + 48, align 4) + ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load 16 from %ir.1 + 48, align 4) + ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p0) :: (store 16 into %ir.0 + 48, align 4) ; CHECK: [[C3:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 64 - ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) - ; CHECK: [[LOAD4:%[0-9]+]]:_(s128) = G_LOAD [[GEP6]](p0) :: (load 16 from %ir.1 + 64, align 4) - ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CHECK: G_STORE [[LOAD4]](s128), [[GEP7]](p0) :: (store 16 into %ir.0 + 64, align 4) + ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD6]](p0) :: (load 16 from %ir.1 + 64, align 4) + ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK: G_STORE [[LOAD4]](s128), [[PTR_ADD7]](p0) :: (store 16 into %ir.0 + 64, align 4) ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 - ; CHECK: [[GEP8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64) - ; CHECK: [[LOAD5:%[0-9]+]]:_(s128) = G_LOAD [[GEP8]](p0) :: (load 16 from %ir.1 + 80, align 4) - ; CHECK: [[GEP9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CHECK: G_STORE [[LOAD5]](s128), [[GEP9]](p0) :: (store 16 into %ir.0 + 80, align 4) + ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64) + ; CHECK: [[LOAD5:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD8]](p0) :: (load 16 from %ir.1 + 80, align 4) + ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; CHECK: G_STORE [[LOAD5]](s128), [[PTR_ADD9]](p0) :: (store 16 into %ir.0 + 80, align 4) ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 - ; CHECK: [[GEP10:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C5]](s64) - ; CHECK: [[LOAD6:%[0-9]+]]:_(s128) = G_LOAD [[GEP10]](p0) :: (load 16 from %ir.1 + 96, align 4) - ; CHECK: [[GEP11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CHECK: G_STORE [[LOAD6]](s128), [[GEP11]](p0) :: (store 16 into %ir.0 + 96, align 4) + ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C5]](s64) + ; CHECK: [[LOAD6:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD10]](p0) :: (load 16 from %ir.1 + 96, align 4) + ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CHECK: G_STORE [[LOAD6]](s128), [[PTR_ADD11]](p0) :: (store 16 into %ir.0 + 96, align 4) ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 - ; CHECK: [[GEP12:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C6]](s64) - ; CHECK: [[LOAD7:%[0-9]+]]:_(s128) = G_LOAD [[GEP12]](p0) :: (load 16 from %ir.1 + 112, align 4) - ; CHECK: [[GEP13:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CHECK: G_STORE [[LOAD7]](s128), [[GEP13]](p0) :: (store 16 into %ir.0 + 112, align 4) + ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C6]](s64) + ; CHECK: [[LOAD7:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD12]](p0) :: (load 16 from %ir.1 + 112, align 4) + ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; CHECK: G_STORE [[LOAD7]](s128), [[PTR_ADD13]](p0) :: (store 16 into %ir.0 + 112, align 4) ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 127 - ; CHECK: [[GEP14:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C7]](s64) - ; CHECK: [[LOAD8:%[0-9]+]]:_(s128) = G_LOAD [[GEP14]](p0) :: (load 16 from %ir.1 + 127, align 1, basealign 4) - ; CHECK: [[GEP15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CHECK: G_STORE [[LOAD8]](s128), [[GEP15]](p0) :: (store 16 into %ir.0 + 127, align 1, basealign 4) + ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C7]](s64) + ; CHECK: [[LOAD8:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD14]](p0) :: (load 16 from %ir.1 + 127, align 1, basealign 4) + ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) + ; CHECK: G_STORE [[LOAD8]](s128), [[PTR_ADD15]](p0) 
:: (store 16 into %ir.0 + 127, align 1, basealign 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 143 - G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY %0(p0), %1(p0), %2(s64), 1, 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir @@ -55,12 +55,12 @@ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 - ; CHECK: G_MEMMOVE [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + ; CHECK: G_MEMMOVE [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 1, 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = COPY $x2 - G_MEMMOVE %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMMOVE %0(p0), %1(p0), %2(s64), 1, 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) RET_ReallyLR ... @@ -94,7 +94,7 @@ %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 48 - G_MEMMOVE %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMMOVE %0(p0), %1(p0), %2(s64), 1, 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) RET_ReallyLR ... @@ -111,12 +111,12 @@ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 - ; CHECK: G_MEMMOVE [[COPY]](p0), [[COPY1]](p0), [[C]](s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + ; CHECK: G_MEMMOVE [[COPY]](p0), [[COPY1]](p0), [[C]](s64), 1, 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 96 - G_MEMMOVE %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMMOVE %0(p0), %1(p0), %2(s64), 1, 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) RET_ReallyLR ... @@ -156,7 +156,7 @@ %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 52 - G_MEMMOVE %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMMOVE %0(p0), %1(p0), %2(s64), 1, 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) RET_ReallyLR ... 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir @@ -69,14 +69,14 @@ ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY2]](s32) - ; CHECK: G_MEMSET [[COPY]](p0), [[TRUNC]](s8), [[ZEXT]](s64), 1 :: (store 1 into %ir.dst) + ; CHECK: G_MEMSET [[COPY]](p0), [[TRUNC]](s8), [[ZEXT]](s64), 1, 0 :: (store 1 into %ir.dst) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(s32) = COPY $w1 %2:_(s32) = COPY $w2 %3:_(s8) = G_TRUNC %1(s32) %4:_(s64) = G_ZEXT %2(s32) - G_MEMSET %0(p0), %3(s8), %4(s64), 1 :: (store 1 into %ir.dst) + G_MEMSET %0(p0), %3(s8), %4(s64), 1, 0 :: (store 1 into %ir.dst) RET_ReallyLR ... @@ -105,7 +105,7 @@ %1:_(s32) = COPY $w1 %3:_(s64) = G_CONSTANT i64 16 %2:_(s8) = G_TRUNC %1(s32) - G_MEMSET %0(p0), %2(s8), %3(s64), 1 :: (store 1 into %ir.dst) + G_MEMSET %0(p0), %2(s8), %3(s64), 1, 0 :: (store 1 into %ir.dst) RET_ReallyLR ... @@ -137,7 +137,7 @@ %1:_(s32) = G_CONSTANT i32 0 %3:_(s64) = G_CONSTANT i64 64 %2:_(s8) = G_TRUNC %1(s32) - G_MEMSET %0(p0), %2(s8), %3(s64), 1 :: (store 1 into %ir.dst) + G_MEMSET %0(p0), %2(s8), %3(s64), 1, 0 :: (store 1 into %ir.dst) RET_ReallyLR ... @@ -162,7 +162,7 @@ %0:_(p0) = COPY $x0 %1:_(s8) = G_CONSTANT i8 64 %2:_(s64) = G_CONSTANT i64 16 - G_MEMSET %0(p0), %1(s8), %2(s64), 1 :: (store 1 into %ir.dst) + G_MEMSET %0(p0), %1(s8), %2(s64), 1, 0 :: (store 1 into %ir.dst) RET_ReallyLR ... @@ -198,7 +198,7 @@ %1:_(s32) = COPY $w1 %3:_(s64) = G_CONSTANT i64 60 %2:_(s8) = G_TRUNC %1(s32) - G_MEMSET %0(p0), %2(s8), %3(s64), 1 :: (store 1 into %ir.dst) + G_MEMSET %0(p0), %2(s8), %3(s64), 1, 0 :: (store 1 into %ir.dst) RET_ReallyLR ... @@ -226,7 +226,7 @@ %0:_(p0) = COPY $x0 %1:_(s8) = G_CONSTANT i8 64 %2:_(s64) = G_CONSTANT i64 18 - G_MEMSET %0(p0), %1(s8), %2(s64), 1 :: (store 1 into %ir.dst) + G_MEMSET %0(p0), %1(s8), %2(s64), 1, 0 :: (store 1 into %ir.dst) RET_ReallyLR ... @@ -254,5 +254,5 @@ %1:_(s32) = COPY $w1 %3:_(s64) = G_CONSTANT i64 16 %2:_(s8) = G_TRUNC %1(s32) - G_MEMSET %0(p0), %2(s8), %3(s64), 1 :: (store 1 into %ir.dst) + G_MEMSET %0(p0), %2(s8), %3(s64), 1, 0 :: (store 1 into %ir.dst) RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-small-memcpy.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-small-memcpy.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-small-memcpy.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-small-memcpy.mir @@ -5,6 +5,7 @@ target triple = "arm64-apple-darwin" declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #1 + declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #1 define void @test_small_memcpy(i32* nocapture %dst, i32* nocapture readonly %src) { entry: @@ -22,6 +23,14 @@ ret void } + define void @test_large_memcpy_inline(i32* nocapture %dst, i32* nocapture readonly %src) { + entry: + %0 = bitcast i32* %dst to i8* + %1 = bitcast i32* %src to i8* + tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 36, i1 false) + ret void + } + attributes #1 = { argmemonly nounwind } ... 
@@ -45,15 +54,15 @@ ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4) ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4) - ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4) + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from %ir.1 + 16, align 4) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store 16 into %ir.0 + 16, align 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 32 - G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY %0(p0), %1(p0), %2(s64), 1, 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) RET_ReallyLR ... @@ -75,12 +84,49 @@ ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 - ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[C]](s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[C]](s64), 1, 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(s64) = G_CONSTANT i64 36 + G_MEMCPY %0(p0), %1(p0), %2(s64), 1, 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + RET_ReallyLR + +... +--- +name: test_large_memcpy_inline +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_large_memcpy_inline + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4) + ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from %ir.1 + 16, align 4) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store 16 into %ir.0 + 16, align 4) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 4 from %ir.1 + 32) + ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: G_STORE [[LOAD2]](s32), [[PTR_ADD3]](p0) :: (store 4 into %ir.0 + 32) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 36 - G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY %0(p0), %1(p0), %2(s64), 1, 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) RET_ReallyLR ... 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bzero.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bzero.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bzero.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bzero.mir @@ -22,7 +22,7 @@ ; CHECK: RET_ReallyLR %ptr:_(p0) = COPY $x0 %width:_(s64) = COPY $x1 - G_BZERO %ptr(p0), %width(s64), 0 :: (store 4) + G_BZERO %ptr(p0), %width(s64), 0, 0 :: (store 4) RET_ReallyLR ... @@ -41,5 +41,5 @@ ; CHECK: TCRETURNdi &bzero, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $x0, implicit $x1 %ptr:_(p0) = COPY $x0 %width:_(s64) = COPY $x1 - G_BZERO %ptr(p0), %width(s64), 1 :: (store 4) + G_BZERO %ptr(p0), %width(s64), 1, 0 :: (store 4) RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-et-al.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-et-al.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-et-al.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-et-al.mir @@ -25,7 +25,7 @@ %1:_(p0) = COPY $x1 %2:_(s32) = COPY $w2 %3:_(s64) = G_ZEXT %2(s32) - G_MEMCPY %0(p0), %1(p0), %3(s64), 0 :: (store unknown-size), (load unknown-size) + G_MEMCPY %0(p0), %1(p0), %3(s64), 0, 0 :: (store unknown-size), (load unknown-size) RET_ReallyLR ... @@ -50,7 +50,7 @@ %1:_(p0) = COPY $x1 %2:_(s32) = COPY $w2 %3:_(s64) = G_ZEXT %2(s32) - G_MEMCPY %0(p0), %1(p0), %3(s64), 1 :: (store unknown-size), (load unknown-size) + G_MEMCPY %0(p0), %1(p0), %3(s64), 1, 0 :: (store unknown-size), (load unknown-size) RET_ReallyLR ... @@ -78,7 +78,7 @@ %1:_(p0) = COPY $x1 %2:_(s32) = COPY $w2 %3:_(s64) = G_ZEXT %2(s32) - G_MEMMOVE %0(p0), %1(p0), %3(s64), 0 :: (store unknown-size), (load unknown-size) + G_MEMMOVE %0(p0), %1(p0), %3(s64), 0, 0 :: (store unknown-size), (load unknown-size) RET_ReallyLR ... @@ -108,7 +108,7 @@ %2:_(s32) = COPY $w2 %3:_(s8) = G_TRUNC %1(s32) %4:_(s64) = G_ZEXT %2(s32) - G_MEMSET %0(p0), %3(s8), %4(s64), 0 :: (store unknown-size) + G_MEMSET %0(p0), %3(s8), %4(s64), 0, 0 :: (store unknown-size) RET_ReallyLR ... 
@@ -137,7 +137,7 @@ %1:_(p0) = COPY $x1 %2:_(s32) = COPY $w2 %3:_(s64) = G_ZEXT %2(s32) - G_MEMCPY %0(p0), %1(p0), %3(s64), 1 :: (store unknown-size), (load unknown-size) + G_MEMCPY %0(p0), %1(p0), %3(s64), 1, 0 :: (store unknown-size), (load unknown-size) $x0 = COPY %3 RET_ReallyLR implicit $x0 @@ -166,5 +166,5 @@ %2:_(s32) = COPY $w2 %4:_(s1) = G_CONSTANT i1 false %3:_(s64) = G_ZEXT %2(s32) - G_MEMCPY %0(p0), %1(p0), %3(s64), 1 :: (store unknown-size), (load unknown-size) + G_MEMCPY %0(p0), %1(p0), %3(s64), 1, 0 :: (store unknown-size), (load unknown-size) TCRETURNdi &memset, 0, csr_aarch64_aapcs, implicit $sp diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-with-debug-info.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-with-debug-info.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-with-debug-info.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-with-debug-info.mir @@ -51,7 +51,7 @@ %1:_(p0) = COPY $x1, debug-location !DILocation(line: 3, column: 1, scope: !11) %2:_(s32) = COPY $w2, debug-location !DILocation(line: 4, column: 1, scope: !11) %3:_(s64) = G_ZEXT %2(s32), debug-location !DILocation(line: 5, column: 1, scope: !11) - G_MEMCPY %0(p0), %1(p0), %3(s64), 1, debug-location !DILocation(line: 6, column: 1, scope: !11) :: (store unknown-size), (load unknown-size) + G_MEMCPY %0(p0), %1(p0), %3(s64), 1, 0, debug-location !DILocation(line: 6, column: 1, scope: !11) :: (store unknown-size), (load unknown-size) DBG_VALUE 0, $noreg, !13, !DIExpression(), debug-location !DILocation(line: 6, column: 1, scope: !11) DBG_VALUE 0, $noreg, !13, !DIExpression(), debug-location !DILocation(line: 6, column: 1, scope: !11) RET_ReallyLR debug-location !DILocation(line: 7, column: 1, scope: !11) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memlib-debug-loc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memlib-debug-loc.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memlib-debug-loc.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memlib-debug-loc.mir @@ -54,7 +54,7 @@ %2:_(s32) = COPY $w2 %3:_(s64) = G_ZEXT %2(s32), debug-location !11 %4:_(s8) = G_TRUNC %1(s32), debug-location !11 - G_MEMSET %0(p0), %4(s8), %3(s64), 0, debug-location !11 :: (store 1 into %ir.ptr) + G_MEMSET %0(p0), %4(s8), %3(s64), 0, 0, debug-location !11 :: (store 1 into %ir.ptr) RET_ReallyLR debug-location !12 ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-bzero.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-bzero.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-bzero.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-bzero.mir @@ -24,19 +24,19 @@ ; DARWIN: liveins: $x0, $x1 ; DARWIN: %ptr:_(p0) = COPY $x0 ; DARWIN: %width:_(s64) = COPY $x1 - ; DARWIN: G_BZERO %ptr(p0), %width(s64), 0 :: (store 4) + ; DARWIN: G_BZERO %ptr(p0), %width(s64), 0, 0 :: (store 4) ; DARWIN: RET_ReallyLR ; UNKNOWN-LABEL: name: bzero_unknown_width ; UNKNOWN: liveins: $x0, $x1 ; UNKNOWN: %ptr:_(p0) = COPY $x0 ; UNKNOWN: %zero:_(s8) = G_CONSTANT i8 0 ; UNKNOWN: %width:_(s64) = COPY $x1 - ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store 4) + ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0, 0 :: (store 4) ; UNKNOWN: RET_ReallyLR %ptr:_(p0) = COPY $x0 %zero:_(s8) = G_CONSTANT i8 0 %width:_(s64) = COPY $x1 - G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store 4) + G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0, 0 :: (store 4) RET_ReallyLR ... 
--- @@ -49,19 +49,19 @@ ; DARWIN: liveins: $x0, $x1 ; DARWIN: %ptr:_(p0) = COPY $x0 ; DARWIN: %width:_(s64) = COPY $x1 - ; DARWIN: G_BZERO %ptr(p0), %width(s64), 1 :: (store 4) + ; DARWIN: G_BZERO %ptr(p0), %width(s64), 1, 0 :: (store 4) ; DARWIN: RET_ReallyLR ; UNKNOWN-LABEL: name: bzero_tail_unknown_width ; UNKNOWN: liveins: $x0, $x1 ; UNKNOWN: %ptr:_(p0) = COPY $x0 ; UNKNOWN: %zero:_(s8) = G_CONSTANT i8 0 ; UNKNOWN: %width:_(s64) = COPY $x1 - ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 1 :: (store 4) + ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 1, 0 :: (store 4) ; UNKNOWN: RET_ReallyLR %ptr:_(p0) = COPY $x0 %zero:_(s8) = G_CONSTANT i8 0 %width:_(s64) = COPY $x1 - G_MEMSET %ptr(p0), %zero(s8), %width(s64), 1 :: (store 4) + G_MEMSET %ptr(p0), %zero(s8), %width(s64), 1, 0 :: (store 4) RET_ReallyLR ... --- @@ -76,19 +76,19 @@ ; DARWIN: liveins: $x0, $x1 ; DARWIN: %ptr:_(p0) = COPY $x0 ; DARWIN: %width:_(s64) = G_CONSTANT i64 1024 - ; DARWIN: G_BZERO %ptr(p0), %width(s64), 0 :: (store 4) + ; DARWIN: G_BZERO %ptr(p0), %width(s64), 0, 0 :: (store 4) ; DARWIN: RET_ReallyLR ; UNKNOWN-LABEL: name: bzero_constant_width ; UNKNOWN: liveins: $x0, $x1 ; UNKNOWN: %ptr:_(p0) = COPY $x0 ; UNKNOWN: %zero:_(s8) = G_CONSTANT i8 0 ; UNKNOWN: %width:_(s64) = G_CONSTANT i64 1024 - ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store 4) + ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0, 0 :: (store 4) ; UNKNOWN: RET_ReallyLR %ptr:_(p0) = COPY $x0 %zero:_(s8) = G_CONSTANT i8 0 %width:_(s64) = G_CONSTANT i64 1024 - G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store 4) + G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0, 0 :: (store 4) RET_ReallyLR ... --- @@ -103,19 +103,19 @@ ; DARWIN: liveins: $x0, $x1 ; DARWIN: %ptr:_(p0) = COPY $x0 ; DARWIN: %width:_(s64) = G_CONSTANT i64 256 - ; DARWIN: G_BZERO %ptr(p0), %width(s64), 0 :: (store 4) + ; DARWIN: G_BZERO %ptr(p0), %width(s64), 0, 0 :: (store 4) ; DARWIN: RET_ReallyLR ; UNKNOWN-LABEL: name: bzero_constant_width_minsize ; UNKNOWN: liveins: $x0, $x1 ; UNKNOWN: %ptr:_(p0) = COPY $x0 ; UNKNOWN: %zero:_(s8) = G_CONSTANT i8 0 ; UNKNOWN: %width:_(s64) = G_CONSTANT i64 256 - ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store 4) + ; UNKNOWN: G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0, 0 :: (store 4) ; UNKNOWN: RET_ReallyLR %ptr:_(p0) = COPY $x0 %zero:_(s8) = G_CONSTANT i8 0 %width:_(s64) = G_CONSTANT i64 256 - G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0 :: (store 4) + G_MEMSET %ptr(p0), %zero(s8), %width(s64), 0, 0 :: (store 4) RET_ReallyLR ... 
--- @@ -131,17 +131,17 @@ ; DARWIN: %ptr:_(p0) = COPY $x0 ; DARWIN: %not_zero:_(s8) = G_CONSTANT i8 1 ; DARWIN: %width:_(s64) = G_CONSTANT i64 256 - ; DARWIN: G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0 :: (store 4) + ; DARWIN: G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0, 0 :: (store 4) ; DARWIN: RET_ReallyLR ; UNKNOWN-LABEL: name: not_zero ; UNKNOWN: liveins: $x0, $x1 ; UNKNOWN: %ptr:_(p0) = COPY $x0 ; UNKNOWN: %not_zero:_(s8) = G_CONSTANT i8 1 ; UNKNOWN: %width:_(s64) = G_CONSTANT i64 256 - ; UNKNOWN: G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0 :: (store 4) + ; UNKNOWN: G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0, 0 :: (store 4) ; UNKNOWN: RET_ReallyLR %ptr:_(p0) = COPY $x0 %not_zero:_(s8) = G_CONSTANT i8 1 %width:_(s64) = G_CONSTANT i64 256 - G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0 :: (store 4) + G_MEMSET %ptr(p0), %not_zero(s8), %width(s64), 0, 0 :: (store 4) RET_ReallyLR diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -3918,7 +3918,7 @@ ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C6]](s32) ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store 8 into stack, align 4, addrspace 5), (dereferenceable load 8 from %ir.val, align 4, addrspace 5) + ; CHECK: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0, 0 :: (dereferenceable store 8 into stack, align 4, addrspace 5), (dereferenceable load 8 from %ir.val, align 4, addrspace 5) ; CHECK: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -3973,11 +3973,11 @@ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C1]](s32) ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C2]](s32), 0 :: (dereferenceable store 12 into stack, align 4, addrspace 5), (dereferenceable load 12 from %ir.incoming0, align 4, addrspace 5) + ; CHECK: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C2]](s32), 0, 0 :: (dereferenceable store 12 into stack, align 4, addrspace 5), (dereferenceable load 12 from %ir.incoming0, align 4, addrspace 5) ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C3]](s32) ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: G_MEMCPY [[PTR_ADD1]](p5), [[COPY9]](p5), [[C4]](s32), 0 :: (dereferenceable store 1 into stack + 32, align 32, addrspace 5), (dereferenceable load 1 from %ir.incoming1, align 32, addrspace 5) + ; CHECK: G_MEMCPY [[PTR_ADD1]](p5), [[COPY9]](p5), [[C4]](s32), 0, 0 :: (dereferenceable store 1 into stack + 32, align 32, addrspace 5), (dereferenceable load 1 from %ir.incoming1, align 32, addrspace 5) ; CHECK: $vgpr0 = COPY [[C]](s32) ; CHECK: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) @@ -4029,7 +4029,7 @@ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY18]], [[C]](s32) ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C1]](s32), 0 :: 
(dereferenceable store 32 into stack, align 4, addrspace 5), (dereferenceable load 32 from %ir.incoming_high_align, align 256, addrspace 5) + ; CHECK: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C1]](s32), 0, 0 :: (dereferenceable store 32 into stack, align 4, addrspace 5), (dereferenceable load 32 from %ir.incoming_high_align, align 256, addrspace 5) ; CHECK: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK: $sgpr4_sgpr5 = COPY [[COPY10]](p4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll @@ -14,7 +14,7 @@ ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3) + ; CHECK: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0, 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] call void @llvm.memcpy.p1i8.p3i8.i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i64 256, i1 false) @@ -31,7 +31,7 @@ ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3) + ; CHECK: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0, 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 256, i1 false) @@ -49,7 +49,7 @@ ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) - ; CHECK: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3) + ; CHECK: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0, 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] call void @llvm.memcpy.p1i8.p3i8.i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i16 256, i1 false) @@ -67,7 +67,7 @@ ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) - ; CHECK: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[TRUNC]](s32), 0 :: (store 1 into %ir.dst, addrspace 3), (load 1 from %ir.src, addrspace 1) + ; CHECK: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[TRUNC]](s32), 0, 0 :: (store 1 into %ir.dst, addrspace 3), (load 1 from %ir.src, addrspace 1) ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK: S_SETPC_B64_return [[COPY4]] call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i64 256, i1 false) @@ -84,7 +84,7 @@ ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) 
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
- ; CHECK: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[C]](s32), 0 :: (store 1 into %ir.dst, addrspace 3), (load 1 from %ir.src, addrspace 1)
+ ; CHECK: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[C]](s32), 0, 0 :: (store 1 into %ir.dst, addrspace 3), (load 1 from %ir.src, addrspace 1)
 ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
 ; CHECK: S_SETPC_B64_return [[COPY4]]
 call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 256, i1 false)
@@ -102,7 +102,7 @@
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
 ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
- ; CHECK: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[ZEXT]](s32), 0 :: (store 1 into %ir.dst, addrspace 3), (load 1 from %ir.src, addrspace 1)
+ ; CHECK: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[ZEXT]](s32), 0, 0 :: (store 1 into %ir.dst, addrspace 3), (load 1 from %ir.src, addrspace 1)
 ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
 ; CHECK: S_SETPC_B64_return [[COPY4]]
 call void @llvm.memcpy.p3i8.p1i8.i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i16 256, i1 false)
@@ -120,7 +120,7 @@
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
 ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
- ; CHECK: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3)
+ ; CHECK: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0, 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3)
 ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
 ; CHECK: S_SETPC_B64_return [[COPY4]]
 call void @llvm.memmove.p1i8.p3i8.i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i64 256, i1 false)
@@ -137,7 +137,7 @@
 ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
- ; CHECK: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3)
+ ; CHECK: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0, 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3)
 ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
 ; CHECK: S_SETPC_B64_return [[COPY4]]
 call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 256, i1 false)
@@ -155,7 +155,7 @@
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
 ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
- ; CHECK: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3)
+ ; CHECK: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0, 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3)
 ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
 ; CHECK: S_SETPC_B64_return [[COPY4]]
 call void @llvm.memmove.p1i8.p3i8.i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i16 256, i1 false)
@@ -173,7 +173,7 @@
 ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
- ; CHECK: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[C]](s64), 0 :: (store 1 into %ir.dst, addrspace 1)
+ ; CHECK: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[C]](s64), 0, 0 :: (store 1 into %ir.dst, addrspace 1)
 ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
 ; CHECK: S_SETPC_B64_return [[COPY4]]
 call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 256, i1 false)
@@ -192,7 +192,7 @@
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
 ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32)
- ; CHECK: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store 1 into %ir.dst, addrspace 1)
+ ; CHECK: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0, 0 :: (store 1 into %ir.dst, addrspace 1)
 ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
 ; CHECK: S_SETPC_B64_return [[COPY4]]
 call void @llvm.memset.p1i8.i32(i8 addrspace(1)* %dst, i8 %val, i32 256, i1 false)
@@ -211,7 +211,7 @@
 ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
 ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s16)
- ; CHECK: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store 1 into %ir.dst, addrspace 1)
+ ; CHECK: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0, 0 :: (store 1 into %ir.dst, addrspace 1)
 ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
 ; CHECK: S_SETPC_B64_return [[COPY4]]
 call void @llvm.memset.p1i8.i16(i8 addrspace(1)* %dst, i8 %val, i16 256, i1 false)
@@ -228,7 +228,7 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
 ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
- ; CHECK: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[TRUNC1]](s32), 0 :: (store 1 into %ir.dst, addrspace 3)
+ ; CHECK: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[TRUNC1]](s32), 0, 0 :: (store 1 into %ir.dst, addrspace 3)
 ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
 ; CHECK: S_SETPC_B64_return [[COPY3]]
 call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %dst, i8 %val, i64 256, i1 false)
@@ -244,7 +244,7 @@
 ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
 ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
- ; CHECK: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[C]](s32), 0 :: (store 1 into %ir.dst, addrspace 3)
+ ; CHECK: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[C]](s32), 0, 0 :: (store 1 into %ir.dst, addrspace 3)
 ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
 ; CHECK: S_SETPC_B64_return [[COPY3]]
 call void @llvm.memset.p3i8.i32(i8 addrspace(3)* %dst, i8 %val, i32 256, i1 false)
@@ -261,7 +261,7 @@
 ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256
 ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
- ; CHECK: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[ZEXT]](s32), 0 :: (store 1 into %ir.dst, addrspace 3)
+ ; CHECK: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[ZEXT]](s32), 0, 0 :: (store 1 into %ir.dst, addrspace 3)
 ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
 ; CHECK: S_SETPC_B64_return [[COPY3]]
 call void @llvm.memset.p3i8.i16(i8 addrspace(3)* %dst, i8 %val, i16 256, i1 false)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -346,7 +346,7 @@
 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
 ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C]](s32)
 ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; GCN: G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C1]](s32), 0 :: (dereferenceable store 4 into stack, addrspace 5), (dereferenceable load 4 from %ir.b.byval, addrspace 5)
+ ; GCN: G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C1]](s32), 0, 0 :: (dereferenceable store 4 into stack, addrspace 5), (dereferenceable load 4 from %ir.b.byval, addrspace 5)
 ; GCN: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
 ; GCN: $sgpr4_sgpr5 = COPY [[COPY12]](p4)
@@ -433,7 +433,7 @@
 ; GCN: $vgpr0 = COPY [[COPY8]](s32)
 ; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
 ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
- ; GCN: G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store 4 into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load 4 from `i32 addrspace(5)* inttoptr (i32 16 to i32 addrspace(5)*)`, align 16, addrspace 5)
+ ; GCN: G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0, 0 :: (dereferenceable store 4 into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load 4 from `i32 addrspace(5)* inttoptr (i32 16 to i32 addrspace(5)*)`, align 16, addrspace 5)
 ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>)
 ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4)
@@ -1291,10 +1291,10 @@
 ; GCN: $vgpr0 = COPY [[COPY8]](s32)
 ; GCN: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
 ; GCN: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; GCN: G_MEMCPY [[FRAME_INDEX36]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store 12 into %fixed-stack.1, align 16, addrspace 5), (dereferenceable load 12 from %ir.alloca0, align 16, addrspace 5)
+ ; GCN: G_MEMCPY [[FRAME_INDEX36]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0, 0 :: (dereferenceable store 12 into %fixed-stack.1, align 16, addrspace 5), (dereferenceable load 12 from %ir.alloca0, align 16, addrspace 5)
 ; GCN: [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
 ; GCN: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GCN: G_MEMCPY [[FRAME_INDEX37]](p5), [[FRAME_INDEX35]](p5), [[C5]](s32), 0 :: (dereferenceable store 16 into %fixed-stack.0, addrspace 5), (dereferenceable load 16 from %ir.alloca1, align 8, addrspace 5)
+ ; GCN: G_MEMCPY [[FRAME_INDEX37]](p5), [[FRAME_INDEX35]](p5), [[C5]](s32), 0, 0 :: (dereferenceable store 16 into %fixed-stack.0, addrspace 5), (dereferenceable load 16 from %ir.alloca1, align 8, addrspace 5)
 ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
 ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>)
 ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4)
@@ -1451,7 +1451,7 @@
 ; GCN: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
 ; GCN: [[FRAME_INDEX35:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2
 ; GCN: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; GCN: G_MEMCPY [[FRAME_INDEX35]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store 12 into %fixed-stack.2, align 16, addrspace 5), (dereferenceable load 12 from %ir.alloca, align 16, addrspace 5)
+ ; GCN: G_MEMCPY [[FRAME_INDEX35]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0, 0 :: (dereferenceable store 12 into %fixed-stack.2, align 16, addrspace 5), (dereferenceable load 12 from %ir.alloca, align 16, addrspace 5)
 ; GCN: $vgpr0 = COPY [[C1]](s32)
 ; GCN: $vgpr1 = COPY [[C1]](s32)
 ; GCN: $vgpr2 = COPY [[C1]](s32)
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/call.ll b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/call.ll
--- a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/call.ll
+++ b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/call.ll
@@ -153,7 +153,7 @@
 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
 ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
 ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
- ; MIPS32: G_MEMCPY [[COPY1]](p0), [[COPY]](p0), [[COPY2]](s32), 0 :: (store 1 into %ir.dest), (load 1 from %ir.src)
+ ; MIPS32: G_MEMCPY [[COPY1]](p0), [[COPY]](p0), [[COPY2]](s32), 0, 0 :: (store 1 into %ir.dest), (load 1 from %ir.src)
 ; MIPS32: RetRA
 ; MIPS32_PIC-LABEL: name: call_symbol
 ; MIPS32_PIC: bb.1.entry:
@@ -161,7 +161,7 @@
 ; MIPS32_PIC: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
 ; MIPS32_PIC: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
 ; MIPS32_PIC: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2
- ; MIPS32_PIC: G_MEMCPY [[COPY1]](p0), [[COPY]](p0), [[COPY2]](s32), 0 :: (store 1 into %ir.dest), (load 1 from %ir.src)
+ ; MIPS32_PIC: G_MEMCPY [[COPY1]](p0), [[COPY]](p0), [[COPY2]](s32), 0, 0 :: (store 1 into %ir.dest), (load 1 from %ir.src)
 ; MIPS32_PIC: RetRA
 entry:
 call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 %length, i1 false)
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/dyn_stackalloc.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/dyn_stackalloc.mir
--- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/dyn_stackalloc.mir
+++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/dyn_stackalloc.mir
@@ -73,7 +73,7 @@
 %8:_(s32) = G_CONSTANT i32 -8
 %9:_(s32) = G_AND %7, %8
 %10:_(p0) = G_DYN_STACKALLOC %9(s32), 0
- G_MEMSET %10(p0), %0(s8), %1(s32), 0 :: (store 1 into %ir.vla)
+ G_MEMSET %10(p0), %0(s8), %1(s32), 0, 0 :: (store 1 into %ir.vla)
 %11:_(p0) = G_PTR_ADD %10, %1(s32)
 %12:_(p0) = COPY %11(p0)
 G_STORE %13(s8), %12(p0) :: (store 1 into %ir.arrayidx)
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll b/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll
--- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll
@@ -18,7 +18,7 @@
 ; ALL: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
 ; ALL: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.f
 ; ALL: G_STORE [[COPY]](s32), [[FRAME_INDEX1]](p0) :: (store 4 into %ir.coerce.dive2)
- ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
+ ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0, 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
 ; ALL: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 4 from %ir.coerce.dive13)
 ; ALL: $xmm0 = COPY [[LOAD]](s32)
 ; ALL: RET 0, implicit $xmm0
@@ -46,7 +46,7 @@
 ; ALL: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
 ; ALL: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.d
 ; ALL: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store 8 into %ir.coerce.dive2)
- ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store 1 into %ir.0, align 8), (load 1 from %ir.1, align 8)
+ ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0, 0 :: (store 1 into %ir.0, align 8), (load 1 from %ir.1, align 8)
 ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8 from %ir.coerce.dive13)
 ; ALL: $xmm0 = COPY [[LOAD]](s64)
 ; ALL: RET 0, implicit $xmm0
@@ -76,7 +76,7 @@
 ; ALL: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; ALL: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64)
 ; ALL: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store 8 into %ir.2)
- ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store 1 into %ir.3, align 8), (load 1 from %ir.4, align 8)
+ ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0, 0 :: (store 1 into %ir.3, align 8), (load 1 from %ir.4, align 8)
 ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8 from %ir.5)
 ; ALL: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
 ; ALL: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load 8 from %ir.5 + 8)
@@ -108,7 +108,7 @@
 ; ALL: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
 ; ALL: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i
 ; ALL: G_STORE [[COPY]](s32), [[FRAME_INDEX1]](p0) :: (store 4 into %ir.coerce.dive2)
- ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
+ ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0, 0 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4)
 ; ALL: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 4 from %ir.coerce.dive13)
 ; ALL: $eax = COPY [[LOAD]](s32)
 ; ALL: RET 0, implicit $eax
@@ -134,7 +134,7 @@
 ; ALL: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval
 ; ALL: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i
 ; ALL: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store 8 into %ir.0, align 4)
- ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store 1 into %ir.1, align 4), (load 1 from %ir.2, align 4)
+ ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0, 0 :: (store 1 into %ir.1, align 4), (load 1 from %ir.2, align 4)
 ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8 from %ir.3, align 4)
 ; ALL: $rax = COPY [[LOAD]](s64)
 ; ALL: RET 0, implicit $rax
@@ -166,9 +166,9 @@
 ; ALL: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; ALL: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX2]], [[C1]](s64)
 ; ALL: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store 4 into %ir.1)
- ; ALL: G_MEMCPY [[FRAME_INDEX1]](p0), [[FRAME_INDEX2]](p0), [[C]](s64), 0 :: (store 1 into %ir.2, align 4), (load 1 from %ir.3, align 4)
- ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store 1 into %ir.4, align 4), (load 1 from %ir.5, align 4)
- ; ALL: G_MEMCPY [[FRAME_INDEX3]](p0), [[FRAME_INDEX]](p0), [[C]](s64), 0 :: (store 1 into %ir.6, align 8), (load 1 from %ir.7, align 4)
+ ; ALL: G_MEMCPY [[FRAME_INDEX1]](p0), [[FRAME_INDEX2]](p0), [[C]](s64), 0, 0 :: (store 1 into %ir.2, align 4), (load 1 from %ir.3, align 4)
+ ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0, 0 :: (store 1 into %ir.4, align 4), (load 1 from %ir.5, align 4)
+ ; ALL: G_MEMCPY [[FRAME_INDEX3]](p0), [[FRAME_INDEX]](p0), [[C]](s64), 0, 0 :: (store 1 into %ir.6, align 8), (load 1 from %ir.7, align 4)
 ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX3]](p0) :: (dereferenceable load 8 from %ir.tmp)
 ; ALL: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s64)
 ; ALL: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load 4 from %ir.tmp + 8, align 8)
@@ -210,7 +210,7 @@
 ; ALL: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; ALL: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64)
 ; ALL: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store 8 into %ir.2, align 4)
- ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store 1 into %ir.3, align 4), (load 1 from %ir.4, align 4)
+ ; ALL: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0, 0 :: (store 1 into %ir.3, align 4), (load 1 from %ir.4, align 4)
 ; ALL: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8 from %ir.5, align 4)
 ; ALL: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64)
 ; ALL: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load 8 from %ir.5 + 8, align 4)
diff --git a/llvm/test/MachineVerifier/test_g_bzero.mir b/llvm/test/MachineVerifier/test_g_bzero.mir
--- a/llvm/test/MachineVerifier/test_g_bzero.mir
+++ b/llvm/test/MachineVerifier/test_g_bzero.mir
@@ -15,19 +15,24 @@
 %cst2:_(s8) = G_CONSTANT i8 7

 ; CHECK: *** Bad machine code: bzero must have 1 memory operand ***
- G_BZERO %ptr, %cst2, 0
+ G_BZERO %ptr, %cst2, 0, 0

 ; CHECK: *** Bad machine code: bzero memory operand must be a store ***
- G_BZERO %ptr, %cst2, 0 :: (load 4)
+ G_BZERO %ptr, %cst2, 0, 0 :: (load 4)

 ; CHECK: *** Bad machine code: Missing mayLoad flag ***
 ; CHECK: *** Bad machine code: bzero memory operand must be a store ***
- G_BZERO %ptr, %cst2, 0 :: (load store 4)
+ G_BZERO %ptr, %cst2, 0, 0 :: (load store 4)

 ; CHECK: *** Bad machine code: inconsistent bzero address space ***
- G_BZERO %ptr, %cst2, 0 :: (store 4, addrspace 1)
+ G_BZERO %ptr, %cst2, 0, 0 :: (store 4, addrspace 1)

- ; CHECK: *** Bad machine code: bzero operand must be a pointer ***
- G_BZERO %cst1, %cst2, 0 :: (store 4)
+ ; CHECK: *** Bad machine code: bzero operand must be a pointer ***
+ G_BZERO %cst1, %cst2, 0, 0 :: (store 4)
+ ; CHECK: *** Bad machine code: 'tail' flag (second to last operand) must be an immediate type ***
+ G_BZERO %ptr, %cst2, %cst1, 0 :: (store 4)
+
+ ; CHECK: *** Bad machine code: 'inlined' flag (last operand) must be an immediate type ***
+ G_BZERO %ptr, %cst2, 0, %cst1 :: (store 4)

 ...
diff --git a/llvm/test/MachineVerifier/test_g_memcpy.mir b/llvm/test/MachineVerifier/test_g_memcpy.mir
--- a/llvm/test/MachineVerifier/test_g_memcpy.mir
+++ b/llvm/test/MachineVerifier/test_g_memcpy.mir
@@ -13,38 +13,44 @@
 %0:_(p0) = G_CONSTANT i64 0
 %1:_(p0) = G_CONSTANT i64 4
 %2:_(s64) = G_CONSTANT i64 4
+ %p:_(p0) = G_IMPLICIT_DEF

 ; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands ***
- G_MEMCPY %0, %1, %2, 0
+ G_MEMCPY %0, %1, %2, 0, 0

 ; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands ***
- G_MEMCPY %0, %1, %2, 0 :: (load 4)
+ G_MEMCPY %0, %1, %2, 0, 0 :: (load 4)

 ; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands ***
- G_MEMCPY %0, %1, %2, 0 :: (store 4)
+ G_MEMCPY %0, %1, %2, 0, 0 :: (store 4)

 ; CHECK: *** Bad machine code: wrong memory operand types ***
- G_MEMCPY %0, %1, %2, 0 :: (load 4), (store 4)
+ G_MEMCPY %0, %1, %2, 0, 0 :: (load 4), (store 4)

 ; CHECK: *** Bad machine code: inconsistent memory operand sizes ***
- G_MEMCPY %0, %1, %2, 0 :: (store 8), (load 4)
+ G_MEMCPY %0, %1, %2, 0, 0 :: (store 8), (load 4)

 ; CHECK: *** Bad machine code: inconsistent memory operand sizes ***
- G_MEMCPY %0, %1, %2, 0 :: (store unknown-size), (load 4)
+ G_MEMCPY %0, %1, %2, 0, 0 :: (store unknown-size), (load 4)

 ; CHECK: *** Bad machine code: inconsistent memory operand sizes ***
- G_MEMCPY %0, %1, %2, 0 :: (store 8), (load unknown-size)
+ G_MEMCPY %0, %1, %2, 0, 0 :: (store 8), (load unknown-size)

 ; CHECK: *** Bad machine code: inconsistent store address space ***
- G_MEMCPY %0, %1, %2, 0 :: (store 4, addrspace 1), (load 4)
+ G_MEMCPY %0, %1, %2, 0, 0 :: (store 4, addrspace 1), (load 4)

 ; CHECK: *** Bad machine code: inconsistent load address space ***
- G_MEMCPY %0, %1, %2, 0 :: (store 4), (load 4, addrspace 1)
+ G_MEMCPY %0, %1, %2, 0, 0 :: (store 4), (load 4, addrspace 1)

 ; CHECK: *** Bad machine code: memory instruction operand must be a pointer ***
- G_MEMCPY %2, %0, %2, 0 :: (store 4), (load 4)
+ G_MEMCPY %2, %0, %2, 0, 0 :: (store 4), (load 4)

 ; CHECK: *** Bad machine code: memory instruction operand must be a pointer ***
- G_MEMCPY %0, %2, %2, 0 :: (store 4), (load 4)
+ G_MEMCPY %0, %2, %2, 0, 0 :: (store 4), (load 4)
+ ; CHECK: *** Bad machine code: 'tail' flag (operand 3) must be an immediate type ***
+ G_MEMCPY %0, %0, %2, %0, 0 :: (store 4), (load 4)
+
+ ; CHECK: *** Bad machine code: 'inlined' flag (operand 4) must be an immediate type ***
+ G_MEMCPY %0, %0, %2, 0, %0 :: (store 4), (load 4)

 ...
diff --git a/llvm/test/MachineVerifier/test_g_memset.mir b/llvm/test/MachineVerifier/test_g_memset.mir
--- a/llvm/test/MachineVerifier/test_g_memset.mir
+++ b/llvm/test/MachineVerifier/test_g_memset.mir
@@ -15,19 +15,24 @@
 %2:_(s8) = G_CONSTANT i8 7

 ; CHECK: *** Bad machine code: memset must have 1 memory operand ***
- G_MEMSET %0, %1, %2, 0
+ G_MEMSET %0, %1, %2, 0, 0

 ; CHECK: *** Bad machine code: memset memory operand must be a store ***
- G_MEMSET %0, %1, %2, 0 :: (load 4)
+ G_MEMSET %0, %1, %2, 0, 0 :: (load 4)

 ; CHECK: *** Bad machine code: Missing mayLoad flag ***
 ; CHECK: *** Bad machine code: memset memory operand must be a store ***
- G_MEMSET %0, %1, %2, 0 :: (load store 4)
+ G_MEMSET %0, %1, %2, 0, 0 :: (load store 4)

 ; CHECK: *** Bad machine code: inconsistent memset address space ***
- G_MEMSET %0, %1, %2, 0 :: (store 4, addrspace 1)
+ G_MEMSET %0, %1, %2, 0, 0 :: (store 4, addrspace 1)

- ; CHECK: *** Bad machine code: memset operand must be a pointer ***
- G_MEMSET %1, %1, %2, 0 :: (store 4)
+ ; CHECK: *** Bad machine code: memset operand must be a pointer ***
+ G_MEMSET %1, %1, %2, 0, 0 :: (store 4)
+ ; CHECK: *** Bad machine code: 'tail' flag (second to last operand) must be an immediate type ***
+ G_MEMSET %0, %1, %2, %2, 0 :: (store 4)
+
+ ; CHECK: *** Bad machine code: 'inlined' flag (last operand) must be an immediate type ***
+ G_MEMSET %0, %1, %2, 0, %2 :: (store 4)

 ...