Index: llvm/trunk/include/llvm/CodeGen/GlobalISel/CallLowering.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ llvm/trunk/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -52,8 +52,8 @@
             ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy{}, bool IsFixed = true)
         : Regs(Regs.begin(), Regs.end()), Ty(Ty), Flags(Flags),
           IsFixed(IsFixed) {
-      assert(Regs.size() == 1 && "Can't handle multiple regs yet");
-      assert((Ty->isVoidTy() == (Regs[0] == 0)) &&
+      // FIXME: We should have just one way of saying "no register".
+      assert((Ty->isVoidTy() == (Regs.empty() || Regs[0] == 0)) &&
              "only void types should have no register");
     }
   };
@@ -139,6 +139,24 @@
   void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL,
                    const FuncInfoTy &FuncInfo) const;
 
+  /// Generate instructions for packing \p SrcRegs into one big register
+  /// corresponding to the aggregate type \p PackedTy.
+  ///
+  /// \param SrcRegs should contain one virtual register for each base type in
+  ///                \p PackedTy, as returned by computeValueLLTs.
+  ///
+  /// \return The packed register.
+  Register packRegs(ArrayRef<Register> SrcRegs, Type *PackedTy,
+                    MachineIRBuilder &MIRBuilder) const;
+
+  /// Generate instructions for unpacking \p SrcReg into the \p DstRegs
+  /// corresponding to the aggregate type \p PackedTy.
+  ///
+  /// \param DstRegs should contain one virtual register for each base type in
+  ///        \p PackedTy, as returned by computeValueLLTs.
+  void unpackRegs(ArrayRef<Register> DstRegs, Register SrcReg, Type *PackedTy,
+                  MachineIRBuilder &MIRBuilder) const;
+
   /// Invoke Handler::assignArg on each of the given \p Args and then use
   /// \p Callback to move them to the assigned locations.
   ///
@@ -182,19 +200,19 @@
     return false;
   }
 
-
   /// This hook must be implemented to lower the incoming (formal)
-  /// arguments, described by \p Args, for GlobalISel. Each argument
-  /// must end up in the related virtual register described by VRegs.
-  /// In other words, the first argument should end up in VRegs[0],
-  /// the second in VRegs[1], and so on.
+  /// arguments, described by \p VRegs, for GlobalISel. Each argument
+  /// must end up in the related virtual registers described by \p VRegs.
+  /// In other words, the first argument should end up in \c VRegs[0],
+  /// the second in \c VRegs[1], and so on. For each argument, there will be one
+  /// register for each non-aggregate type, as returned by \c computeValueLLTs.
   /// \p MIRBuilder is set to the proper insertion for the argument
   /// lowering.
   ///
   /// \return True if the lowering succeeded, false otherwise.
   virtual bool lowerFormalArguments(MachineIRBuilder &MIRBuilder,
                                     const Function &F,
-                                    ArrayRef<Register> VRegs) const {
+                                    ArrayRef<ArrayRef<Register>> VRegs) const {
     return false;
   }
 
Index: llvm/trunk/lib/CodeGen/GlobalISel/CallLowering.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ llvm/trunk/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -115,6 +116,47 @@
                                     const DataLayout &DL,
                                     const CallInst &FuncInfo) const;
 
+Register CallLowering::packRegs(ArrayRef<Register> SrcRegs, Type *PackedTy,
+                                MachineIRBuilder &MIRBuilder) const {
+  assert(SrcRegs.size() > 1 && "Nothing to pack");
+
+  const DataLayout &DL = MIRBuilder.getMF().getDataLayout();
+  MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+
+  LLT PackedLLT = getLLTForType(*PackedTy, DL);
+
+  SmallVector<LLT, 8> LLTs;
+  SmallVector<uint64_t, 8> Offsets;
+  computeValueLLTs(DL, *PackedTy, LLTs, &Offsets);
+  assert(LLTs.size() == SrcRegs.size() && "Regs / types mismatch");
+
+  Register Dst = MRI->createGenericVirtualRegister(PackedLLT);
+  MIRBuilder.buildUndef(Dst);
+  for (unsigned i = 0; i < SrcRegs.size(); ++i) {
+    Register NewDst = MRI->createGenericVirtualRegister(PackedLLT);
+    MIRBuilder.buildInsert(NewDst, Dst, SrcRegs[i], Offsets[i]);
+    Dst = NewDst;
+  }
+
+  return Dst;
+}
+
+void CallLowering::unpackRegs(ArrayRef<Register> DstRegs, Register SrcReg,
+                              Type *PackedTy,
+                              MachineIRBuilder &MIRBuilder) const {
+  assert(DstRegs.size() > 1 && "Nothing to unpack");
+
+  const DataLayout &DL = MIRBuilder.getMF().getDataLayout();
+
+  SmallVector<LLT, 8> LLTs;
+  SmallVector<uint64_t, 8> Offsets;
+  computeValueLLTs(DL, *PackedTy, LLTs, &Offsets);
+  assert(LLTs.size() == DstRegs.size() && "Regs / types mismatch");
+
+  for (unsigned i = 0; i < DstRegs.size(); ++i)
+    MIRBuilder.buildExtract(DstRegs[i], SrcReg, Offsets[i]);
+}
+
 bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
                                      ArrayRef<ArgInfo> Args,
                                      ValueHandler &Handler) const {
Index: llvm/trunk/lib/CodeGen/GlobalISel/IRTranslator.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ llvm/trunk/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2274,16 +2274,17 @@
   EntryBB->addSuccessor(&getMBB(F.front()));
 
   // Lower the actual args into this basic block.
-  SmallVector<Register, 8> VRegArgs;
+  SmallVector<ArrayRef<Register>, 8> VRegArgs;
   for (const Argument &Arg: F.args()) {
     if (DL->getTypeStoreSize(Arg.getType()) == 0)
       continue; // Don't handle zero sized types.
-    VRegArgs.push_back(
-        MRI->createGenericVirtualRegister(getLLTForType(*Arg.getType(), *DL)));
+    ArrayRef<Register> VRegs = getOrCreateVRegs(Arg);
+    VRegArgs.push_back(VRegs);
 
-    if (Arg.hasSwiftErrorAttr())
-      SwiftError.setCurrentVReg(EntryBB, SwiftError.getFunctionArg(),
-                                VRegArgs.back());
+    if (Arg.hasSwiftErrorAttr()) {
+      assert(VRegs.size() == 1 && "Too many vregs for Swift error");
+      SwiftError.setCurrentVReg(EntryBB, SwiftError.getFunctionArg(), VRegs[0]);
+    }
   }
 
   // We don't currently support translating swifterror or swiftself functions.
@@ -2306,20 +2307,6 @@
     return false;
   }
 
-  auto ArgIt = F.arg_begin();
-  for (auto &VArg : VRegArgs) {
-    // If the argument is an unsplit scalar then don't use unpackRegs to avoid
-    // creating redundant copies.
-    if (!valueIsSplit(*ArgIt, VMap.getOffsets(*ArgIt))) {
-      auto &VRegs = *VMap.getVRegs(cast<Value>(*ArgIt));
-      assert(VRegs.empty() && "VRegs already populated?");
-      VRegs.push_back(VArg);
-    } else {
-      unpackRegs(*ArgIt, VArg, *EntryBuilder.get());
-    }
-    ArgIt++;
-  }
-
   // Need to visit defs before uses when translating instructions.
   GISelObserverWrapper WrapperObserver;
   if (EnableCSE && CSEInfo)
Index: llvm/trunk/lib/Target/AArch64/AArch64CallLowering.h
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64CallLowering.h
+++ llvm/trunk/lib/Target/AArch64/AArch64CallLowering.h
@@ -38,7 +38,7 @@
                    Register SwiftErrorVReg) const override;
 
   bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
-                            ArrayRef<Register> VRegs) const override;
+                            ArrayRef<ArrayRef<Register>> VRegs) const override;
 
   bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
                  const MachineOperand &Callee, const ArgInfo &OrigRet,
Index: llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -203,7 +203,6 @@
   SmallVector<EVT, 4> SplitVTs;
   SmallVector<uint64_t, 4> Offsets;
   ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
-  assert(OrigArg.Regs.size() == 1 && "Can't handle multple regs yet");
 
   if (SplitVTs.size() == 1) {
     // No splitting to do, but we want to replace the original type (e.g. [1 x
@@ -213,6 +212,24 @@
     return;
   }
 
+  if (OrigArg.Regs.size() > 1) {
+    // Create one ArgInfo for each virtual register in the original ArgInfo.
+    assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");
+
+    bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
+        OrigArg.Ty, CallConv, false);
+    for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
+      Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
+      SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags,
+                             OrigArg.IsFixed);
+      if (NeedsRegBlock)
+        SplitArgs.back().Flags.setInConsecutiveRegs();
+    }
+
+    SplitArgs.back().Flags.setInConsecutiveRegsLast();
+    return;
+  }
+
   unsigned FirstRegIdx = SplitArgs.size();
   bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
       OrigArg.Ty, CallConv, false);
@@ -351,9 +368,9 @@
   return Success;
 }
 
-bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
-                                               const Function &F,
-                                               ArrayRef<Register> VRegs) const {
+bool AArch64CallLowering::lowerFormalArguments(
+    MachineIRBuilder &MIRBuilder, const Function &F,
+    ArrayRef<ArrayRef<Register>> VRegs) const {
   MachineFunction &MF = MIRBuilder.getMF();
   MachineBasicBlock &MBB = MIRBuilder.getMBB();
   MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -364,26 +381,14 @@
   for (auto &Arg : F.args()) {
     if (DL.getTypeStoreSize(Arg.getType()) == 0)
       continue;
+
     ArgInfo OrigArg{VRegs[i], Arg.getType()};
     setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
-    bool Split = false;
-    LLT Ty = MRI.getType(VRegs[i]);
-    Register Dst = VRegs[i];
 
     splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv(),
-                      [&](unsigned Reg, uint64_t Offset) {
-                        if (!Split) {
-                          Split = true;
-                          Dst = MRI.createGenericVirtualRegister(Ty);
-                          MIRBuilder.buildUndef(Dst);
-                        }
-                        unsigned Tmp = MRI.createGenericVirtualRegister(Ty);
-                        MIRBuilder.buildInsert(Tmp, Dst, Reg, Offset);
-                        Dst = Tmp;
+                      [&](Register Reg, uint64_t Offset) {
+                        llvm_unreachable("Args should already be split");
                       });
-
-    if (Dst != VRegs[i])
-      MIRBuilder.buildCopy(VRegs[i], Dst);
     ++i;
   }
 
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -35,7 +35,7 @@
   bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
                    ArrayRef<Register> VRegs) const override;
   bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
-                            ArrayRef<Register> VRegs) const override;
+                            ArrayRef<ArrayRef<Register>> VRegs) const override;
   static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
   static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);
 };
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -193,9 +193,9 @@
   }
 }
 
-bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
-                                              const Function &F,
-                                              ArrayRef<Register> VRegs) const {
+bool AMDGPUCallLowering::lowerFormalArguments(
+    MachineIRBuilder &MIRBuilder, const Function &F,
+    ArrayRef<ArrayRef<Register>> VRegs) const {
   // AMDGPU_GS and AMDGP_HS are not supported yet.
   if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
       F.getCallingConv() == CallingConv::AMDGPU_HS)
@@ -275,9 +275,16 @@
       uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
       ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
 
+      ArrayRef<Register> OrigArgRegs = VRegs[i];
+      Register ArgReg =
+          OrigArgRegs.size() == 1
+              ? OrigArgRegs[0]
+              : MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
       unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
       ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
-      lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
+      lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg);
+      if (OrigArgRegs.size() > 1)
+        unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder);
       ++i;
     }
 
@@ -295,7 +302,8 @@
 
     // We can only hanlde simple value types at the moment.
     ISD::ArgFlagsTy Flags;
-    ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
+    assert(VRegs[i].size() == 1 && "Can't lower into more than one register");
+    ArgInfo OrigArg{VRegs[i][0], CurOrigArg->getType()};
     setArgFlags(OrigArg, i + 1, DL, F);
     Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
 
@@ -348,10 +356,12 @@
          OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
        if (Skipped.test(OrigArgIdx))
           continue;
-      CCValAssign &VA = ArgLocs[i++];
-      MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
-      MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
-      MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
+       assert(VRegs[OrigArgIdx].size() == 1 &&
+              "Can't lower into more than 1 reg");
+       CCValAssign &VA = ArgLocs[i++];
+       MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx][0]);
+       MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
+       MIRBuilder.buildCopy(VRegs[OrigArgIdx][0], VA.getLocReg());
     }
 
     allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader);
Index: llvm/trunk/lib/Target/ARM/ARMCallLowering.h
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMCallLowering.h
+++ llvm/trunk/lib/Target/ARM/ARMCallLowering.h
@@ -36,7 +36,7 @@
                    ArrayRef<Register> VRegs) const override;
 
   bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
-                            ArrayRef<Register> VRegs) const override;
+                            ArrayRef<ArrayRef<Register>> VRegs) const override;
 
   bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
                  const MachineOperand &Callee, const ArgInfo &OrigRet,
Index: llvm/trunk/lib/Target/ARM/ARMCallLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMCallLowering.cpp
+++ llvm/trunk/lib/Target/ARM/ARMCallLowering.cpp
@@ -195,11 +195,11 @@
 
   SmallVector<EVT, 4> SplitVTs;
   ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, nullptr, nullptr, 0);
-  assert(OrigArg.Regs.size() == 1 && "Can't handle multple regs yet");
 
   if (SplitVTs.size() == 1) {
     // Even if there is no splitting to do, we still want to replace the
     // original type (e.g. pointer type -> integer).
+    assert(OrigArg.Regs.size() == 1 && "Regs / types mismatch");
     auto Flags = OrigArg.Flags;
     unsigned OriginalAlignment = DL.getABITypeAlignment(OrigArg.Ty);
     Flags.setOrigAlign(OriginalAlignment);
@@ -208,6 +208,34 @@
     return;
   }
 
+  if (OrigArg.Regs.size() > 1) {
+    // Create one ArgInfo for each virtual register.
+    assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");
+    for (unsigned i = 0, e = SplitVTs.size(); i != e; ++i) {
+      EVT SplitVT = SplitVTs[i];
+      Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
+      auto Flags = OrigArg.Flags;
+
+      unsigned OriginalAlignment = DL.getABITypeAlignment(SplitTy);
+      Flags.setOrigAlign(OriginalAlignment);
+
+      bool NeedsConsecutiveRegisters =
+          TLI.functionArgumentNeedsConsecutiveRegisters(
+              SplitTy, F.getCallingConv(), F.isVarArg());
+      if (NeedsConsecutiveRegisters) {
+        Flags.setInConsecutiveRegs();
+        if (i == e - 1)
+          Flags.setInConsecutiveRegsLast();
+      }
+
+      // FIXME: We also want to split SplitTy further.
+      Register PartReg = OrigArg.Regs[i];
+      SplitArgs.emplace_back(PartReg, SplitTy, Flags, OrigArg.IsFixed);
+    }
+
+    return;
+  }
+
   for (unsigned i = 0, e = SplitVTs.size(); i != e; ++i) {
     EVT SplitVT = SplitVTs[i];
     Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
@@ -424,9 +452,9 @@
 
 } // end anonymous namespace
 
-bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
-                                           const Function &F,
-                                           ArrayRef<Register> VRegs) const {
+bool ARMCallLowering::lowerFormalArguments(
+    MachineIRBuilder &MIRBuilder, const Function &F,
+    ArrayRef<ArrayRef<Register>> VRegs) const {
   auto &TLI = *getTLI<ARMTargetLowering>();
   auto Subtarget = TLI.getSubtarget();
 
@@ -457,20 +485,15 @@
   FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo(),
                               AssignFn);
 
-  SmallVector<ArgInfo, 8> ArgInfos;
-  SmallVector<Register, 4> SplitRegs;
+  SmallVector<ArgInfo, 8> SplitArgInfos;
   unsigned Idx = 0;
   for (auto &Arg : F.args()) {
-    ArgInfo AInfo(VRegs[Idx], Arg.getType());
-    setArgFlags(AInfo, Idx + AttributeList::FirstArgIndex, DL, F);
-
-    SplitRegs.clear();
+    ArgInfo OrigArgInfo(VRegs[Idx], Arg.getType());
+    setArgFlags(OrigArgInfo, Idx + AttributeList::FirstArgIndex, DL, F);
 
-    splitToValueTypes(AInfo, ArgInfos, MF,
-                      [&](Register Reg) { SplitRegs.push_back(Reg); });
-
-    if (!SplitRegs.empty())
-      MIRBuilder.buildMerge(VRegs[Idx], SplitRegs);
+    splitToValueTypes(OrigArgInfo, SplitArgInfos, MF, [&](Register Reg) {
+      llvm_unreachable("Args should already be split");
+    });
 
     Idx++;
   }
@@ -478,7 +501,7 @@
   if (!MBB.empty())
     MIRBuilder.setInstr(*MBB.begin());
 
-  if (!handleAssignments(MIRBuilder, ArgInfos, ArgHandler))
+  if (!handleAssignments(MIRBuilder, SplitArgInfos, ArgHandler))
     return false;
 
   // Move back to the end of the basic block.
Index: llvm/trunk/lib/Target/Mips/MipsCallLowering.h
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsCallLowering.h
+++ llvm/trunk/lib/Target/Mips/MipsCallLowering.h
@@ -66,7 +66,7 @@
                    ArrayRef<Register> VRegs) const override;
 
   bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
-                            ArrayRef<Register> VRegs) const override;
+                            ArrayRef<ArrayRef<Register>> VRegs) const override;
 
   bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
                  const MachineOperand &Callee, const ArgInfo &OrigRet,
Index: llvm/trunk/lib/Target/Mips/MipsCallLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsCallLowering.cpp
+++ llvm/trunk/lib/Target/Mips/MipsCallLowering.cpp
@@ -445,9 +445,9 @@
   return true;
 }
 
-bool MipsCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
-                                            const Function &F,
-                                            ArrayRef<Register> VRegs) const {
+bool MipsCallLowering::lowerFormalArguments(
+    MachineIRBuilder &MIRBuilder, const Function &F,
+    ArrayRef<ArrayRef<Register>> VRegs) const {
 
   // Quick exit if there aren't any args.
   if (F.arg_empty())
Index: llvm/trunk/lib/Target/X86/X86CallLowering.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86CallLowering.h
+++ llvm/trunk/lib/Target/X86/X86CallLowering.h
@@ -32,7 +32,7 @@
                    ArrayRef<Register> VRegs) const override;
 
   bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
-                            ArrayRef<Register> VRegs) const override;
+                            ArrayRef<ArrayRef<Register>> VRegs) const override;
 
   bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
                  const MachineOperand &Callee, const ArgInfo &OrigRet,
Index: llvm/trunk/lib/Target/X86/X86CallLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86CallLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86CallLowering.cpp
@@ -320,9 +320,9 @@
 
 } // end anonymous namespace
 
-bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
-                                           const Function &F,
-                                           ArrayRef<Register> VRegs) const {
+bool X86CallLowering::lowerFormalArguments(
+    MachineIRBuilder &MIRBuilder, const Function &F,
+    ArrayRef<ArrayRef<Register>> VRegs) const {
   if (F.arg_empty())
     return true;
 
@@ -344,14 +344,14 @@
         Arg.hasAttribute(Attribute::StructRet) ||
         Arg.hasAttribute(Attribute::SwiftSelf) ||
         Arg.hasAttribute(Attribute::SwiftError) ||
-        Arg.hasAttribute(Attribute::Nest))
+        Arg.hasAttribute(Attribute::Nest) || VRegs[Idx].size() > 1)
       return false;
 
     ArgInfo OrigArg(VRegs[Idx], Arg.getType());
     setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);
     if (!splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
                            [&](ArrayRef<Register> Regs) {
-                             MIRBuilder.buildMerge(VRegs[Idx], Regs);
+                             MIRBuilder.buildMerge(VRegs[Idx][0], Regs);
                            }))
       return false;
     Idx++;
Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
@@ -860,6 +860,17 @@
   ret void
 }
 
+; CHECK-LABEL: name: test_trivial_extract_ptr
+; CHECK: [[STRUCT:%[0-9]+]]:_(p0) = COPY $x0
+; CHECK: [[VAL32:%[0-9]+]]:_(s32) = COPY $w1
+; CHECK: [[VAL:%[0-9]+]]:_(s8) = G_TRUNC [[VAL32]]
+; CHECK: G_STORE [[VAL]](s8), [[STRUCT]](p0)
+define void @test_trivial_extract_ptr([1 x i8*] %s, i8 %val) {
+  %addr = extractvalue [1 x i8*] %s, 0
+  store i8 %val, i8* %addr
+  ret void
+}
+
 ; CHECK-LABEL: name: test_insertvalue
 ; CHECK: %0:_(p0) = COPY $x0
 ; CHECK: %1:_(s32) = COPY $w1
@@ -898,7 +909,7 @@
 
 define [1 x i8*] @test_trivial_insert_ptr([1 x i8*] %s, i8* %val) {
 ; CHECK-LABEL: name: test_trivial_insert_ptr
-; CHECK: [[STRUCT:%[0-9]+]]:_(s64) = COPY $x0
+; CHECK: [[STRUCT:%[0-9]+]]:_(p0) = COPY $x0
 ; CHECK: [[VAL:%[0-9]+]]:_(p0) = COPY $x1
 ; CHECK: $x0 = COPY [[VAL]]
   %res = insertvalue [1 x i8*] %s, i8* %val, 0
Index: llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator.ll
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/call-translator.ll
@@ -65,21 +65,13 @@
 ; CHECK: [[I8:%[0-9]+]]:_(s8) = G_TRUNC [[I8_C]]
 ; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY $x2
 
-; CHECK: [[UNDEF:%[0-9]+]]:_(s192) = G_IMPLICIT_DEF
-; CHECK: [[ARG0:%[0-9]+]]:_(s192) = G_INSERT [[UNDEF]], [[DBL]](s64), 0
-; CHECK: [[ARG1:%[0-9]+]]:_(s192) = G_INSERT [[ARG0]], [[I64]](s64), 64
-; CHECK: [[ARG2:%[0-9]+]]:_(s192) = G_INSERT [[ARG1]], [[I8]](s8), 128
-; CHECK: [[ARG:%[0-9]+]]:_(s192) = COPY [[ARG2]]
-; CHECK: [[EXTA0:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s192), 0
-; CHECK: [[EXTA1:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s192), 64
-; CHECK: [[EXTA2:%[0-9]+]]:_(s8) = G_EXTRACT [[ARG]](s192), 128
-; CHECK: G_STORE [[EXTA0]](s64), [[ADDR]](p0) :: (store 8 into %ir.addr)
+; CHECK: G_STORE [[DBL]](s64), [[ADDR]](p0) :: (store 8 into %ir.addr)
 ; CHECK: [[CST1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
 ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST1]](s64)
-; CHECK: G_STORE [[EXTA1]](s64), [[GEP1]](p0) :: (store 8 into %ir.addr + 8)
+; CHECK: G_STORE [[I64]](s64), [[GEP1]](p0) :: (store 8 into %ir.addr + 8)
 ; CHECK: [[CST2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
 ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_GEP [[ADDR]], [[CST2]](s64)
-; CHECK: G_STORE [[EXTA2]](s8), [[GEP2]](p0) :: (store 1 into %ir.addr + 16, align 8)
+; CHECK: G_STORE [[I8]](s8), [[GEP2]](p0) :: (store 1 into %ir.addr + 16, align 8)
 ; CHECK: RET_ReallyLR
 define void @test_struct_formal({double, i64, i8} %in, {double, i64, i8}* %addr) {
   store {double, i64, i8} %in, {double, i64, i8}* %addr
Index: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
@@ -675,14 +675,14 @@
   ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
   ; HSA-VI:   [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
   ; HSA-VI:   [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 16 from `{ i32, i64 } addrspace(4)* undef`, addrspace 4)
+  ; HSA-VI:   [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s128), 0
+  ; HSA-VI:   [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s128), 64
   ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
   ; HSA-VI:   [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
   ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 1 from `i8 addrspace(4)* undef`, align 16, addrspace 4)
   ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
   ; HSA-VI:   [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64)
   ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p4) :: (non-temporal invariant load 16 from `{ i32, i64 } addrspace(4)* undef`, align 8, addrspace 4)
-  ; HSA-VI:   [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s128), 0
-  ; HSA-VI:   [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s128), 64
   ; HSA-VI:   [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD2]](s128), 0
   ; HSA-VI:   [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD2]](s128), 64
   ; HSA-VI:   S_ENDPGM
@@ -707,14 +707,14 @@
   ; HSA-VI:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
   ; HSA-VI:   [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C]](s64)
   ; HSA-VI:   [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[GEP]](p4) :: (non-temporal invariant load 12 from `<{ i32, i64 }> addrspace(4)* undef`, align 16, addrspace 4)
+  ; HSA-VI:   [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s96), 0
+  ; HSA-VI:   [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32
   ; HSA-VI:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
   ; HSA-VI:   [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C1]](s64)
   ; HSA-VI:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[GEP1]](p4) :: (non-temporal invariant load 1 from `i8 addrspace(4)* undef`, align 4, addrspace 4)
   ; HSA-VI:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 13
   ; HSA-VI:   [[GEP2:%[0-9]+]]:_(p4) = G_GEP [[COPY]], [[C2]](s64)
   ; HSA-VI:   [[LOAD2:%[0-9]+]]:_(s96) = G_LOAD [[GEP2]](p4) :: (non-temporal invariant load 12 from `<{ i32, i64 }> addrspace(4)* undef`, align 1, addrspace 4)
-  ; HSA-VI:   [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s96), 0
-  ; HSA-VI:   [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32
   ; HSA-VI:   [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD2]](s96), 0
   ; HSA-VI:   [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD2]](s96), 32
   ; HSA-VI:   S_ENDPGM
Index: llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll
+++ llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll
@@ -202,12 +202,9 @@
 ; CHECK: liveins: $r0, $r1
 ; CHECK: [[R0:%[0-9]+]]:_(s32) = COPY $r0
 ; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY $r1
-; CHECK: [[ARG_ARR:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32)
-; CHECK: [[EXT1:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR]](s64), 0
-; CHECK: [[EXT2:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR]](s64), 32
 ; CHECK: [[IMPDEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-; CHECK: [[INS1:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF]], [[EXT1]](s32), 0
-; CHECK: [[INS2:%[0-9]+]]:_(s64) = G_INSERT [[INS1]], [[EXT2]](s32), 32
+; CHECK: [[INS1:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF]], [[R0]](s32), 0
+; CHECK: [[INS2:%[0-9]+]]:_(s64) = G_INSERT [[INS1]], [[R1]](s32), 32
 ; CHECK: ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def $sp, implicit $sp
 ; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS2]](s64)
 ; CHECK: $r0 = COPY [[R0]]
@@ -244,18 +241,12 @@
 ; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY $r1
 ; CHECK: [[R2:%[0-9]+]]:_(s32) = COPY $r2
 ; CHECK: [[R3:%[0-9]+]]:_(s32) = COPY $r3
-; CHECK: [[ARG_ARR0:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32)
-; CHECK: [[ARG_ARR1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R2]](s32), [[R3]](s32)
-; CHECK: [[EXT1:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR0]](s64), 0
-; CHECK: [[EXT2:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR0]](s64), 32
-; CHECK: [[EXT3:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR1]](s64), 0
-; CHECK: [[EXT4:%[0-9]+]]:_(s32) = G_EXTRACT [[ARG_ARR1]](s64), 32
 ; CHECK: [[IMPDEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-; CHECK: [[INS1:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF]], [[EXT1]](s32), 0
-; CHECK: [[INS2:%[0-9]+]]:_(s64) = G_INSERT [[INS1]], [[EXT2]](s32), 32
+; CHECK: [[INS1:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF]], [[R0]](s32), 0
+; CHECK: [[INS2:%[0-9]+]]:_(s64) = G_INSERT [[INS1]], [[R1]](s32), 32
 ; CHECK: [[IMPDEF2:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-; CHECK: [[INS3:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF2]], [[EXT3]](s32), 0
-; CHECK: [[INS4:%[0-9]+]]:_(s64) = G_INSERT [[INS3]], [[EXT4]](s32), 32
+; CHECK: [[INS3:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF2]], [[R2]](s32), 0
+; CHECK: [[INS4:%[0-9]+]]:_(s64) = G_INSERT [[INS3]], [[R3]](s32), 32
 ; CHECK: ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def $sp, implicit $sp
 ; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS2]](s64)
 ; CHECK: [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS4]](s64)
@@ -291,8 +282,9 @@
 ; CHECK: [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[FIRST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[FIRST_STACK_ID]]
 ; CHECK: [[LAST_STACK_ELEMENT_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[LAST_STACK_ID]]
 ; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[LAST_STACK_ID]]
-; CHECK: [[ARG_ARR:%[0-9]+]]:_(s640) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32), [[FIRST_STACK_ELEMENT]](s32), {{.*}}, [[LAST_STACK_ELEMENT]](s32)
-; CHECK: [[INS:%[0-9]+]]:_(s640) = G_INSERT {{.*}}, {{.*}}(s32), 608
+; CHECK: [[IMPDEF:%[0-9]+]]:_(s640) = G_IMPLICIT_DEF
+; CHECK: [[INS1:%[0-9]+]]:_(s640) = G_INSERT [[IMPDEF]], [[R0]](s32), 0
+; CHECK: [[INS:%[0-9]+]]:_(s640) = G_INSERT {{.*}}, [[LAST_STACK_ELEMENT]](s32), 608
 ; CHECK: ADJCALLSTACKDOWN 64, 0, 14, $noreg, implicit-def $sp, implicit $sp
 ; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32), [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32), {{.*}}, [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS]](s640)
 ; CHECK: $r0 = COPY [[R0]]
@@ -336,14 +328,10 @@
 ; BIG: [[ARR1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ARR1_1]](s32), [[ARR1_0]](s32)
 ; CHECK: [[ARR2_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[ARR2_ID]]
 ; CHECK: [[ARR2:%[0-9]+]]:_(s64) = G_LOAD [[ARR2_FI]]{{.*}}load 8 from %fixed-stack.[[ARR2_ID]]
-; CHECK: [[ARR_MERGED:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[ARR0]](s64), [[ARR1]](s64), [[ARR2]](s64)
-; CHECK: [[EXT1:%[0-9]+]]:_(s64) = G_EXTRACT [[ARR_MERGED]](s192), 0
-; CHECK: [[EXT2:%[0-9]+]]:_(s64) = G_EXTRACT [[ARR_MERGED]](s192), 64
-; CHECK: [[EXT3:%[0-9]+]]:_(s64) = G_EXTRACT [[ARR_MERGED]](s192), 128
 ; CHECK: [[IMPDEF:%[0-9]+]]:_(s192) = G_IMPLICIT_DEF
-; CHECK: [[INS1:%[0-9]+]]:_(s192) = G_INSERT [[IMPDEF]], [[EXT1]](s64), 0
-; CHECK: [[INS2:%[0-9]+]]:_(s192) = G_INSERT [[INS1]], [[EXT2]](s64), 64
-; CHECK: [[INS3:%[0-9]+]]:_(s192) = G_INSERT [[INS2]], [[EXT3]](s64), 128
+; CHECK: [[INS1:%[0-9]+]]:_(s192) = G_INSERT [[IMPDEF]], [[ARR0]](s64), 0
+; CHECK: [[INS2:%[0-9]+]]:_(s192) = G_INSERT [[INS1]], [[ARR1]](s64), 64
+; CHECK: [[INS3:%[0-9]+]]:_(s192) = G_INSERT [[INS2]], [[ARR2]](s64), 128
 ; CHECK: ADJCALLSTACKDOWN 8, 0, 14, $noreg, implicit-def $sp, implicit $sp
 ; CHECK: [[ARR0:%[0-9]+]]:_(s64), [[ARR1:%[0-9]+]]:_(s64), [[ARR2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[INS3]](s192)
 ; CHECK: [[ARR0_0:%[0-9]+]]:_(s32), [[ARR0_1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARR0]](s64)
@@ -401,32 +389,19 @@
 ; CHECK: [[Z2:%[0-9]+]]:_(s64) = G_LOAD [[Z2_FI]]{{.*}}load 8
 ; CHECK: [[Z3_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[Z3_ID]]
 ; CHECK: [[Z3:%[0-9]+]]:_(s64) = G_LOAD [[Z3_FI]]{{.*}}load 8
-; CHECK: [[X_ARR:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[X0]](s64), [[X1]](s64), [[X2]](s64)
-; CHECK: [[Y_ARR:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32), [[Y2]](s32)
-; CHECK: [[Z_ARR:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[Z0]](s64), [[Z1]](s64), [[Z2]](s64), [[Z3]](s64)
-; CHECK: [[EXT1:%[0-9]+]]:_(s64) = G_EXTRACT [[X_ARR]](s192), 0
-; CHECK: [[EXT2:%[0-9]+]]:_(s64) = G_EXTRACT [[X_ARR]](s192), 64
-; CHECK: [[EXT3:%[0-9]+]]:_(s64) = G_EXTRACT [[X_ARR]](s192), 128
-; CHECK: [[EXT4:%[0-9]+]]:_(s32) = G_EXTRACT [[Y_ARR]](s96), 0
-; CHECK: [[EXT5:%[0-9]+]]:_(s32) = G_EXTRACT [[Y_ARR]](s96), 32
-; CHECK: [[EXT6:%[0-9]+]]:_(s32) = G_EXTRACT [[Y_ARR]](s96), 64
-; CHECK: [[EXT7:%[0-9]+]]:_(s64) = G_EXTRACT [[Z_ARR]](s256), 0
-; CHECK: [[EXT8:%[0-9]+]]:_(s64) = G_EXTRACT [[Z_ARR]](s256), 64
-; CHECK: [[EXT9:%[0-9]+]]:_(s64) = G_EXTRACT [[Z_ARR]](s256), 128
-; CHECK: [[EXT10:%[0-9]+]]:_(s64) = G_EXTRACT [[Z_ARR]](s256), 192
 ; CHECK: [[IMPDEF:%[0-9]+]]:_(s192) = G_IMPLICIT_DEF
-; CHECK: [[INS1:%[0-9]+]]:_(s192) = G_INSERT [[IMPDEF]], [[EXT1]](s64), 0
-; CHECK: [[INS2:%[0-9]+]]:_(s192) = G_INSERT [[INS1]], [[EXT2]](s64), 64
-; CHECK: [[INS3:%[0-9]+]]:_(s192) = G_INSERT [[INS2]], [[EXT3]](s64), 128
+; CHECK: [[INS1:%[0-9]+]]:_(s192) = G_INSERT [[IMPDEF]], [[X0]](s64), 0
+; CHECK: [[INS2:%[0-9]+]]:_(s192) = G_INSERT [[INS1]], [[X1]](s64), 64
+; CHECK: [[INS3:%[0-9]+]]:_(s192) = G_INSERT [[INS2]], [[X2]](s64), 128
 ; CHECK: [[IMPDEF2:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
-; CHECK: [[INS4:%[0-9]+]]:_(s96) = G_INSERT [[IMPDEF2]], [[EXT4]](s32), 0
-; CHECK: [[INS5:%[0-9]+]]:_(s96) = G_INSERT [[INS4]], [[EXT5]](s32), 32
-; CHECK: [[INS6:%[0-9]+]]:_(s96) = G_INSERT [[INS5]], [[EXT6]](s32), 64
+; CHECK: [[INS4:%[0-9]+]]:_(s96) = G_INSERT [[IMPDEF2]], [[Y0]](s32), 0
+; CHECK: [[INS5:%[0-9]+]]:_(s96) = G_INSERT [[INS4]], [[Y1]](s32), 32
+; CHECK: [[INS6:%[0-9]+]]:_(s96) = G_INSERT [[INS5]], [[Y2]](s32), 64
 ; CHECK: [[IMPDEF3:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF
-; CHECK: [[INS7:%[0-9]+]]:_(s256) = G_INSERT [[IMPDEF3]], [[EXT7]](s64), 0
-; CHECK: [[INS8:%[0-9]+]]:_(s256) = G_INSERT [[INS7]], [[EXT8]](s64), 64
-; CHECK: [[INS9:%[0-9]+]]:_(s256) = G_INSERT [[INS8]], [[EXT9]](s64), 128
-; CHECK: [[INS10:%[0-9]+]]:_(s256) = G_INSERT [[INS9]], [[EXT10]](s64), 192
+; CHECK: [[INS7:%[0-9]+]]:_(s256) = G_INSERT [[IMPDEF3]], [[Z0]](s64), 0
+; CHECK: [[INS8:%[0-9]+]]:_(s256) = G_INSERT [[INS7]], [[Z1]](s64), 64
+; CHECK: [[INS9:%[0-9]+]]:_(s256) = G_INSERT [[INS8]], [[Z2]](s64), 128
+; CHECK: [[INS10:%[0-9]+]]:_(s256) = G_INSERT [[INS9]], [[Z3]](s64), 192
 ; CHECK: ADJCALLSTACKDOWN 32, 0, 14, $noreg, implicit-def $sp, implicit $sp
 ; CHECK: [[X0:%[0-9]+]]:_(s64), [[X1:%[0-9]+]]:_(s64), [[X2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[INS3]](s192)
 ; CHECK: [[Y0:%[0-9]+]]:_(s32), [[Y1:%[0-9]+]]:_(s32), [[Y2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS6]](s96)
@@ -494,8 +469,7 @@
 ; CHECK: [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[FIRST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[FIRST_STACK_ID]]
 ; CHECK: [[LAST_STACK_ELEMENT_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[LAST_STACK_ID]]
 ; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[LAST_STACK_ID]]
-; CHECK: [[ARG_ARR:%[0-9]+]]:_(s768) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32), [[FIRST_STACK_ELEMENT]](s32), {{.*}}, [[LAST_STACK_ELEMENT]](s32)
-; CHECK: [[INS:%[0-9]+]]:_(s768) = G_INSERT {{.*}}, {{.*}}(s32), 736
+; CHECK: [[INS:%[0-9]+]]:_(s768) = G_INSERT {{.*}}, [[LAST_STACK_ELEMENT]](s32), 736
 ; CHECK: ADJCALLSTACKDOWN 80, 0, 14, $noreg, implicit-def $sp, implicit $sp
 ; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32), [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32), {{.*}}, [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS]](s768)
 ; CHECK: $r0 = COPY [[R0]]
@@ -536,12 +510,9 @@
 ; CHECK: liveins: $r0, $r1
 ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY $r0
 ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY $r1
-; CHECK: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32)
-; CHECK: [[EXT1:%[0-9]+]]:_(s32) = G_EXTRACT [[X]](s64), 0
-; CHECK: [[EXT2:%[0-9]+]]:_(s32) = G_EXTRACT [[X]](s64), 32
 ; CHECK: [[IMPDEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
-; CHECK: [[INS1:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF]], [[EXT1]](s32), 0
-; CHECK: [[INS2:%[0-9]+]]:_(s64) = G_INSERT [[INS1]], [[EXT2]](s32), 32
+; CHECK: [[INS1:%[0-9]+]]:_(s64) = G_INSERT [[IMPDEF]], [[X0]](s32), 0
+; CHECK: [[INS2:%[0-9]+]]:_(s64) = G_INSERT [[INS1]], [[X1]](s32), 32
 ; CHECK: ADJCALLSTACKDOWN 0, 0, 14, $noreg, implicit-def $sp, implicit $sp
 ; CHECK: [[X0:%[0-9]+]]:_(s32), [[X1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INS2]](s64)
 ; CHECK-DAG: $r0 = COPY [[X0]](s32)