// At this point stack references have not been lowered yet, so they + // each take a single operand.
// STATEPOINT is too 'dynamic' to have a meaningful machine description.
// Return true if V is a value which need not be relocated/spilled.
/// Synchronously update the Bases and Relocs vectors.
@@ -513,7 +546,9 @@ }; auto requireSpillSlot = [&](const Value *V) { - return !(LiveInDeopt || UseRegistersForDeoptValues) || isGCValue(V); + if (isGCValue(V)) + return !UseRegistersForGCPointers || V->getType()->isVectorTy(); + return !(LiveInDeopt || UseRegistersForDeoptValues); }; // Before we actually start lowering (and allocating spill slots for values), @@ -525,7 +560,7 @@ if (requireSpillSlot(V)) reservePreviousStackSlotForValue(V, Builder); } - for (unsigned i = 0; i < SI.Bases.size(); ++i) { + for (unsigned i = NumVRegGCArgs; i < SI.Bases.size(); ++i) { reservePreviousStackSlotForValue(SI.Bases[i], Builder); reservePreviousStackSlotForValue(SI.Ptrs[i], Builder); } @@ -558,16 +593,25 @@ // arrays interwoven with each (lowered) base pointer immediately followed by // it's (lowered) derived pointer. i.e // (base[0], ptr[0], base[1], ptr[1], ...) + // Lower first `NumVRegGCArgs` base AND derived pointers through VRegs. + // In future we might use more sophisticated strategy for choosing which + // pointers to pass via virtual registers, but for now this simple approach + // looks good enough. Take into account these facts: + // - NumVRegGCArgs is limited by the max number of tied registers in MI; + // - We relocate (and so need tied defs for) only derived pointers; + // - Quite often base and derived pointer are the same. 
+ auto &SL = Builder.StatepointLowering; for (unsigned i = 0; i < SI.Bases.size(); ++i) { + bool RequireSpillSlot = (i >= NumVRegGCArgs); const Value *Base = SI.Bases[i]; - lowerIncomingStatepointValue(Builder.getValue(Base), - /*RequireSpillSlot*/ true, Ops, MemRefs, - Builder); + lowerIncomingStatepointValue(Builder.getValue(Base), RequireSpillSlot, Ops, + MemRefs, Builder); const Value *Ptr = SI.Ptrs[i]; - lowerIncomingStatepointValue(Builder.getValue(Ptr), - /*RequireSpillSlot*/ true, Ops, MemRefs, - Builder); + SDValue SDV = Builder.getValue(Ptr); + lowerIncomingStatepointValue(SDV, RequireSpillSlot, Ops, MemRefs, Builder); + if (!RequireSpillSlot && !SL.getLocation(SDV)) + SL.setLocation(SDV, Builder.DAG.getConstant(i, SDLoc(), MVT::i64)); } // If there are any explicit spill slots passed to the statepoint, record @@ -595,6 +639,7 @@ // values, while previous loops account only values with unique SDValues. const Instruction *StatepointInstr = SI.StatepointInstr; auto &SpillMap = Builder.FuncInfo.StatepointSpillMaps[StatepointInstr]; + auto &DPtrMap = Builder.FuncInfo.DerivedPtrMap[StatepointInstr]; for (const GCRelocateInst *Relocate : SI.GCRelocates) { const Value *V = Relocate->getDerivedPtr(); @@ -602,7 +647,12 @@ SDValue Loc = Builder.StatepointLowering.getLocation(SDV); if (Loc.getNode()) { - SpillMap[V] = cast(Loc)->getIndex(); + if (auto FI = dyn_cast(Loc)) { + SpillMap[V] = FI->getIndex(); + } else { + DPtrMap[V] = cast(Loc)->getZExtValue(); + SpillMap[V] = None; + } } else { // Record value as visited, but not spilled. This is case for allocas // and constants. For this values we can avoid emitting spill load while @@ -642,10 +692,26 @@ StatepointLowering.scheduleRelocCall(*Reloc); #endif + unsigned NumVRegs = 0; + + if (UseRegistersForGCPointers) { + const unsigned MaxTiedRegs = 15U; + + // Sort vectors so that elements which need relocation are laid out + // contiguously at the beginning of vectors. 
+ // This is dictated by the SDNode implementation: due to size limit, one + // cannot put vector into class derived from SDNode. So we map N results + // of Statepoint node to the first N derived pointers. + NumVRegs = sortGCPtrs(SI.Bases, SI.Ptrs, SI.GCRelocates, *this); + NumVRegs = std::min(NumVRegs, MaxTiedRegs); + } + + LLVM_DEBUG(dbgs() << "NumVRegs = " << NumVRegs << "\n"); + // Lower statepoint vmstate and gcstate arguments SmallVector LoweredMetaArgs; SmallVector MemRefs; - lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, SI, *this); + lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, NumVRegs, SI, *this); // Now that we've emitted the spills, we need to update the root so that the // call sequence is ordered correctly. @@ -757,10 +823,13 @@ if (Glue.getNode()) Ops.push_back(Glue); - // Compute return values. Provide a glue output since we consume one as - // input. This allows someone else to chain off us as needed. - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SmallVector NodeTys; + for (unsigned i = 0; i < NumVRegs; ++i) + NodeTys.push_back(getValue(SI.Ptrs[i]).getValueType()); + NodeTys.push_back(MVT::Other); + NodeTys.push_back(MVT::Glue); + unsigned NumResults = NodeTys.size(); MachineSDNode *StatepointMCNode = DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops); DAG.setNodeMemRefs(StatepointMCNode, MemRefs); @@ -775,7 +844,7 @@ SmallVector TEOps; // Add chain - TEOps.push_back(SDValue(StatepointMCNode, 0)); + TEOps.push_back(SDValue(StatepointMCNode, NumResults - 2)); // Add GC transition arguments for (const Value *V : SI.GCTransitionArgs) { @@ -785,7 +854,7 @@ } // Add glue - TEOps.push_back(SDValue(StatepointMCNode, 1)); + TEOps.push_back(SDValue(StatepointMCNode, NumResults - 1)); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -796,7 +865,12 @@ } // Replace original call - DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root + // Call: ch,glue = CALL ... 
// SDValue must have a type to be used as a MERGE_VALUES operand.
+ setValue(&I, Merge); + if (NeedExport) { + LLVMContext *Context = DAG.getContext(); + SDNode *STV = Merge->getOperand(1).getNode(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + for (unsigned i = 0, e = STV->getNumValues() - 2; i < e; ++i) { + Value *DerivedPtr = SI.GCRelocates[i]->getDerivedPtr(); + SDValue Res(STV, i); + Type *Ty = DerivedPtr->getType(); + unsigned Reg = FuncInfo.CreateRegs(Ty); + RegsForValue RFV(*Context, TLI, DL, Reg, Ty, None); + SDValue Chain = DAG.getEntryNode(); + + RFV.getCopyToRegs(Res, DAG, getCurSDLoc(), Chain, nullptr, DerivedPtr); + PendingExports.push_back(Chain); + FuncInfo.StatepointRegs[SI.StatepointInstr].push_back(Reg); + } + } const GCResultInst *GCResult = I.getGCResult(); Type *RetTy = I.getActualReturnType(); if (!RetTy->isVoidTy() && GCResult) { if (GCResult->getParent() != I.getParent()) { - // Result value will be used in a different basic block so we need to - // export it now. Default exporting mechanism will not work here because - // statepoint call has a different type than the actual call. It means - // that by default llvm will create export register of the wrong type - // (always i32 in our case). So instead we need to create export register - // with correct type manually. - // TODO: To eliminate this problem we can remove gc.result intrinsics - // completely and make statepoint call to return a tuple. + SDValue ReturnValue = Merge->getOperand(0); unsigned Reg = FuncInfo.CreateRegs(RetTy); RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Reg, RetTy, @@ -903,16 +1009,7 @@ RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr); PendingExports.push_back(Chain); FuncInfo.ValueMap[&I] = Reg; - } else { - // Result value will be used in a same basic block. Don't export it or - // perform any explicit register copies. - // We'll replace the actuall call node shortly. gc_result will grab - // this value. 
// does not work here, so we need to create CopyFromRegs manually.
+ // See comment in LowerStatepoint for details. + assert(Index < StatepointRegs.size()); + unsigned InReg = StatepointRegs[Index]; + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), + DAG.getDataLayout(), InReg, DerivedPtr->getType(), + None); // This is not an ABI copy. + SDValue Chain = DAG.getEntryNode(); + Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, + DerivedPtr); + } else { + SDNode *Statepoint = getValue(Relocate.getStatepoint()).getNode(); + if (Statepoint->getOpcode() == ISD::MERGE_VALUES) + Statepoint = Statepoint->getOperand(1).getNode(); + Result = SDValue(Statepoint, Index); + } + setValue(&Relocate, Result); + return; + } + // We didn't need to spill these special cases (constants and allocas). // See the handling in spillIncomingValueForStatepoint for detail. if (!DerivedPtrLocation) { diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1041,9 +1041,15 @@ // Inherit previous memory operands. 
MIB.cloneMemRefs(*MI); - for (auto &MO : MI->operands()) { + for (unsigned i = 0; i < MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); if (!MO.isFI()) { + unsigned TiedTo = i; + if (MO.isReg() && MO.isTied()) + TiedTo = MI->findTiedOperandIdx(i); MIB.add(MO); + if (TiedTo < i) + MIB->tieOperands(TiedTo, MIB->getNumOperands() - 1); continue; } diff --git a/llvm/test/CodeGen/X86/statepoint-call-lowering.ll b/llvm/test/CodeGen/X86/statepoint-call-lowering.ll --- a/llvm/test/CodeGen/X86/statepoint-call-lowering.ll +++ b/llvm/test/CodeGen/X86/statepoint-call-lowering.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK-ALL %s +; RUN: llc -verify-machineinstrs -use-registers-for-gcptrs=true < %s | FileCheck --check-prefixes=CHECK-VREG,CHECK-ALL %s ; This file contains a collection of basic tests to ensure we didn't ; screw up normal call lowering when there are no deopt or gc arguments. @@ -16,15 +17,15 @@ declare void @varargf(i32, ...) 
define i1 @test_i1_return() gc "statepoint-example" { -; CHECK-LABEL: test_i1_return: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: callq return_i1 -; CHECK-NEXT: .Ltmp0: -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test_i1_return: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: callq return_i1 +; CHECK-ALL-NEXT: .Ltmp0: +; CHECK-ALL-NEXT: popq %rcx +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq ; This is just checking that a i1 gets lowered normally when there's no extra ; state arguments to the statepoint entry: @@ -34,15 +35,15 @@ } define i32 @test_i32_return() gc "statepoint-example" { -; CHECK-LABEL: test_i32_return: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: callq return_i32 -; CHECK-NEXT: .Ltmp1: -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test_i32_return: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: callq return_i32 +; CHECK-ALL-NEXT: .Ltmp1: +; CHECK-ALL-NEXT: popq %rcx +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq entry: %safepoint_token = tail call token (i64, i32, i32 ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_i32f(i64 0, i32 0, i32 ()* @return_i32, i32 0, i32 0, i32 0, i32 0) %call1 = call zeroext i32 @llvm.experimental.gc.result.i32(token %safepoint_token) @@ -50,15 +51,15 @@ } define i32* @test_i32ptr_return() gc "statepoint-example" { -; CHECK-LABEL: test_i32ptr_return: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: callq return_i32ptr -; CHECK-NEXT: .Ltmp2: -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test_i32ptr_return: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: callq return_i32ptr +; CHECK-ALL-NEXT: .Ltmp2: +; CHECK-ALL-NEXT: popq %rcx +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq entry: %safepoint_token = tail call token (i64, i32, i32* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p0i32f(i64 0, i32 0, i32* ()* @return_i32ptr, i32 0, i32 0, i32 0, i32 0) %call1 = call i32* @llvm.experimental.gc.result.p0i32(token %safepoint_token) @@ -66,15 +67,15 @@ } define float @test_float_return() gc "statepoint-example" { -; CHECK-LABEL: test_float_return: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: callq return_float -; CHECK-NEXT: .Ltmp3: -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test_float_return: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: callq return_float +; CHECK-ALL-NEXT: .Ltmp3: +; CHECK-ALL-NEXT: popq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq entry: %safepoint_token = tail call token (i64, i32, float ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_f32f(i64 0, i32 0, float ()* @return_float, i32 0, i32 0, i32 0, i32 0) %call1 = call float @llvm.experimental.gc.result.f32(token %safepoint_token) @@ -82,15 +83,15 @@ } define %struct @test_struct_return() gc "statepoint-example" { -; CHECK-LABEL: test_struct_return: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: callq return_struct -; CHECK-NEXT: .Ltmp4: -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test_struct_return: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: callq return_struct +; CHECK-ALL-NEXT: .Ltmp4: +; CHECK-ALL-NEXT: popq %rcx +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq entry: %safepoint_token = tail call token (i64, i32, %struct ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_structf(i64 0, i32 0, %struct ()* @return_struct, i32 0, i32 0, i32 0, i32 0) %call1 = call %struct @llvm.experimental.gc.result.struct(token %safepoint_token) @@ -108,6 +109,22 @@ ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq +; CHECK-VREG-LABEL: test_relocate: +; CHECK-VREG: # %bb.0: # %entry +; CHECK-VREG-NEXT: pushq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: subq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: .cfi_offset %rbx, -16 +; CHECK-VREG-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) +; CHECK-VREG-NEXT: callq return_i1 +; CHECK-VREG-NEXT: .Ltmp5: +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: addq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: popq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 8 +; CHECK-VREG-NEXT: retq ; Check that an ununsed relocate has no code-generation impact entry: %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %a)] @@ -117,17 +134,17 @@ } define void @test_void_vararg() gc "statepoint-example" { -; CHECK-LABEL: test_void_vararg: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: movl $42, %edi -; CHECK-NEXT: movl $43, %esi -; CHECK-NEXT: callq varargf -; CHECK-NEXT: .Ltmp6: -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test_void_vararg: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: movl $42, %edi +; CHECK-ALL-NEXT: movl $43, %esi +; CHECK-ALL-NEXT: callq varargf +; CHECK-ALL-NEXT: .Ltmp6: +; CHECK-ALL-NEXT: popq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq ; Check a statepoint wrapping a *void* returning vararg function works entry: %safepoint_token = tail call token (i64, i32, void (i32, ...)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi32varargf(i64 0, i32 0, void (i32, ...)* @varargf, i32 2, i32 0, i32 42, i32 43, i32 0, i32 0) @@ -137,15 +154,15 @@ } define i1 @test_i1_return_patchable() gc "statepoint-example" { -; CHECK-LABEL: test_i1_return_patchable: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: nopl (%rax) -; CHECK-NEXT: .Ltmp7: -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test_i1_return_patchable: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: nopl (%rax) +; CHECK-ALL-NEXT: .Ltmp7: +; CHECK-ALL-NEXT: popq %rcx +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq ; A patchable variant of test_i1_return entry: %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 3, i1 ()*null, i32 0, i32 0, i32 0, i32 0) @@ -188,6 +205,44 @@ ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq +; CHECK-VREG-LABEL: test_cross_bb: +; CHECK-VREG: # %bb.0: # %entry +; CHECK-VREG-NEXT: pushq %rbp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: pushq %r14 +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 24 +; CHECK-VREG-NEXT: pushq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: subq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 48 +; CHECK-VREG-NEXT: .cfi_offset %rbx, -32 +; CHECK-VREG-NEXT: .cfi_offset %r14, -24 +; CHECK-VREG-NEXT: .cfi_offset %rbp, -16 +; CHECK-VREG-NEXT: movl %esi, %ebp +; CHECK-VREG-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) +; CHECK-VREG-NEXT: callq return_i1 +; CHECK-VREG-NEXT: .Ltmp8: +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: testb $1, %bpl +; CHECK-VREG-NEXT: je .LBB8_2 +; CHECK-VREG-NEXT: # %bb.1: # %left +; CHECK-VREG-NEXT: movl %eax, %r14d +; CHECK-VREG-NEXT: movq %rbx, %rdi +; CHECK-VREG-NEXT: callq consume +; CHECK-VREG-NEXT: movl %r14d, %eax +; CHECK-VREG-NEXT: jmp .LBB8_3 +; CHECK-VREG-NEXT: .LBB8_2: # %right +; CHECK-VREG-NEXT: movb $1, %al +; CHECK-VREG-NEXT: .LBB8_3: # %right +; CHECK-VREG-NEXT: addq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: popq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 24 +; CHECK-VREG-NEXT: popq %r14 +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: popq %rbp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 8 +; CHECK-VREG-NEXT: retq entry: %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" (i32 addrspace(1)* %a)] br i1 %external_cond, label %left, label %right @@ -207,31 +262,31 @@ declare void @consume_attributes(i32, i8* nest, i32, %struct2* byval) define void @test_attributes(%struct2* byval %s) gc "statepoint-example" { -; CHECK-LABEL: test_attributes: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: subq $8, %rsp -; CHECK-NEXT: .cfi_adjust_cfa_offset 8 -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdx -; CHECK-NEXT: movl $42, %edi -; CHECK-NEXT: xorl %r10d, %r10d -; CHECK-NEXT: movl $17, %esi -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_adjust_cfa_offset 8 -; CHECK-NEXT: pushq %rdx -; CHECK-NEXT: .cfi_adjust_cfa_offset 8 -; CHECK-NEXT: pushq %rcx -; CHECK-NEXT: .cfi_adjust_cfa_offset 8 -; CHECK-NEXT: callq consume_attributes -; CHECK-NEXT: .Ltmp9: -; CHECK-NEXT: addq $32, %rsp -; CHECK-NEXT: .cfi_adjust_cfa_offset -32 -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test_attributes: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: subq $8, %rsp +; CHECK-ALL-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK-ALL-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-ALL-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; CHECK-ALL-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; CHECK-ALL-NEXT: movl $42, %edi +; CHECK-ALL-NEXT: xorl %r10d, %r10d +; CHECK-ALL-NEXT: movl $17, %esi +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK-ALL-NEXT: pushq %rdx +; CHECK-ALL-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK-ALL-NEXT: pushq %rcx +; CHECK-ALL-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK-ALL-NEXT: callq consume_attributes +; CHECK-ALL-NEXT: .Ltmp9: +; CHECK-ALL-NEXT: addq $32, %rsp +; CHECK-ALL-NEXT: 
.cfi_adjust_cfa_offset -32 +; CHECK-ALL-NEXT: popq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq entry: ; Check that arguments with attributes are lowered correctly. ; We call a function that has a nest argument and a byval argument. diff --git a/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll b/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll --- a/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll +++ b/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK-ALL %s +; RUN: llc -verify-machineinstrs -use-registers-for-gcptrs=true < %s | FileCheck --check-prefixes=CHECK-VREG,CHECK-ALL %s ; Check that we can export values of "duplicated" gc.relocates without a crash ; "duplicate" here means maps to same SDValue. We previously had an @@ -12,18 +13,18 @@ declare void @func() define i1 @test() gc "statepoint-example" { -; CHECK-LABEL: test: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: callq func -; CHECK-NEXT: .Ltmp0: -; CHECK-NEXT: callq func -; CHECK-NEXT: .Ltmp1: -; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: callq func +; CHECK-ALL-NEXT: .Ltmp0: +; CHECK-ALL-NEXT: callq func +; CHECK-ALL-NEXT: .Ltmp1: +; CHECK-ALL-NEXT: movb $1, %al +; CHECK-ALL-NEXT: popq %rcx +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq entry: %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* null, i32 addrspace(1)* null) %base = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7) diff --git a/llvm/test/CodeGen/X86/statepoint-invoke.ll b/llvm/test/CodeGen/X86/statepoint-invoke.ll --- a/llvm/test/CodeGen/X86/statepoint-invoke.ll +++ b/llvm/test/CodeGen/X86/statepoint-invoke.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs < %s 2>&1 | FileCheck %s +; RUN: llc -verify-machineinstrs < %s 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-ALL %s +; RUN: llc -verify-machineinstrs -use-registers-for-gcptrs=true < %s | FileCheck --check-prefixes=CHECK-VREG,CHECK-ALL %s target triple = "x86_64-pc-linux-gnu" @@ -31,6 +32,41 @@ ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq +; CHECK-VREG-LABEL: test_basic: +; CHECK-VREG: # %bb.0: # %entry +; CHECK-VREG-NEXT: pushq %r14 +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: pushq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 24 +; CHECK-VREG-NEXT: subq $24, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 48 +; CHECK-VREG-NEXT: .cfi_offset %rbx, -24 +; CHECK-VREG-NEXT: .cfi_offset %r14, -16 +; CHECK-VREG-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) +; CHECK-VREG-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) +; CHECK-VREG-NEXT: .Ltmp0: +; CHECK-VREG-NEXT: callq some_call +; CHECK-VREG-NEXT: .Ltmp3: +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 +; CHECK-VREG-NEXT: .Ltmp1: +; CHECK-VREG-NEXT: # %bb.1: # %normal_return +; CHECK-VREG-NEXT: movq %rbx, %rax +; CHECK-VREG-NEXT: .LBB0_2: # %normal_return +; CHECK-VREG-NEXT: addq $24, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 24 +; CHECK-VREG-NEXT: popq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: popq %r14 +; 
CHECK-VREG-NEXT: .cfi_def_cfa_offset 8 +; CHECK-VREG-NEXT: retq +; CHECK-VREG-NEXT: .LBB0_3: # %exceptional_return +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 48 +; CHECK-VREG-NEXT: .Ltmp2: +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 +; CHECK-VREG-NEXT: movq %r14, %rax +; CHECK-VREG-NEXT: jmp .LBB0_2 i64 addrspace(1)* %obj1) gc "statepoint-example" personality i32 ()* @"personality_function" { entry: @@ -52,11 +88,11 @@ %obj1.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 1, i32 1) ret i64 addrspace(1)* %obj1.relocated1 } -; CHECK-LABEL: GCC_except_table{{[0-9]+}}: -; CHECK: .uleb128 .Ltmp{{[0-9]+}}-.Ltmp{{[0-9]+}} -; CHECK: .uleb128 .Ltmp{{[0-9]+}}-.Lfunc_begin{{[0-9]+}} -; CHECK: .byte 0 -; CHECK: .p2align 4 +; CHECK-ALL-LABEL: GCC_except_table{{[0-9]+}}: +; CHECK-ALL: .uleb128 .Ltmp{{[0-9]+}}-.Ltmp{{[0-9]+}} +; CHECK-ALL: .uleb128 .Ltmp{{[0-9]+}}-.Lfunc_begin{{[0-9]+}} +; CHECK-ALL: .byte 0 +; CHECK-ALL: .p2align 4 define i64 addrspace(1)* @test_result(i64 addrspace(1)* %obj, ; CHECK-LABEL: test_result: @@ -79,6 +115,31 @@ ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq +; CHECK-VREG-LABEL: test_result: +; CHECK-VREG: # %bb.0: # %entry +; CHECK-VREG-NEXT: pushq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: subq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: .cfi_offset %rbx, -16 +; CHECK-VREG-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) +; CHECK-VREG-NEXT: .Ltmp4: +; CHECK-VREG-NEXT: callq some_other_call +; CHECK-VREG-NEXT: .Ltmp7: +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: .Ltmp5: +; CHECK-VREG-NEXT: .LBB1_1: # %normal_return +; CHECK-VREG-NEXT: addq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: popq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 8 +; CHECK-VREG-NEXT: retq +; CHECK-VREG-NEXT: .LBB1_2: # 
%exceptional_return +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: .Ltmp6: +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: movq %rbx, %rax +; CHECK-VREG-NEXT: jmp .LBB1_1 i64 addrspace(1)* %obj1) gc "statepoint-example" personality i32 ()* @personality_function { entry: @@ -95,11 +156,11 @@ %obj.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 0, i32 0) ret i64 addrspace(1)* %obj.relocated } -; CHECK-LABEL: GCC_except_table{{[0-9]+}}: -; CHECK: .uleb128 .Ltmp{{[0-9]+}}-.Ltmp{{[0-9]+}} -; CHECK: .uleb128 .Ltmp{{[0-9]+}}-.Lfunc_begin{{[0-9]+}} -; CHECK: .byte 0 -; CHECK: .p2align 4 +; CHECK-ALL-LABEL: GCC_except_table{{[0-9]+}}: +; CHECK-ALL: .uleb128 .Ltmp{{[0-9]+}}-.Ltmp{{[0-9]+}} +; CHECK-ALL: .uleb128 .Ltmp{{[0-9]+}}-.Lfunc_begin{{[0-9]+}} +; CHECK-ALL: .byte 0 +; CHECK-ALL: .p2align 4 define i64 addrspace(1)* @test_same_val(i1 %cond, i64 addrspace(1)* %val1, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3) ; CHECK-LABEL: test_same_val: @@ -153,6 +214,80 @@ ; CHECK-NEXT: .Ltmp13: ; CHECK-NEXT: movq (%rsp), %rax ; CHECK-NEXT: jmp .LBB2_6 +; CHECK-VREG-LABEL: test_same_val: +; CHECK-VREG: # %bb.0: # %entry +; CHECK-VREG-NEXT: pushq %rbp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: pushq %r15 +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 24 +; CHECK-VREG-NEXT: pushq %r14 +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: pushq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 40 +; CHECK-VREG-NEXT: subq $24, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 64 +; CHECK-VREG-NEXT: .cfi_offset %rbx, -40 +; CHECK-VREG-NEXT: .cfi_offset %r14, -32 +; CHECK-VREG-NEXT: .cfi_offset %r15, -24 +; CHECK-VREG-NEXT: .cfi_offset %rbp, -16 +; CHECK-VREG-NEXT: movq %rdx, %rbx +; CHECK-VREG-NEXT: movq %rsi, %rbp +; CHECK-VREG-NEXT: movl %edi, %r14d +; CHECK-VREG-NEXT: testb $1, %r14b +; CHECK-VREG-NEXT: je .LBB2_2 +; CHECK-VREG-NEXT: # %bb.1: # %left +; CHECK-VREG-NEXT: 
.Ltmp11: +; CHECK-VREG-NEXT: movq %rbp, %rdi +; CHECK-VREG-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) +; CHECK-VREG-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) +; CHECK-VREG-NEXT: callq some_call +; CHECK-VREG-NEXT: .Ltmp14: +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: .Ltmp12: +; CHECK-VREG-NEXT: jmp .LBB2_4 +; CHECK-VREG-NEXT: .LBB2_2: # %right +; CHECK-VREG-NEXT: movq %rcx, %r15 +; CHECK-VREG-NEXT: .Ltmp8: +; CHECK-VREG-NEXT: movq %rbp, %rdi +; CHECK-VREG-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) +; CHECK-VREG-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) +; CHECK-VREG-NEXT: callq some_call +; CHECK-VREG-NEXT: .Ltmp15: +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 +; CHECK-VREG-NEXT: .Ltmp9: +; CHECK-VREG-NEXT: # %bb.3: # %right.relocs +; CHECK-VREG-NEXT: movq %r15, %rbp +; CHECK-VREG-NEXT: .LBB2_4: # %normal_return +; CHECK-VREG-NEXT: testb $1, %r14b +; CHECK-VREG-NEXT: cmoveq %rbx, %rbp +; CHECK-VREG-NEXT: .LBB2_5: # %normal_return +; CHECK-VREG-NEXT: movq %rbp, %rax +; CHECK-VREG-NEXT: .LBB2_6: # %normal_return +; CHECK-VREG-NEXT: addq $24, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 40 +; CHECK-VREG-NEXT: popq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: popq %r14 +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 24 +; CHECK-VREG-NEXT: popq %r15 +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: popq %rbp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 8 +; CHECK-VREG-NEXT: retq +; CHECK-VREG-NEXT: .LBB2_8: # %exceptional_return.right +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 64 +; CHECK-VREG-NEXT: .Ltmp10: +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 +; CHECK-VREG-NEXT: movq %rbx, %rax +; CHECK-VREG-NEXT: jmp .LBB2_6 +; CHECK-VREG-NEXT: .LBB2_7: # %exceptional_return.left +; CHECK-VREG-NEXT: .Ltmp13: +; CHECK-VREG-NEXT: movq 
{{[-0-9]+}}(%r{{[sb]}}p), %rbp +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: jmp .LBB2_5 gc "statepoint-example" personality i32 ()* @"personality_function" { entry: br i1 %cond, label %left, label %right @@ -195,23 +330,23 @@ } define i64 addrspace(1)* @test_null_undef(i64 addrspace(1)* %val1) -; CHECK-LABEL: test_null_undef: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .Ltmp16: -; CHECK-NEXT: callq some_call -; CHECK-NEXT: .Ltmp19: -; CHECK-NEXT: .Ltmp17: -; CHECK-NEXT: .LBB3_1: # %normal_return -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB3_2: # %exceptional_return -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .Ltmp18: -; CHECK-NEXT: jmp .LBB3_1 +; CHECK-ALL-LABEL: test_null_undef: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: .Ltmp16: +; CHECK-ALL-NEXT: callq some_call +; CHECK-ALL-NEXT: .Ltmp19: +; CHECK-ALL-NEXT: .Ltmp17: +; CHECK-ALL-NEXT: .LBB3_1: # %normal_return +; CHECK-ALL-NEXT: xorl %eax, %eax +; CHECK-ALL-NEXT: popq %rcx +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq +; CHECK-ALL-NEXT: .LBB3_2: # %exceptional_return +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: .Ltmp18: +; CHECK-ALL-NEXT: jmp .LBB3_1 gc "statepoint-example" personality i32 ()* @"personality_function" { entry: %sp1 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 0) ["gc-live"(i64 addrspace(1)* null, i64 addrspace(1)* undef)] @@ -231,26 +366,26 @@ } define i64 addrspace(1)* @test_alloca_and_const(i64 addrspace(1)* %val1) -; CHECK-LABEL: test_alloca_and_const: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .Ltmp20: -; CHECK-NEXT: callq some_call -; CHECK-NEXT: .Ltmp23: -; CHECK-NEXT: .Ltmp21: -; CHECK-NEXT: # %bb.1: # %normal_return -; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB4_2: # %exceptional_return -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .Ltmp22: -; CHECK-NEXT: movl $15, %eax -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test_alloca_and_const: +; CHECK-ALL: # %bb.0: # %entry +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: .Ltmp20: +; CHECK-ALL-NEXT: callq some_call +; CHECK-ALL-NEXT: .Ltmp23: +; CHECK-ALL-NEXT: .Ltmp21: +; CHECK-ALL-NEXT: # %bb.1: # %normal_return +; CHECK-ALL-NEXT: leaq {{[0-9]+}}(%rsp), %rax +; CHECK-ALL-NEXT: popq %rcx +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq +; CHECK-ALL-NEXT: .LBB4_2: # %exceptional_return +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: .Ltmp22: +; CHECK-ALL-NEXT: movl $15, %eax +; CHECK-ALL-NEXT: popq %rcx +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq gc "statepoint-example" personality i32 ()* @"personality_function" { entry: %a = alloca i32 diff --git a/llvm/test/CodeGen/X86/statepoint-no-extra-const.ll b/llvm/test/CodeGen/X86/statepoint-no-extra-const.ll --- a/llvm/test/CodeGen/X86/statepoint-no-extra-const.ll +++ b/llvm/test/CodeGen/X86/statepoint-no-extra-const.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s 
-mtriple=x86_64-unknown | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown -use-registers-for-gcptrs=true | FileCheck --check-prefix=CHECK-VREG %s define i8 addrspace(1)* @no_extra_const(i8 addrspace(1)* %obj) gc "statepoint-example" { ; CHECK-LABEL: no_extra_const: @@ -13,6 +14,23 @@ ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq +; CHECK-VREG-LABEL: no_extra_const: +; CHECK-VREG: # %bb.0: # %entry +; CHECK-VREG-NEXT: pushq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: subq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: .cfi_offset %rbx, -16 +; CHECK-VREG-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) +; CHECK-VREG-NEXT: nopl 8(%rax) +; CHECK-VREG-NEXT: .Ltmp0: +; CHECK-VREG-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx +; CHECK-VREG-NEXT: movq %rbx, %rax +; CHECK-VREG-NEXT: addq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: popq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 8 +; CHECK-VREG-NEXT: retq entry: %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 4, void ()* null, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj) %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj) diff --git a/llvm/test/CodeGen/X86/statepoint-regs.ll b/llvm/test/CodeGen/X86/statepoint-regs.ll --- a/llvm/test/CodeGen/X86/statepoint-regs.ll +++ b/llvm/test/CodeGen/X86/statepoint-regs.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs -O3 -use-registers-for-deopt-values -restrict-statepoint-remat=true < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -O3 -use-registers-for-deopt-values -restrict-statepoint-remat=true < %s | FileCheck --check-prefixes=CHECK,CHECK-SPILL %s +; RUN: llc -verify-machineinstrs -O3 -use-registers-for-deopt-values -restrict-statepoint-remat=true -use-registers-for-gcptrs=true < %s | FileCheck --check-prefixes=CHECK,CHECK-VREG %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.11.0" @@ -97,23 +98,49 @@ ; A gc-value must be spilled even if it is also a deopt value. 
define i32 addrspace(1)* @test5(i32 %a, i32 addrspace(1)* %p) gc "statepoint-example" { -; CHECK-LABEL: test5: -; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: subq $16, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset %rbx, -16 -; CHECK-NEXT: movl %edi, %ebx -; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp) -; CHECK-NEXT: callq _bar -; CHECK-NEXT: Ltmp5: -; CHECK-NEXT: callq _bar -; CHECK-NEXT: Ltmp6: -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: addq $16, %rsp -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: retq +; CHECK-SPILL-LABEL: test5: +; CHECK-SPILL: ## %bb.0: ## %entry +; CHECK-SPILL-NEXT: pushq %rbx +; CHECK-SPILL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SPILL-NEXT: subq $16, %rsp +; CHECK-SPILL-NEXT: .cfi_def_cfa_offset 32 +; CHECK-SPILL-NEXT: .cfi_offset %rbx, -16 +; CHECK-SPILL-NEXT: movl %edi, %ebx +; CHECK-SPILL-NEXT: movq %rsi, {{[0-9]+}}(%rsp) +; CHECK-SPILL-NEXT: callq _bar +; CHECK-SPILL-NEXT: Ltmp5: +; CHECK-SPILL-NEXT: callq _bar +; CHECK-SPILL-NEXT: Ltmp6: +; CHECK-SPILL-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-SPILL-NEXT: addq $16, %rsp +; CHECK-SPILL-NEXT: popq %rbx +; CHECK-SPILL-NEXT: retq +; +; CHECK-VREG-LABEL: test5: +; CHECK-VREG: ## %bb.0: ## %entry +; CHECK-VREG-NEXT: pushq %rbp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: pushq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 24 +; CHECK-VREG-NEXT: pushq %rax +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: .cfi_offset %rbx, -24 +; CHECK-VREG-NEXT: .cfi_offset %rbp, -16 +; CHECK-VREG-NEXT: movq %rsi, (%rsp) +; CHECK-VREG-NEXT: movl %edi, %ebp +; CHECK-VREG-NEXT: callq _bar +; CHECK-VREG-NEXT: Ltmp5: +; CHECK-VREG-NEXT: movq (%rsp), %rbx +; CHECK-VREG-NEXT: movq %rbx, (%rsp) +; CHECK-VREG-NEXT: callq _bar +; CHECK-VREG-NEXT: Ltmp6: +; CHECK-VREG-NEXT: movq (%rsp), %rbx +; CHECK-VREG-NEXT: movq %rbx, %rax +; CHECK-VREG-NEXT: addq $8, %rsp +; CHECK-VREG-NEXT: popq %rbx +; 
CHECK-VREG-NEXT: popq %rbp +; CHECK-VREG-NEXT: retq + entry: %token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %p, i32 addrspace(1)* %p) ["deopt"(i32 %a)] %p2 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 8, i32 8) @@ -672,30 +699,27 @@ ret void } -define i32 addrspace(1)* @test_fpconst_deopt(i32 addrspace(1)* %in) gc "statepoint-example" { +define void @test_fpconst_deopt(i32 addrspace(1)* %in) gc "statepoint-example" { ; CHECK-LABEL: test_fpconst_deopt: ; CHECK: ## %bb.0: ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: movq %rdi, (%rsp) ; CHECK-NEXT: nopl 8(%rax,%rax) ; CHECK-NEXT: Ltmp18: -; CHECK-NEXT: movq (%rsp), %rax -; CHECK-NEXT: popq %rcx +; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq - %statepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2, i32 5, void ()* nonnull @bar, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %in) ["deopt" ( + %statepoint_token = call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2, i32 5, void ()* nonnull @bar, i32 0, i32 0, i32 0, i32 0) ["deopt" ( float 0x40421A1CA0000000, float 0x40459A1CA0000000, float 0x40401A1CA0000000, float 0x40479A1CA0000000, float 0x403C343940000000, float 0x403E343940000000, float 0x40469A1CA0000000, float 0x40489A1CA0000000, float 0x404A9A1CA0000000, float 0x40499A1CA0000000, float 0xC05FCD2F20000000, float 0xC05C0D2F20000000, float 0xC060269780000000, float 0xC05B8D2F20000000, float 0xC060669780000000, float 0xC05B0D2F20000000, float 0xC060A69780000000, float 0xC05A8D2F20000000, float 0xC060E69780000000, float 0x40439A1CA0000000)] - %out = call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %statepoint_token, i32 7, i32 7) - ret i32 addrspace(1)* %out + ret void } ; CHECK-LABEL: __LLVM_StackMaps: ; CHECK: .long Ltmp18-_test_fpconst_deopt ; CHECK-NEXT: .short 0 -; CHECK-NEXT: .short 25 +; CHECK-NEXT: .short 23 ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .short 8 diff --git a/llvm/test/CodeGen/X86/statepoint-uniqueing.ll b/llvm/test/CodeGen/X86/statepoint-uniqueing.ll --- a/llvm/test/CodeGen/X86/statepoint-uniqueing.ll +++ b/llvm/test/CodeGen/X86/statepoint-uniqueing.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK-SPILL,CHECK,CHECK-ALL %s +; RUN: llc -verify-machineinstrs -use-registers-for-gcptrs=true < %s | FileCheck --check-prefixes=CHECK-VREG,CHECK,CHECK-ALL %s target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-linux-gnu" @@ -12,20 +13,41 @@ ;; Two gc.relocates of the same input, should require only a single spill/fill define void @test_gcrelocate_uniqueing(i32 addrspace(1)* %ptr) gc "statepoint-example" { -; CHECK-LABEL: test_gcrelocate_uniqueing: -; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; 
CHECK-NEXT: movq %rdi, (%rsp) -; CHECK-NEXT: callq f -; CHECK-NEXT: .Ltmp0: -; CHECK-NEXT: movq (%rsp), %rdi -; CHECK-NEXT: movq %rdi, %rsi -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: callq use -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test_gcrelocate_uniqueing: +; CHECK-ALL: # %bb.0: +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: movq %rdi, (%rsp) +; CHECK-ALL-NEXT: callq f +; CHECK-ALL-NEXT: .Ltmp0: +; CHECK-ALL-NEXT: movq (%rsp), %rdi +; CHECK-ALL-NEXT: movq %rdi, %rsi +; CHECK-ALL-NEXT: xorl %eax, %eax +; CHECK-ALL-NEXT: callq use +; CHECK-ALL-NEXT: popq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq + +; CHECK-VREG-LABEL: test_gcrelocate_uniqueing: +; CHECK-VREG: # %bb.0: +; CHECK-VREG-NEXT: pushq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: subq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: .cfi_offset %rbx, -16 +; CHECK-VREG-NEXT: movq %rdi, 8(%rsp) +; CHECK-VREG-NEXT: callq f +; CHECK-VREG-NEXT: .Ltmp0: +; CHECK-VREG-NEXT: movq 8(%rsp), %rbx +; CHECK-VREG-NEXT: movq %rbx, %rdi +; CHECK-VREG-NEXT: movq %rbx, %rsi +; CHECK-VREG-NEXT: xorl %eax, %eax +; CHECK-VREG-NEXT: callq use +; CHECK-VREG-NEXT: addq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: popq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 8 +; CHECK-VREG-NEXT: retq %tok = tail call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %ptr, i32 addrspace(1)* %ptr) ["deopt" (i32 addrspace(1)* %ptr, i32 undef)] %a = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 7, i32 7) @@ -36,20 +58,41 @@ ;; Two gc.relocates of a bitcasted pointer should only require a single spill/fill define void @test_gcptr_uniqueing(i32 addrspace(1)* %ptr) gc "statepoint-example" { -; CHECK-LABEL: test_gcptr_uniqueing: -; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: movq %rdi, (%rsp) -; CHECK-NEXT: callq f -; CHECK-NEXT: .Ltmp1: -; CHECK-NEXT: movq (%rsp), %rdi -; CHECK-NEXT: movq %rdi, %rsi -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: callq use -; CHECK-NEXT: popq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq +; CHECK-ALL-LABEL: test_gcptr_uniqueing: +; CHECK-ALL: # %bb.0: +; CHECK-ALL-NEXT: pushq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ALL-NEXT: movq %rdi, (%rsp) +; CHECK-ALL-NEXT: callq f +; CHECK-ALL-NEXT: .Ltmp1: +; CHECK-ALL-NEXT: movq (%rsp), %rdi +; CHECK-ALL-NEXT: movq %rdi, %rsi +; CHECK-ALL-NEXT: xorl %eax, %eax +; CHECK-ALL-NEXT: callq use +; CHECK-ALL-NEXT: popq %rax +; CHECK-ALL-NEXT: .cfi_def_cfa_offset 8 +; CHECK-ALL-NEXT: retq + +; CHECK-VREG-LABEL: test_gcptr_uniqueing: +; CHECK-VREG: # %bb.0: +; CHECK-VREG-NEXT: pushq %rbx +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: subq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 32 +; CHECK-VREG-NEXT: .cfi_offset %rbx, -16 +; CHECK-VREG-NEXT: movq %rdi, 8(%rsp) +; CHECK-VREG-NEXT: callq f +; CHECK-VREG-NEXT: .Ltmp1: +; CHECK-VREG-NEXT: movq 8(%rsp), %rbx +; CHECK-VREG-NEXT: movq %rbx, %rdi +; CHECK-VREG-NEXT: movq %rbx, %rsi +; CHECK-VREG-NEXT: xorl %eax, %eax +; CHECK-VREG-NEXT: callq use +; CHECK-VREG-NEXT: addq $16, %rsp +; CHECK-VREG-NEXT: .cfi_def_cfa_offset 16 +; CHECK-VREG-NEXT: popq %rbx +; CHECK-VREG-NEXT: 
.cfi_def_cfa_offset 8 +; CHECK-VREG-NEXT: retq %ptr2 = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)* %tok = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %ptr, i8 addrspace(1)* %ptr2) ["deopt" (i32 addrspace(1)* %ptr, i32 undef)]