Index: lib/CodeGen/SelectionDAG/StatepointLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -113,84 +113,137 @@
   llvm_unreachable("infinite loop?");
 }
 
+/// Utility function for reservePreviousStackSlotForValue. Tries to find the
+/// stack slot index to which the value was spilled for previous statepoints.
+/// LookUpDepth specifies the maximum DFS depth this function may search.
+static Optional<int> findPreviousSpillSlot(const Value *Val,
+                                           SelectionDAGBuilder &Builder,
+                                           int LookUpDepth) {
+  // Cannot look any further - give up now
+  if (LookUpDepth <= 0)
+    return Optional<int>();
+
+  // Spill location is known for gc relocates
+  if (isGCRelocate(Val)) {
+    GCRelocateOperands RelocOps(cast<Instruction>(Val));
+
+    FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
+        Builder.FuncInfo.StatepointRelocatedValues[RelocOps.getStatepoint()];
+
+    auto It = SpillMap.find(RelocOps.getDerivedPtr());
+    if (It == SpillMap.end())
+      return Optional<int>();
+
+    return It->second;
+  }
+
+  // Look through cast instructions. Most of them are nops.
+  if (const CastInst *Cast = dyn_cast<CastInst>(Val)) {
+    return findPreviousSpillSlot(Cast->getOperand(0), Builder, LookUpDepth - 1);
+  }
+
+  // Look through phi nodes.
+  // All incoming values should have the same known stack slot; otherwise the
+  // result is unknown.
+  if (const PHINode *Phi = dyn_cast<PHINode>(Val)) {
+    Optional<int> MergedResult = None;
+
+    for (auto &IncomingValue : Phi->incoming_values()) {
+      Optional<int> SpillSlot =
+          findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1);
+      if (!SpillSlot.hasValue())
+        return Optional<int>();
+
+      if (MergedResult.hasValue() && *MergedResult != *SpillSlot)
+        return Optional<int>();
+
+      MergedResult = SpillSlot;
+    }
+    return MergedResult;
+  }
+
+  // TODO: We can do better for PHI nodes. In cases like this:
+  //   ptr = phi(relocated_pointer, not_relocated_pointer)
+  //   statepoint(ptr)
+  // we will report that the stack slot for ptr is unknown, and later we might
+  // assign different stack slots for ptr and relocated_pointer. This limits
+  // llvm's ability to remove redundant stores.
+  // Unfortunately this is hard to accomplish in the current infrastructure.
+  // We use this function to eliminate the spill store completely, while in
+  // the example above we would still need to emit the store, just to a
+  // special "preferred" location instead of an arbitrary one.
+
+  // TODO: handle simple updates. If a value is modified and the original
+  // value is no longer live, it would be nice to put the modified value in
+  // the same slot. This allows folding of the memory accesses for some
+  // instruction types (like an increment).
+  //   statepoint (i)
+  //   i1 = i+1
+  //   statepoint (i1)
+  // However we need to be careful for cases like this:
+  //   statepoint(i)
+  //   i1 = i+1
+  //   statepoint(i, i1)
+  // Here we want to reserve a spill slot for 'i', but not for 'i+1'. If we
+  // just handled simple modifications in this function the way we handle
+  // bitcasts, we might end up reserving i's slot for 'i+1', because the order
+  // in which we visit values is unspecified.
+
+  // Don't know any information about this instruction
+  return Optional<int>();
+}
+
 /// Try to find existing copies of the incoming values in stack slots used for
 /// statepoint spilling. If we can find a spill slot for the incoming value,
 /// mark that slot as allocated, and reuse the same slot for this safepoint.
 /// This helps to avoid series of loads and stores that only serve to reshuffle
 /// values on the stack between calls.
-static void reservePreviousStackSlotForValue(SDValue Incoming,
+static void reservePreviousStackSlotForValue(const Value *IncomingValue,
                                              SelectionDAGBuilder &Builder) {
+  SDValue Incoming = Builder.getValue(IncomingValue);
+
   if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) {
     // We won't need to spill this, so no need to check for previously
     // allocated stack slots
     return;
   }
 
-  SDValue Loc = Builder.StatepointLowering.getLocation(Incoming);
-  if (Loc.getNode()) {
+  SDValue OldLocation = Builder.StatepointLowering.getLocation(Incoming);
+  if (OldLocation.getNode())
     // duplicates in input
     return;
-  }
 
-  // Search back for the load from a stack slot pattern to find the original
-  // slot we allocated for this value.  We could extend this to deal with
-  // simple modification patterns, but simple dealing with trivial load/store
-  // sequences helps a lot already.
-  if (LoadSDNode *Load = dyn_cast<LoadSDNode>(Incoming)) {
-    if (auto *FI = dyn_cast<FrameIndexSDNode>(Load->getBasePtr())) {
-      const int Index = FI->getIndex();
-      auto Itr = std::find(Builder.FuncInfo.StatepointStackSlots.begin(),
-                           Builder.FuncInfo.StatepointStackSlots.end(), Index);
-      if (Itr == Builder.FuncInfo.StatepointStackSlots.end()) {
-        // not one of the lowering stack slots, can't reuse!
-        // TODO: Actually, we probably could reuse the stack slot if the value
-        // hasn't changed at all, but we'd need to look for intervening writes
-        return;
-      } else {
-        // This is one of our dedicated lowering slots
-        const int Offset =
-            std::distance(Builder.FuncInfo.StatepointStackSlots.begin(), Itr);
-        if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) {
-          // stack slot already assigned to someone else, can't use it!
-          // TODO: currently we reserve space for gc arguments after doing
-          // normal allocation for deopt arguments.  We should reserve for
-          // _all_ deopt and gc arguments, then start allocating.  This
-          // will prevent some moves being inserted when vm state changes,
-          // but gc state doesn't between two calls.
-          return;
-        }
-        // Reserve this stack slot
-        Builder.StatepointLowering.reserveStackSlot(Offset);
-      }
+  const int LookUpDepth = 6;
+  Optional<int> Index =
+      findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth);
+  if (!Index.hasValue())
+    return;
 
-      // Cache this slot so we find it when going through the normal
-      // assignment loop.
-      SDValue Loc =
-          Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType());
-
-      Builder.StatepointLowering.setLocation(Incoming, Loc);
-    }
+  auto Itr = std::find(Builder.FuncInfo.StatepointStackSlots.begin(),
+                       Builder.FuncInfo.StatepointStackSlots.end(), *Index);
+  assert(Itr != Builder.FuncInfo.StatepointStackSlots.end() &&
+         "value spilled to an unknown stack slot");
+
+  // This is one of our dedicated lowering slots
+  const int Offset =
+      std::distance(Builder.FuncInfo.StatepointStackSlots.begin(), Itr);
+  if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) {
+    // stack slot already assigned to someone else, can't use it!
+    // TODO: currently we reserve space for gc arguments after doing
+    // normal allocation for deopt arguments. We should reserve for
+    // _all_ deopt and gc arguments, then start allocating. This
+    // will prevent some moves being inserted when vm state changes,
+    // but gc state doesn't between two calls.
+    return;
   }
+  // Reserve this stack slot
+  Builder.StatepointLowering.reserveStackSlot(Offset);
 
-  // TODO: handle case where a reloaded value flows through a phi to
-  // another safepoint. e.g.
-  // bb1:
-  //  a' = relocated...
-  // bb2: % pred: bb1, bb3, bb4, etc.
-  //  a_phi = phi(a', ...)
-  // statepoint ... a_phi
-  // NOTE: This will require reasoning about cross basic block values. This is
-  // decidedly non trivial and this might not be the right place to do it. We
-  // don't really have the information we need here...
-
-  // TODO: handle simple updates. If a value is modified and the original
-  // value is no longer live, it would be nice to put the modified value in the
-  // same slot. This allows folding of the memory accesses for some
-  // instructions types (like an increment).
-  // statepoint (i)
-  // i1 = i+1
-  // statepoint (i1)
+  // Cache this slot so we find it when going through the normal
+  // assignment loop.
+  SDValue Loc = Builder.DAG.getTargetFrameIndex(*Index, Incoming.getValueType());
+  Builder.StatepointLowering.setLocation(Incoming, Loc);
 }
 
 /// Remove any duplicate (as SDValues) from the derived pointer pairs. This
@@ -469,15 +522,11 @@
   // doesn't change semantics at all. It is important for performance that we
   // reserve slots for both deopt and gc values before lowering either.
   for (const Value *V : StatepointSite.vm_state_args()) {
-    SDValue Incoming = Builder.getValue(V);
-    reservePreviousStackSlotForValue(Incoming, Builder);
+    reservePreviousStackSlotForValue(V, Builder);
   }
   for (unsigned i = 0; i < Bases.size(); ++i) {
-    const Value *Base = Bases[i];
-    reservePreviousStackSlotForValue(Builder.getValue(Base), Builder);
-
-    const Value *Ptr = Ptrs[i];
-    reservePreviousStackSlotForValue(Builder.getValue(Ptr), Builder);
+    reservePreviousStackSlotForValue(Bases[i], Builder);
+    reservePreviousStackSlotForValue(Ptrs[i], Builder);
   }
 
   // First, prefix the list with the number of unique values to be
Index: test/CodeGen/X86/statepoint-stack-usage.ll
===================================================================
--- test/CodeGen/X86/statepoint-stack-usage.ll
+++ test/CodeGen/X86/statepoint-stack-usage.ll
@@ -52,9 +52,53 @@
   ret i32 1
 }
 
+; Test that stack slots are reused for invokes
+define i32 @back_to_back_invokes(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 gc "statepoint-example" {
+; CHECK-LABEL: back_to_back_invokes
+entry:
+  ; The exact stores don't matter, but three stack slots need to be created
+  ; CHECK: movq %rdi, 16(%rsp)
+  ; CHECK: movq %rdx, 8(%rsp)
+  ; CHECK: movq %rsi, (%rsp)
+  ; CHECK: callq
+  %safepoint_token = invoke i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c)
+                    to label %normal_return unwind label %exceptional_return
+
+normal_return:
+  %a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 12)
+  %b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 13)
+  %c1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 14)
+  ; Should work even through bitcasts
+  %c1.casted = bitcast i32 addrspace(1)* %c1 to i8 addrspace(1)*
+  ; This is the key check. There should NOT be any memory moves here
+  ; CHECK-NOT: movq
+  ; CHECK: callq
+  %safepoint_token2 = invoke i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i8 addrspace(1)* %c1.casted, i32 addrspace(1)* %b1, i32 addrspace(1)* %a1)
+                     to label %normal_return2 unwind label %exceptional_return2
+
+normal_return2:
+  %a2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 12, i32 14)
+  %b2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 12, i32 13)
+  %c2 = tail call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %safepoint_token2, i32 12, i32 12)
+  ret i32 1
+
+exceptional_return:
+  %landing_pad = landingpad { i8*, i32 } personality i32 ()* @"personality_function"
+          cleanup
+  ret i32 0
+
+exceptional_return2:
+  %landing_pad2 = landingpad { i8*, i32 } personality i32 ()* @"personality_function"
+          cleanup
+  ret i32 0
+}
+
 ; Function Attrs: nounwind
 declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
+declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32, i32, i32) #3
 
 declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
 
-attributes #1 = { uwtable }
\ No newline at end of file
+declare i32 @"personality_function"()
+
+attributes #1 = { uwtable }
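
For reference, the lookup logic this patch adds amounts to a bounded DFS: look through casts, recurse into phi incoming values with a fixed depth budget (LookUpDepth = 6), and reuse a slot only when every path agrees on a single one. Below is a minimal self-contained sketch of that pattern, readable apart from the SelectionDAG machinery; std::optional stands in for llvm::Optional, and Node, findSlot, and the Leaf/Cast/Phi kinds are illustrative stand-ins for LLVM's Value hierarchy and SpillMap, not LLVM API.

    // Self-contained sketch of the bounded-DFS lookup in findPreviousSpillSlot.
    // Names here are illustrative, not LLVM API.
    #include <optional>
    #include <vector>

    struct Node {
      enum NodeKind { Leaf, Cast, Phi };
      NodeKind Kind;
      std::vector<const Node *> Operands; // Cast: 1 operand; Phi: incoming values
      std::optional<int> KnownSlot;       // Leaf: previously recorded spill slot
    };

    // Returns the spill slot all paths agree on, or nullopt if unknown.
    static std::optional<int> findSlot(const Node *N, int LookUpDepth) {
      if (LookUpDepth <= 0)
        return std::nullopt; // depth budget exhausted - give up

      switch (N->Kind) {
      case Node::Leaf:
        return N->KnownSlot; // plays the role of the SpillMap lookup
      case Node::Cast:
        // Casts are mostly no-ops for spilling purposes; look through them.
        return findSlot(N->Operands[0], LookUpDepth - 1);
      case Node::Phi: {
        // All incoming values must resolve to the same known slot.
        std::optional<int> Merged;
        for (const Node *Incoming : N->Operands) {
          std::optional<int> Slot = findSlot(Incoming, LookUpDepth - 1);
          if (!Slot || (Merged && *Merged != *Slot))
            return std::nullopt;
          Merged = Slot;
        }
        return Merged;
      }
      }
      return std::nullopt;
    }

The fixed depth budget keeps the walk cheap on pathological phi graphs, and the conservative merge (all incoming values must agree) is exactly the limitation the first TODO in the patch proposes to relax.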