Index: lib/CodeGen/MachineSink.cpp
===================================================================
--- lib/CodeGen/MachineSink.cpp
+++ lib/CodeGen/MachineSink.cpp
@@ -104,6 +104,9 @@
     using AllSuccsCache =
         std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;
 
+    // Remember debug uses of vregs seen, so we know what to sink out of blocks.
+    DenseMap<unsigned, TinyPtrVector<MachineInstr *>> SeenDbgUsers;
+
   public:
     static char ID; // Pass identification
 
@@ -134,6 +137,7 @@
 
   private:
     bool ProcessBlock(MachineBasicBlock &MBB);
+    void ProcessDbgInst(MachineInstr &MI);
     bool isWorthBreakingCriticalEdge(MachineInstr &MI,
                                      MachineBasicBlock *From,
                                      MachineBasicBlock *To);
@@ -155,8 +159,9 @@
                                    MachineBasicBlock *To,
                                    bool BreakPHIEdge);
     bool SinkInstruction(MachineInstr &MI, bool &SawStore,
-
                          AllSuccsCache &AllSuccessors);
+    void SalvageUnsunkDebugUsersOfCopy(MachineInstr &,
+                                       MachineBasicBlock *TargetBlock);
     bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
                                  MachineBasicBlock *DefMBB,
                                  bool &BreakPHIEdge, bool &LocalUse) const;
@@ -371,8 +376,10 @@
     if (!ProcessedBegin)
       --I;
 
-    if (MI.isDebugInstr())
+    if (MI.isDebugInstr()) {
+      ProcessDbgInst(MI);
       continue;
+    }
 
     bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
     if (Joined) {
@@ -388,9 +395,24 @@
     // If we just processed the first instruction in the block, we're done.
   } while (!ProcessedBegin);
 
+  SeenDbgUsers.clear();
+
   return MadeChange;
 }
 
+void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
+  // When we see DBG_VALUEs for registers, record any vreg it reads, so that
+  // we know what to sink if the vreg def sinks.
+  if (!MI.isDebugValue())
+    return;
+
+  MachineOperand &MO = MI.getOperand(0);
+  if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+    return;
+
+  SeenDbgUsers[MO.getReg()].push_back(&MI);
+}
+
 bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
                                                  MachineBasicBlock *From,
                                                  MachineBasicBlock *To) {
@@ -736,21 +758,12 @@
          MBP.LHS.getReg() == BaseOp->getReg();
 }
 
-/// Sink an instruction and its associated debug instructions. If the debug
-/// instructions to be sunk are already known, they can be provided in DbgVals.
+/// Sink an instruction and its associated debug instructions.
 static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
                         MachineBasicBlock::iterator InsertPos,
-                        SmallVectorImpl<MachineInstr *> *DbgVals = nullptr) {
+                        SmallVectorImpl<MachineInstr *> &DbgValuesToSink) {
   const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
 
-  // If debug values are provided use those, otherwise call collectDebugValues.
-  SmallVector<MachineInstr *, 2> DbgValuesToSink;
-  if (DbgVals)
-    DbgValuesToSink.insert(DbgValuesToSink.begin(),
-                           DbgVals->begin(), DbgVals->end());
-  else
-    MI.collectDebugValues(DbgValuesToSink);
-
   // If we cannot find a location to use (merge with), then we erase the debug
   // location to prevent debug-info driven tools from potentially reporting
   // wrong location information.
@@ -929,7 +942,26 @@
   while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
     ++InsertPos;
 
-  performSink(MI, *SuccToSinkTo, InsertPos);
+  // Collect debug users of any vreg that this inst defines.
+  SmallVector<MachineInstr *, 4> DbgUsersToSink;
+  for (auto &MO : MI.operands()) {
+    if (!MO.isReg() || !MO.isDef() ||
+        !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+      continue;
+    if (!SeenDbgUsers.count(MO.getReg()))
+      continue;
+
+    auto &Users = SeenDbgUsers[MO.getReg()];
+    DbgUsersToSink.insert(DbgUsersToSink.end(), Users.begin(), Users.end());
+  }
+
+  // After sinking, some debug users may not be dominated any more. If possible,
+  // copy-propagate their operands. As it's expensive, don't do this if there's
+  // no debuginfo in the program.
+  if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy())
+    SalvageUnsunkDebugUsersOfCopy(MI, SuccToSinkTo);
+
+  performSink(MI, *SuccToSinkTo, InsertPos, DbgUsersToSink);
 
   // Conservatively, clear any kill flags, since it's possible that they are no
   // longer correct.
@@ -944,6 +976,41 @@
   return true;
 }
 
+void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
+    MachineInstr &MI, MachineBasicBlock *TargetBlock) {
+  assert(MI.isCopy());
+  assert(MI.getOperand(1).isReg());
+
+  // Enumerate all users of vreg operands that are def'd. Skip those that will
+  // be sunk. For the rest, if they are not dominated by the block we will sink
+  // MI into, propagate the copy source to them.
+  SmallVector<MachineInstr *, 4> DbgDefUsers;
+  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+  for (auto &MO : MI.operands()) {
+    if (!MO.isReg() || !MO.isDef() ||
+        !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+      continue;
+    for (auto &User : MRI.use_instructions(MO.getReg())) {
+      if (!User.isDebugValue() || DT->dominates(TargetBlock, User.getParent()))
+        continue;
+
+      // If is in same block, will either sink or be use-before-def,
+      if (User.getParent() == MI.getParent())
+        continue;
+
+      assert(User.getOperand(0).isReg());
+      DbgDefUsers.push_back(&User);
+    }
+  }
+
+  // Point the users of this copy that are no longer dominated, at the source
+  // of the copy.
+  for (auto *User : DbgDefUsers) {
+    User->getOperand(0).setReg(MI.getOperand(1).getReg());
+    User->getOperand(0).setSubReg(MI.getOperand(1).getSubReg());
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // This pass is not intended to be a replacement or a complete alternative
 // for the pre-ra machine sink pass. It is only designed to sink COPY
@@ -1235,7 +1302,7 @@
     // block.
     clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
     MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
-    performSink(*MI, *SuccBB, InsertPos, &DbgValsToSink);
+    performSink(*MI, *SuccBB, InsertPos, DbgValsToSink);
     updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
 
     Changed = true;
Index: test/DebugInfo/MIR/X86/machinesink.mir
===================================================================
--- /dev/null
+++ test/DebugInfo/MIR/X86/machinesink.mir
@@ -0,0 +1,113 @@
+# RUN: llc -run-pass=machine-sink -o - %s | FileCheck %s
+# Check various things when we sink machine instructions:
+#   a) DBG_VALUEs should sink with defs
+#   b) Undefs should be left behind
+#   c) DBG_VALUEs not immediately following the defining inst should sink too
+#   d) If we generate debug-use-before-defs through sinking, and can copy
+#      propagate to a different value, we should do that.
+--- |
+  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+  target triple = "x86_64-unknown-linux-gnu"
+
+  @x = common local_unnamed_addr global i32 0, align 4, !dbg !0
+
+  ; Function Attrs: noreturn nounwind uwtable
+  define void @Process(i32* nocapture readonly %p) local_unnamed_addr #0 !dbg !9 {
+  ; Stripped
+  entry:
+    br label %nou
+  nou:
+    br label %exit
+  exit:
+    ret void
+  }
+
+  ; Function Attrs: nounwind readnone
+  declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+  attributes #0 = { noreturn nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #1 = { nounwind readnone }
+  attributes #2 = { nounwind }
+
+  !llvm.dbg.cu = !{!1}
+  !llvm.module.flags = !{!6, !7}
+  !llvm.ident = !{!8}
+
+  !0 = !DIGlobalVariableExpression(var: !DIGlobalVariable(name: "x", scope: !1, file: !2, line: 1, type: !5, isLocal: false, isDefinition: true), expr: !DIExpression())
+  !1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !4)
+  !2 = !DIFile(filename: "t.c", directory: "")
+  !3 = !{}
+  !4 = !{!0}
+  !5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+  !6 = !{i32 2, !"Dwarf Version", i32 4}
+  !7 = !{i32 2, !"Debug Info Version", i32 3}
+  !8 = !{!"clang version 4.0.0 "}
+  !9 = distinct !DISubprogram(name: "Process", scope: !2, file: !2, line: 2, type: !10, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !1, retainedNodes: !15)
+  !10 = !DISubroutineType(types: !11)
+  !11 = !{null, !12}
+  !12 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !13, size: 64)
+  !13 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !14)
+  !14 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
+  !15 = !{!16}
+  !16 = !DILocalVariable(name: "p", arg: 1, scope: !9, file: !2, line: 2, type: !12)
+  !17 = !DIExpression()
+  !18 = !DILocation(line: 2, column: 34, scope: !9)
+  !22 = !{!23, !23, i64 0}
+  !23 = !{!"int", !24, i64 0}
+  !24 = !{!"omnipotent char", !25, i64 0}
+  !25 = !{!"Simple C/C++ TBAA"}
+  !28 = !DILexicalBlockFile(scope: !9, file: !2, discriminator: 1)
+
+  ; CHECK: [[VARNUM:![0-9]+]] = !DILocalVariable(name: "p",
+
+...
+---
+name:            Process
+tracksRegLiveness: true
+liveins:
+  - { reg: '$rdi', virtual-reg: '%2' }
+  - { reg: '$rsi', virtual-reg: '%2' }
+body:             |
+  bb.0.entry:
+    successors: %bb.1.nou(0x80000000), %bb.2.exit
+    liveins: $rdi, $esi
+
+    ; This block should have the vreg copy sunk from it, the DBG_VALUE with it,
+    ; and an undef DBG_VALUE left over.
+    ; CHECK-LABEL: bb.0.entry:
+    ; CHECK:       [[ARG0VREG:%[0-9]+]]:gr64 = COPY $rdi
+    ; CHECK-NEXT:  CMP32ri $esi, 0
+    ; CHECK-NEXT:  DBG_VALUE $noreg, $noreg, [[VARNUM]]
+    ; CHECK-NEXT:  JE_1
+    ; CHECK-NEXT:  JMP_1
+
+    %2:gr64 = COPY $rdi
+    %5:gr64 = COPY %2
+    CMP32ri $esi, 0, implicit-def $eflags
+    DBG_VALUE %5, _, !16, !17, debug-location !18
+    JE_1 %bb.1.nou, implicit $eflags
+    JMP_1 %bb.2.exit
+
+  bb.1.nou:
+    successors: %bb.2.exit(0x80000000)
+
+    ; This block should receive the sunk copy and DBG_VALUE
+    ; CHECK-LABEL: bb.1.nou:
+    ; CHECK:       [[SUNKVREG:%[0-9]+]]:gr64 = COPY [[ARG0VREG]]
+    ; CHECK-NEXT:  DBG_VALUE [[SUNKVREG]], $noreg, [[VARNUM]]
+    ; CHECK-NEXT:  ADD64ri8
+    ; CHECK-NEXT:  JMP_1
+    %1:gr64 = ADD64ri8 %5, 4, implicit-def dead $eflags
+    JMP_1 %bb.2.exit
+
+  bb.2.exit:
+    ; The DBG_VALUE below should have its operand copy-propagated after
+    ; the copy to %5 is sunk.
+    ; CHECK-LABEL: bb.2.exit:
+    ; CHECK:       DBG_VALUE [[ARG0VREG]], $noreg, [[VARNUM]]
+    ; CHECK-NEXT:  $rax = MOV64rr [[ARG0VREG]]
+    ; CHECK-NEXT:  RET 0
+    DBG_VALUE %5, _, !16, !17, debug-location !18
+    $rax = MOV64rr %2, _, 0, _
+    RET 0
+...