Index: lib/CodeGen/ProcessImplicitDefs.cpp
===================================================================
--- lib/CodeGen/ProcessImplicitDefs.cpp
+++ lib/CodeGen/ProcessImplicitDefs.cpp
@@ -69,7 +69,7 @@
       !MI->isPHI())
     return false;
   for (const MachineOperand &MO : MI->operands())
-    if (MO.isReg() && MO.isUse() && MO.readsReg() && !MO.isImplicit())
+    if (MO.isReg() && MO.isUse() && MO.readsReg())
       return false;
   return true;
 }
Index: lib/CodeGen/TwoAddressInstructionPass.cpp
===================================================================
--- lib/CodeGen/TwoAddressInstructionPass.cpp
+++ lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1723,31 +1723,20 @@
 ///
 /// The instruction is turned into a sequence of sub-register copies:
 ///
-///   %dst = REG_SEQUENCE %v1, ssub0, %v2, ssub1, [implicit uses]
+///   %dst = REG_SEQUENCE %v1, ssub0, %v2, ssub1
 ///
 /// Becomes:
 ///
-///   %dst:ssub0<def,undef> = COPY %v1, [implicit uses]
-///   %dst:ssub1<def> = COPY %v2, [implicit uses]
+///   %dst:ssub0<def,undef> = COPY %v1
+///   %dst:ssub1<def> = COPY %v2
 ///
 void TwoAddressInstructionPass::
 eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
   MachineInstr *MI = MBBI;
   unsigned DstReg = MI->getOperand(0).getReg();
-  unsigned NumTrailingImplicit = 0;
-
-  for (unsigned i = MI->getNumOperands(); i > 0; --i) {
-    const MachineOperand &MO = MI->getOperand(i - 1);
-    if (!MO.isReg() || !MO.isImplicit())
-      break;
-    NumTrailingImplicit++;
-  }
-
-  unsigned NumOperands = MI->getNumOperands() - NumTrailingImplicit;
-
   if (MI->getOperand(0).getSubReg() ||
       TargetRegisterInfo::isPhysicalRegister(DstReg) ||
-      !(NumOperands & 1)) {
+      !(MI->getNumOperands() & 1)) {
     DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
     llvm_unreachable(nullptr);
   }
@@ -1755,12 +1744,12 @@
   SmallVector<unsigned, 4> OrigRegs;
   if (LIS) {
     OrigRegs.push_back(MI->getOperand(0).getReg());
-    for (unsigned i = 1; i < NumOperands; i += 2)
+    for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2)
       OrigRegs.push_back(MI->getOperand(i).getReg());
   }
 
   bool DefEmitted = false;
-  for (unsigned i = 1; i < NumOperands; i += 2) {
+  for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
     MachineOperand &UseMO = MI->getOperand(i);
     unsigned SrcReg = UseMO.getReg();
     unsigned SubIdx = MI->getOperand(i+1).getImm();
@@ -1772,7 +1761,7 @@
     // might insert a COPY that uses SrcReg after is was killed.
     bool isKill = UseMO.isKill();
     if (isKill)
-      for (unsigned j = i + 2; j < NumOperands; j += 2)
+      for (unsigned j = i + 2; j < e; j += 2)
         if (MI->getOperand(j).getReg() == SrcReg) {
           MI->getOperand(j).setIsKill();
           UseMO.setIsKill(false);
@@ -1786,9 +1775,6 @@
       .addReg(DstReg, RegState::Define, SubIdx)
       .addOperand(UseMO);
 
-    for (unsigned j = 0; j < NumTrailingImplicit; ++j)
-      CopyMI->addOperand(MI->getOperand(NumOperands + j));
-
     // The first def needs an <undef> flag because there is no live register
     // before it.
     if (!DefEmitted) {
@@ -1811,7 +1797,7 @@
   if (!DefEmitted) {
     DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
     MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
-    for (int j = NumOperands - 1, ee = 0; j > ee; --j)
+    for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
       MI->RemoveOperand(j);
   } else {
     DEBUG(dbgs() << "Eliminated: " << *MI);
Index: lib/Target/AMDGPU/AMDGPUInstrInfo.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -62,6 +62,9 @@
                                int64_t Offset1, int64_t Offset2,
                                unsigned NumLoads) const override;
 
+  bool isSchedulingBoundary(const MachineInstr *MI,
+                            const MachineBasicBlock *MBB,
+                            const MachineFunction &MF) const override;
 
   /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
   /// Return -1 if the target-specific opcode for the pseudo instruction does
Index: lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -63,6 +63,23 @@
   return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
 }
 
+bool AMDGPUInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+                                           const MachineBasicBlock *MBB,
+                                           const MachineFunction &MF) const {
+  // Terminators and position markers (labels) can't be scheduled around.
+  if (MI->isTerminator() || MI->isPosition())
+    return true;
+
+  // Target-independent instructions carry no implicit use of EXEC, even when
+  // they operate on VGPRs. Treating every instruction that modifies EXEC as a
+  // scheduling boundary keeps such instructions from being moved across it.
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  if (MI->modifiesRegister(AMDGPU::EXEC, TRI))
+    return true;
+
+  return false; // Not a boundary. Note: MBB is unused here.
+}
+
 int AMDGPUInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
   const MachineRegisterInfo &MRI = MF.getRegInfo();
   const MachineFrameInfo *MFI = MF.getFrameInfo();
Index: lib/Target/AMDGPU/SIWholeQuadMode.cpp
===================================================================
--- lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -374,10 +374,6 @@
           if (TRI->isSGPRReg(*MRI, Op.getReg())) {
             // SGPR instructions are not affected by EXEC
             continue;
-          } else {
-            // Generic instructions on VGPRs must be marked as implicitly using
-            // EXEC or subsequent passes might reschedule them incorrectly.
-            MI.addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
           }
         }
       }
@@ -453,21 +449,6 @@
     // For a shader that needs only WQM, we can just set it once.
     BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
             AMDGPU::EXEC).addReg(AMDGPU::EXEC);
-
-    for (MachineInstr &MI : Entry) {
-      if (TargetInstrInfo::isGenericOpcode(MI.getOpcode()) &&
-          MI.getNumExplicitOperands() >= 1) {
-        const MachineOperand &Op = MI.getOperand(0);
-        if (Op.isReg()) {
-          if (!TRI->isSGPRReg(*MRI, Op.getReg())) {
-            // Generic instructions on VGPRs must be marked as implicitly using
-            // EXEC or subsequent passes might reschedule them incorrectly.
-            MI.addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
-          }
-        }
-      }
-    }
-
     return true;
   }
 
Index: test/CodeGen/AMDGPU/si-scheduler.ll
===================================================================
--- test/CodeGen/AMDGPU/si-scheduler.ll
+++ test/CodeGen/AMDGPU/si-scheduler.ll
@@ -3,9 +3,9 @@
 ; The test checks the "si" machine scheduler pass works correctly.
 
 ; CHECK-LABEL: {{^}}main:
+; CHECK: s_wqm
 ; CHECK: s_load_dwordx4
 ; CHECK: s_load_dwordx8
-; CHECK: s_wqm
 ; CHECK: s_waitcnt lgkmcnt(0)
 ; CHECK: image_sample
 ; CHECK: s_waitcnt vmcnt(0)