diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -105,6 +105,9 @@
     /// available in a physical register.
     LiveRegMap LiveVirtRegs;
 
+    /// Maps bundle virtual registers to their assigned physical registers.
+    DenseMap<Register, MCPhysReg> BundleVirtRegsMap;
+
     DenseMap<unsigned, SmallVector<MachineInstr *, 2>> LiveDbgValueMap;
     /// List of DBG_VALUE that we encountered without the vreg being assigned
     /// because they were placed after the last use of the vreg.
@@ -218,6 +221,8 @@
 
     void allocateInstruction(MachineInstr &MI);
     void handleDebugValue(MachineInstr &MI);
+    void handleBundle(MachineInstr &MI);
+
     bool usePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
     bool definePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
     bool displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
@@ -889,6 +894,9 @@
     LRI->LiveOut = false;
     LRI->Reloaded = false;
   }
+  if (MI.getOpcode() == TargetOpcode::BUNDLE) {
+    BundleVirtRegsMap[VirtReg] = PhysReg;
+  }
   markRegUsedInInstr(PhysReg);
   setPhysReg(MI, MO, PhysReg);
 }
@@ -934,6 +942,10 @@
   }
 
   LRI->LastUse = &MI;
+
+  if (MI.getOpcode() == TargetOpcode::BUNDLE) {
+    BundleVirtRegsMap[VirtReg] = LRI->PhysReg;
+  }
   markRegUsedInInstr(LRI->PhysReg);
   setPhysReg(MI, MO, LRI->PhysReg);
 }
@@ -1064,6 +1076,7 @@
   //   operands and early-clobbers.
 
   UsedInInstr.clear();
+  BundleVirtRegsMap.clear();
 
   // Scan for special cases; Apply pre-assigned register defs to state.
   bool HasPhysRegUse = false;
@@ -1382,6 +1395,30 @@
   LiveDbgValueMap[Reg].push_back(&MI);
 }
 
+void RegAllocFast::handleBundle(MachineInstr &MI) {
+  MachineBasicBlock::instr_iterator BundledMI = MI.getIterator();
+  ++BundledMI;
+  while (BundledMI->isBundledWithPred()) {
+    for (unsigned I = 0; I < BundledMI->getNumOperands(); ++I) {
+      MachineOperand &MO = BundledMI->getOperand(I);
+      if (!MO.isReg())
+        continue;
+
+      Register Reg = MO.getReg();
+      if (!Reg.isVirtual())
+        continue;
+
+      DenseMap<Register, MCPhysReg>::iterator DI;
+      DI = BundleVirtRegsMap.find(Reg);
+      assert(DI != BundleVirtRegsMap.end() && "Unassigned virtual register");
+
+      setPhysReg(MI, MO, DI->second);
+    }
+
+    ++BundledMI;
+  }
+}
+
 void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
   this->MBB = &MBB;
   LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);
@@ -1411,6 +1448,12 @@
     }
 
     allocateInstruction(MI);
+
+    // Once the BUNDLE header has been assigned physical registers, the same
+    // assignments must be propagated to the MIs inside the bundle.
+    if (MI.getOpcode() == TargetOpcode::BUNDLE) {
+      handleBundle(MI);
+    }
   }
 
   LLVM_DEBUG(
diff --git a/llvm/test/CodeGen/AMDGPU/fast-regalloc-bundles.mir b/llvm/test/CodeGen/AMDGPU/fast-regalloc-bundles.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fast-regalloc-bundles.mir
@@ -0,0 +1,26 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -run-pass=regallocfast %s -o - | FileCheck -check-prefixes=GCN %s
+
+---
+name: fast_regalloc_bundle_handling
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: vgpr_32 }
+body: |
+  bb.0:
+    ; GCN-LABEL: name: fast_regalloc_bundle_handling
+    ; GCN: renamable $vgpr0 = IMPLICIT_DEF
+    ; GCN: renamable $vgpr1 = IMPLICIT_DEF
+    ; GCN: renamable $vgpr0 = BUNDLE implicit killed renamable $vgpr0, implicit killed renamable $vgpr1, implicit $exec {
+    ; GCN:   renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    ; GCN: }
+    ; GCN: S_ENDPGM 0, implicit killed renamable $vgpr0
+    %0 = IMPLICIT_DEF
+    %1 = IMPLICIT_DEF
+    %2 = BUNDLE implicit %0, implicit %1, implicit $exec {
+      %2 = V_ADD_U32_e32 %0, %1, implicit $exec
+    }
+    S_ENDPGM 0, implicit %2
+...
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -stop-after=postrapseudos -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=MIR %s
+
+
+; MIR-LABEL: name: gws_barrier_offset0{{$}}
+; MIR: BUNDLE implicit{{( killed)?( renamable)?}} $vgpr0, implicit $m0, implicit $exec {
+; MIR-NEXT: DS_GWS_BARRIER renamable $vgpr0, 0, implicit $m0, implicit $exec :: (load 4 from custom "GWSResource")
+; MIR-NEXT: S_WAITCNT 0
+; MIR-NEXT: }
+define amdgpu_kernel void @gws_barrier_offset0(i32 %val) #0 {
+  call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 0)
+  ret void
+}
+
+
+declare void @llvm.amdgcn.ds.gws.barrier(i32, i32) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { convergent inaccessiblememonly nounwind }