diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -105,6 +105,9 @@ /// available in a physical register. LiveRegMap LiveVirtRegs; + /// Stores assigned virtual registers present in the bundle MI. + DenseMap<Register, MCPhysReg> BundleVirtRegsMap; + DenseMap<unsigned, SmallVector<MachineInstr *, 2>> LiveDbgValueMap; /// List of DBG_VALUE that we encountered without the vreg being assigned /// because they were placed after the last use of the vreg. @@ -218,6 +221,8 @@ void allocateInstruction(MachineInstr &MI); void handleDebugValue(MachineInstr &MI); + void handleBundle(MachineInstr &MI); + bool usePhysReg(MachineInstr &MI, MCPhysReg PhysReg); bool definePhysReg(MachineInstr &MI, MCPhysReg PhysReg); bool displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg); @@ -889,6 +894,9 @@ LRI->LiveOut = false; LRI->Reloaded = false; } + if (MI.getOpcode() == TargetOpcode::BUNDLE) { + BundleVirtRegsMap[VirtReg] = PhysReg; + } markRegUsedInInstr(PhysReg); setPhysReg(MI, MO, PhysReg); } @@ -934,6 +942,10 @@ } LRI->LastUse = &MI; + + if (MI.getOpcode() == TargetOpcode::BUNDLE) { + BundleVirtRegsMap[VirtReg] = LRI->PhysReg; + } markRegUsedInInstr(LRI->PhysReg); setPhysReg(MI, MO, LRI->PhysReg); } @@ -1064,6 +1076,7 @@ // operands and early-clobbers. UsedInInstr.clear(); + BundleVirtRegsMap.clear(); // Scan for special cases; Apply pre-assigned register defs to state. 
bool HasPhysRegUse = false;
@@ -1382,6 +1395,36 @@
   LiveDbgValueMap[Reg].push_back(&MI);
 }
 
+/// Assign physical registers to the virtual register operands of each
+/// instruction bundled under the BUNDLE header \p MI, per BundleVirtRegsMap.
+void RegAllocFast::handleBundle(MachineInstr &MI) {
+  // Also stop at the end of the block: if the bundle is the last thing in
+  // the block, the iterator reaches instr_end(), which must not be
+  // dereferenced.
+  MachineBasicBlock::instr_iterator End = MI.getParent()->instr_end();
+  MachineBasicBlock::instr_iterator BundledMI = MI.getIterator();
+  ++BundledMI;
+  while (BundledMI != End && BundledMI->isBundledWithPred()) {
+    for (MachineOperand &MO : BundledMI->operands()) {
+      if (!MO.isReg())
+        continue;
+
+      Register Reg = MO.getReg();
+      if (!Reg.isVirtual())
+        continue;
+
+      auto DI = BundleVirtRegsMap.find(Reg);
+      assert(DI != BundleVirtRegsMap.end() && "Unassigned virtual register");
+
+      // NOTE(review): setPhysReg is given the BUNDLE header, not the bundled
+      // instruction that owns MO -- confirm this is intended.
+      setPhysReg(MI, MO, DI->second);
+    }
+
+    ++BundledMI;
+  }
+}
+
 void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
   this->MBB = &MBB;
   LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);
@@ -1411,6 +1454,12 @@
     }
 
     allocateInstruction(MI);
+
+    // Once BUNDLE header is assigned registers, same assignments need to be
+    // done for bundled MIs.
+    if (MI.getOpcode() == TargetOpcode::BUNDLE) {
+      handleBundle(MI);
+    }
   }
 
   LLVM_DEBUG(
diff --git a/llvm/test/CodeGen/AMDGPU/fast-regalloc-bundles.mir b/llvm/test/CodeGen/AMDGPU/fast-regalloc-bundles.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fast-regalloc-bundles.mir
@@ -0,0 +1,26 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -run-pass=regallocfast %s -o - | FileCheck -check-prefixes=GCN,XNACK,GCX9 %s
+
+---
+name: fast_regalloc_bundle_handling
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: vgpr_32 }
+  - { id: 1, class: vgpr_32 }
+  - { id: 2, class: vgpr_32 }
+body: |
+  bb.0:
+    ; GCN-LABEL: name: fast_regalloc_bundle_handling
+    ; GCN: renamable $vgpr0 = IMPLICIT_DEF
+    ; GCN: renamable $vgpr1 = IMPLICIT_DEF
+    ; GCN: renamable $vgpr0 = BUNDLE implicit killed renamable $vgpr0, implicit killed renamable $vgpr1, implicit $exec {
+    ; GCN: renamable $vgpr0 = V_ADD_U32_e32 
$vgpr0, $vgpr1, implicit $exec + ; GCN: } + ; GCN: S_ENDPGM 0, implicit killed renamable $vgpr0 + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = BUNDLE implicit %0, implicit %1, implicit $exec { + %2 = V_ADD_U32_e32 %0, %1, implicit $exec + } + S_ENDPGM 0, implicit %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -stop-after=postrapseudos -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=MIR %s + + +; MIR-LABEL: name: gws_barrier_offset0{{$}} +; MIR: BUNDLE implicit{{( killed)?( renamable)?}} $vgpr0, implicit $m0, implicit $exec { +; MIR-NEXT: DS_GWS_BARRIER renamable $vgpr0, 0, implicit $m0, implicit $exec :: (load 4 from custom "GWSResource") +; MIR-NEXT: S_WAITCNT 0 +; MIR-NEXT: } +define amdgpu_kernel void @gws_barrier_offset0(i32 %val) #0 { + call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 0) + ret void +} + + +declare void @llvm.amdgcn.ds.gws.barrier(i32, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { convergent inaccessiblememonly nounwind }