Index: llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -384,6 +384,7 @@
 void GCNDownwardRPTracker::advanceToNext() {
   LastTrackedMI = &*NextMI++;
+  NextMI = skipDebugInstructionsForward(NextMI, MBBEnd);
 
   // Add new registers or mask bits.
   for (const auto &MO : LastTrackedMI->operands()) {
Index: llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -107,7 +107,8 @@
 // There no sense to create store clauses, they do not define anything,
 // thus there is nothing to set early-clobber.
 static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause) {
-  if (MI.isDebugValue() || MI.isBundled())
+  assert(!MI.isDebugInstr() && "debug instructions should not reach here");
+  if (MI.isBundled())
     return false;
   if (!MI.mayLoad() || MI.mayStore())
     return false;
@@ -322,6 +323,8 @@
   unsigned FuncMaxClause = AMDGPU::getIntegerAttribute(
       MF.getFunction(), "amdgpu-max-memory-clause", MaxClause);
 
+  SmallVector<MachineInstr *, 16> DbgInstrs;
+
   for (MachineBasicBlock &MBB : MF) {
     GCNDownwardRPTracker RPT(*LIS);
     MachineBasicBlock::instr_iterator Next;
@@ -329,6 +332,9 @@
       MachineInstr &MI = *I;
       Next = std::next(I);
 
+      if (MI.isDebugInstr())
+        continue;
+
       bool IsVMEM = isVMEMClauseInst(MI);
 
       if (!isValidClauseInst(MI, IsVMEM))
@@ -350,6 +356,11 @@
       unsigned Length = 1;
       for ( ; Next != E && Length < FuncMaxClause; ++Next) {
+        // Debug instructions should not change the bundling. We need to move
+        // these after the bundle.
+        if (Next->isDebugInstr())
+          continue;
+
         if (!isValidClauseInst(*Next, IsVMEM))
           break;
@@ -374,8 +385,17 @@
       // Restore the state after processing the bundle.
       RPT.reset(*B, &LiveRegsCopy);
 
+      DbgInstrs.clear();
+
+      auto BundleNext = I;
+      for (auto BI = I; BI != Next; BI = BundleNext) {
+        BundleNext = std::next(BI);
+
+        if (BI->isDebugValue()) {
+          DbgInstrs.push_back(BI->removeFromParent());
+          continue;
+        }
-      for (auto BI = I; BI != Next; ++BI) {
         BI->bundleWithPred();
         Ind->removeSingleMachineInstrFromMaps(*BI);
@@ -384,6 +404,10 @@
             MO.setIsInternalRead(true);
       }
 
+      // Reinsert any debug instructions after the new bundle.
+      for (MachineInstr *DbgInst : DbgInstrs)
+        MBB.insert(Next, DbgInst);
+
       for (auto &&R : Defs) {
         forAllLanes(R.first, R.second.second, [&R, &B](unsigned SubReg) {
           unsigned S = R.second.first | RegState::EarlyClobber;
Index: llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/soft-clause-dbg-value.mir
@@ -0,0 +1,49 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack -run-pass=si-form-memory-clauses -verify-machineinstrs -o - %s | FileCheck %s
+
+# Make sure that debug instructions do not change the bundling, and that the
+# dbg_values which break the clause are reinserted after the new
+# bundles.
+
+---
+name: sgpr_clause_dbg_value
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr4_sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24
+    ; CHECK-LABEL: name: sgpr_clause_dbg_value
+    ; CHECK: liveins: $sgpr4_sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr4_sgpr5
+    ; CHECK: early-clobber %2:sreg_32_xm0_xexec, early-clobber %1:sreg_32_xm0_xexec = BUNDLE [[COPY]] {
+    ; CHECK: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
+    ; CHECK: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 8, 0, 0 :: (load 4, addrspace 4)
+    ; CHECK: }
+    ; CHECK: DBG_VALUE [[S_LOAD_DWORD_IMM]], 0, 0
+    ; CHECK: DBG_VALUE [[S_LOAD_DWORD_IMM1]], 0, 0
+    ; CHECK: S_NOP 0
+    ; CHECK: S_NOP 0
+    ; CHECK: S_NOP 0
+    ; CHECK: early-clobber %4:sreg_32_xm0_xexec, early-clobber %3:sreg_32_xm0_xexec, early-clobber %5:sreg_32_xm0_xexec = BUNDLE [[COPY]] {
+    ; CHECK: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 16, 0, 0 :: (load 4, addrspace 4)
+    ; CHECK: [[S_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 32, 0, 0 :: (load 4, addrspace 4)
+    ; CHECK: [[S_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 64, 0, 0 :: (load 4, addrspace 4)
+    ; CHECK: }
+    ; CHECK: DBG_VALUE [[S_LOAD_DWORD_IMM2]], 0, 0
+    ; CHECK: DBG_VALUE [[S_LOAD_DWORD_IMM3]], 0, 0
+    ; CHECK: S_ENDPGM 0, implicit [[S_LOAD_DWORD_IMM]], implicit [[S_LOAD_DWORD_IMM1]], implicit [[S_LOAD_DWORD_IMM2]], implicit [[S_LOAD_DWORD_IMM3]], implicit [[S_LOAD_DWORD_IMM4]]
+    %0:sreg_64 = COPY $sgpr4_sgpr5
+    %1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 0, 0, 0 :: (load 4, align 4, addrspace 4)
+    DBG_VALUE %1, 0, 0
+    %2:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 8, 0, 0 :: (load 4, align 4, addrspace 4)
+    DBG_VALUE %2, 0, 0
+    S_NOP 0
+    S_NOP 0
+    S_NOP 0
+    %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0, 0 :: (load 4, align 4, addrspace 4)
+    %4:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 32, 0, 0 :: (load 4, align 4, addrspace 4)
+    DBG_VALUE %3, 0, 0
+    DBG_VALUE %4, 0, 0
+    %5:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0, 0 :: (load 4, align 4, addrspace 4)
+    S_ENDPGM 0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5
+
+...