diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp --- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp +++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp @@ -108,7 +108,11 @@ // which is used in Phase 3 if we need to insert a mode change. MachineInstr *FirstInsertionPoint; - BlockData() : FirstInsertionPoint(nullptr){}; + // A flag to indicate whether an Exit value has been set (we can't tell by + // examining the Exit value itself as all values may be valid results). + bool ExitSet; + + BlockData() : FirstInsertionPoint(nullptr), ExitSet(false){}; }; namespace { @@ -129,6 +133,8 @@ Status DefaultStatus = Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode)); + bool Changed = false; + public: SIModeRegister() : MachineFunctionPass(ID) {} @@ -199,6 +205,7 @@ (Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) | (AMDGPU::Hwreg::ID_MODE << AMDGPU::Hwreg::ID_SHIFT_)); ++NumSetregInserted; + Changed = true; InstrMode.Mask &= ~(((1 << Width) - 1) << Offset); } } @@ -323,22 +330,49 @@ // exit value is propagated. void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB, const SIInstrInfo *TII) { - // BlockData *BI = BlockInfo[MBB.getNumber()]; + bool RevisitRequired = false; + bool ExitSet = false; unsigned ThisBlock = MBB.getNumber(); if (MBB.pred_empty()) { // There are no predecessors, so use the default starting status. BlockInfo[ThisBlock]->Pred = DefaultStatus; + ExitSet = true; } else { // Build a status that is common to all the predecessors by intersecting // all the predecessor exit status values. + // Mask bits (which represent the Mode bits with a known value) can only be + // added by explicit SETREG instructions or the initial default value - + // the intersection process may remove Mask bits. + // If we find a predecessor that has not yet had an exit value determined + // (this can happen for example if a block is its own predecessor) we defer + // use of that value as the Mask will be all zero, and we will revisit this + // block again later (unless the only predecessor without an exit value is + // this block). MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end(); MachineBasicBlock &PB = *(*P); - BlockInfo[ThisBlock]->Pred = BlockInfo[PB.getNumber()]->Exit; + unsigned PredBlock = PB.getNumber(); + if ((ThisBlock == PredBlock) && (std::next(P) == E)) { + BlockInfo[ThisBlock]->Pred = DefaultStatus; + ExitSet = true; + } else if (BlockInfo[PredBlock]->ExitSet) { + BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit; + ExitSet = true; + } else if (PredBlock != ThisBlock) + RevisitRequired = true; for (P = std::next(P); P != E; P = std::next(P)) { MachineBasicBlock *Pred = *P; - BlockInfo[ThisBlock]->Pred = BlockInfo[ThisBlock]->Pred.intersect( - BlockInfo[Pred->getNumber()]->Exit); + unsigned PredBlock = Pred->getNumber(); + if (BlockInfo[PredBlock]->ExitSet) { + if (BlockInfo[ThisBlock]->ExitSet) { + BlockInfo[ThisBlock]->Pred = + BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit); + } else { + BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit; + } + ExitSet = true; + } else if (PredBlock != ThisBlock) + RevisitRequired = true; } } Status TmpStatus = @@ -354,6 +388,9 @@ Phase2List.push(&B); } } + BlockInfo[ThisBlock]->ExitSet = ExitSet; + if (RevisitRequired) + Phase2List.push(&MBB); } // In Phase 3 we revisit each block and if it has an insertion point defined we @@ -361,7 +398,6 @@ // not we insert an appropriate setreg instruction to modify the Mode register. void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB, const SIInstrInfo *TII) { - // BlockData *BI = BlockInfo[MBB.getNumber()]; unsigned ThisBlock = MBB.getNumber(); if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) { Status Delta = @@ -402,5 +438,5 @@ BlockInfo.clear(); - return NumSetregInserted > 0; + return Changed; } diff --git a/llvm/test/CodeGen/AMDGPU/mode-register.mir b/llvm/test/CodeGen/AMDGPU/mode-register.mir --- a/llvm/test/CodeGen/AMDGPU/mode-register.mir +++ b/llvm/test/CodeGen/AMDGPU/mode-register.mir @@ -457,3 +457,55 @@ bb.4: S_ENDPGM 0 ... +--- +# checks for bug where if a block is its own predecessor it could cause mode tracking +# to produce the wrong mode, resulting in an unnecessary setreg +# CHECK-LABEL: name: single_block_loop +# CHECK-LABEL: bb.0: +# CHECK-NOT: S_SETREG + +name: single_block_loop + +body: | + bb.0: + successors: %bb.1 + S_BRANCH %bb.1 + + bb.1: + successors: %bb.1, %bb.2 + S_CBRANCH_VCCZ %bb.1, implicit $vcc + S_BRANCH %bb.2 + + bb.2: + successors: %bb.3 + liveins: $vgpr1_vgpr2 + $vgpr1_vgpr2 = V_FRACT_F64_e32 killed $vgpr1_vgpr2, implicit $mode, implicit $exec + S_BRANCH %bb.3 + + bb.3: + S_ENDPGM 0 +... +--- +# checks for a bug where if the first block is its own predecessor the initial mode was +# not correctly propagated, resulting in an unnecessary setreg +# CHECK-LABEL: name: first_block_loop +# CHECK-LABEL: bb.0: +# CHECK-NOT: S_SETREG + +name: first_block_loop + +body: | + bb.0: + successors: %bb.0, %bb.1 + S_CBRANCH_VCCZ %bb.0, implicit $vcc + S_BRANCH %bb.1 + + bb.1: + successors: %bb.2 + liveins: $vgpr1_vgpr2 + $vgpr1_vgpr2 = V_FRACT_F64_e32 killed $vgpr1_vgpr2, implicit $mode, implicit $exec + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0 +...