diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp --- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp +++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp @@ -83,9 +83,7 @@ return ((Mask & S.Mask) == S.Mask) && ((Mode & S.Mask) == S.Mode); } - bool isCombinable(Status &S) { - return !(Mask & S.Mask) || isCompatible(S); - } + bool isCombinable(Status &S) { return !(Mask & S.Mask) || isCompatible(S); } }; class BlockData { @@ -110,7 +108,11 @@ // which is used in Phase 3 if we need to insert a mode change. MachineInstr *FirstInsertionPoint; - BlockData() : FirstInsertionPoint(nullptr) {}; + // A flag to indicate whether an Exit value has been set (we can't tell by + // examining the Exit value itself as all values may be valid results). + bool ExitSet; + + BlockData() : FirstInsertionPoint(nullptr), ExitSet(false){}; }; namespace { @@ -131,6 +133,8 @@ Status DefaultStatus = Status(FP_ROUND_MODE_DP(0x3), FP_ROUND_MODE_DP(DefaultMode)); + bool SetregInserted = false; + public: SIModeRegister() : MachineFunctionPass(ID) {} @@ -201,6 +205,7 @@ (Offset << AMDGPU::Hwreg::OFFSET_SHIFT_) | (AMDGPU::Hwreg::ID_MODE << AMDGPU::Hwreg::ID_SHIFT_)); ++NumSetregInserted; + SetregInserted = true; InstrMode.Mask &= ~(((1 << Width) - 1) << Offset); } } @@ -325,24 +330,53 @@ // exit value is propagated. void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB, const SIInstrInfo *TII) { -// BlockData *BI = BlockInfo[MBB.getNumber()]; + bool RevisitRequired = false; + bool ExitSet = false; unsigned ThisBlock = MBB.getNumber(); if (MBB.pred_empty()) { // There are no predecessors, so use the default starting status. BlockInfo[ThisBlock]->Pred = DefaultStatus; + ExitSet = true; } else { // Build a status that is common to all the predecessors by intersecting // all the predecessor exit status values. + // Mask bits (which represent the Mode bits with a known value) can only be + // added by explicit SETREG instructions or the initial default value - + // the intersection process may remove Mask bits. + // If we find a predecessor that has not yet had an exit value determined + // (this can happen for example if a block is its own predecessor) we defer + // use of that value as the Mask will be all zero, and we will revisit this + // block again later (unless the only predecessor without an exit value is + // this block). MachineBasicBlock::pred_iterator P = MBB.pred_begin(), E = MBB.pred_end(); MachineBasicBlock &PB = *(*P); - BlockInfo[ThisBlock]->Pred = BlockInfo[PB.getNumber()]->Exit; + unsigned PredBlock = PB.getNumber(); + if ((ThisBlock == PredBlock) && (std::next(P) == E)) { + BlockInfo[ThisBlock]->Pred = DefaultStatus; + ExitSet = true; + } else if (BlockInfo[PredBlock]->ExitSet) { + BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit; + ExitSet = true; + } else if (PredBlock != ThisBlock) + RevisitRequired = true; for (P = std::next(P); P != E; P = std::next(P)) { MachineBasicBlock *Pred = *P; - BlockInfo[ThisBlock]->Pred = BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[Pred->getNumber()]->Exit); + unsigned PredBlock = Pred->getNumber(); + if (BlockInfo[PredBlock]->ExitSet) { + if (BlockInfo[ThisBlock]->ExitSet) { + BlockInfo[ThisBlock]->Pred = + BlockInfo[ThisBlock]->Pred.intersect(BlockInfo[PredBlock]->Exit); + } else { + BlockInfo[ThisBlock]->Pred = BlockInfo[PredBlock]->Exit; + } + ExitSet = true; + } else if (PredBlock != ThisBlock) + RevisitRequired = true; } } - Status TmpStatus = BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change); + Status TmpStatus = + BlockInfo[ThisBlock]->Pred.merge(BlockInfo[ThisBlock]->Change); if (BlockInfo[ThisBlock]->Exit != TmpStatus) { BlockInfo[ThisBlock]->Exit = TmpStatus; // Add the successors to the work list so we can propagate the changed exit @@ -354,6 +388,9 @@ Phase2List.push(&B); } } + BlockInfo[ThisBlock]->ExitSet = ExitSet; + if (RevisitRequired) + Phase2List.push(&MBB); } // In Phase 3 we revisit each block and if it has an insertion point defined we @@ -361,10 +398,10 @@ // not we insert an appropriate setreg instruction to modify the Mode register. void SIModeRegister::processBlockPhase3(MachineBasicBlock &MBB, const SIInstrInfo *TII) { -// BlockData *BI = BlockInfo[MBB.getNumber()]; unsigned ThisBlock = MBB.getNumber(); if (!BlockInfo[ThisBlock]->Pred.isCompatible(BlockInfo[ThisBlock]->Require)) { - Status Delta = BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require); + Status Delta = + BlockInfo[ThisBlock]->Pred.delta(BlockInfo[ThisBlock]->Require); if (BlockInfo[ThisBlock]->FirstInsertionPoint) insertSetreg(MBB, BlockInfo[ThisBlock]->FirstInsertionPoint, TII, Delta); else @@ -401,5 +438,5 @@ BlockInfo.clear(); - return NumSetregInserted > 0; + return SetregInserted; } diff --git a/llvm/test/CodeGen/AMDGPU/mode-register.mir b/llvm/test/CodeGen/AMDGPU/mode-register.mir --- a/llvm/test/CodeGen/AMDGPU/mode-register.mir +++ b/llvm/test/CodeGen/AMDGPU/mode-register.mir @@ -457,3 +457,55 @@ bb.4: S_ENDPGM 0 ... +--- +# checks for bug where if a block is its own predecessor it could cause mode tracking +# to produce the wrong mode, resulting in an unnecessary setreg +# CHECK-LABEL: name: single_block_loop +# CHECK-LABEL: bb.0: +# CHECK-NOT: S_SETREG + +name: single_block_loop + +body: | + bb.0: + successors: %bb.1 + S_BRANCH %bb.1 + + bb.1: + successors: %bb.1, %bb.2 + S_CBRANCH_VCCZ %bb.1, implicit $vcc + S_BRANCH %bb.2 + + bb.2: + successors: %bb.3 + liveins: $vgpr1_vgpr2 + $vgpr1_vgpr2 = V_FRACT_F64_e32 killed $vgpr1_vgpr2, implicit $mode, implicit $exec + S_BRANCH %bb.3 + + bb.3: + S_ENDPGM 0 +... +--- +# checks for a bug where if the first block is its own predecessor the initial mode was +# not correctly propagated, resulting in an unnecessary setreg +# CHECK-LABEL: name: first_block_loop +# CHECK-LABEL: bb.0: +# CHECK-NOT: S_SETREG + +name: first_block_loop + +body: | + bb.0: + successors: %bb.0, %bb.1 + S_CBRANCH_VCCZ %bb.0, implicit $vcc + S_BRANCH %bb.1 + + bb.1: + successors: %bb.2 + liveins: $vgpr1_vgpr2 + $vgpr1_vgpr2 = V_FRACT_F64_e32 killed $vgpr1_vgpr2, implicit $mode, implicit $exec + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0 +...