diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -39,6 +39,7 @@ FunctionPass *createSIOptimizeExecMaskingPreRAPass(); FunctionPass *createSIOptimizeVGPRLiveRangePass(); FunctionPass *createSIFixSGPRCopiesPass(); +FunctionPass *createSIFixSGPRLivenessPass(); FunctionPass *createSIMemoryLegalizerPass(); FunctionPass *createSIInsertWaitcntsPass(); FunctionPass *createSIPreAllocateWWMRegsPass(); @@ -174,6 +175,9 @@ void initializeSIFixSGPRCopiesPass(PassRegistry &); extern char &SIFixSGPRCopiesID; +void initializeSIFixSGPRLivenessPass(PassRegistry &); +extern char &SIFixSGPRLivenessID; + void initializeSIFixVGPRCopiesPass(PassRegistry &); extern char &SIFixVGPRCopiesID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -348,6 +348,7 @@ initializeGCNDPPCombinePass(*PR); initializeSILowerI1CopiesPass(*PR); initializeSILowerSGPRSpillsPass(*PR); + initializeSIFixSGPRLivenessPass(*PR); initializeSIFixSGPRCopiesPass(*PR); initializeSIFixVGPRCopiesPass(*PR); initializeSIFoldOperandsPass(*PR); @@ -1241,6 +1242,7 @@ bool GCNPassConfig::addInstSelector() { AMDGPUPassConfig::addInstSelector(); + addPass(&SIFixSGPRLivenessID); addPass(&SIFixSGPRCopiesID); addPass(createSILowerI1CopiesPass()); return false; diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -132,6 +132,7 @@ R600TargetTransformInfo.cpp SIAnnotateControlFlow.cpp SIFixSGPRCopies.cpp + SIFixSGPRLiveness.cpp SIFixVGPRCopies.cpp SIFoldOperands.cpp SIFormMemoryClauses.cpp diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRLiveness.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRLiveness.cpp new file mode 100644 --- /dev/null +++ 
b/llvm/lib/Target/AMDGPU/SIFixSGPRLiveness.cpp @@ -0,0 +1,152 @@ +//===- SIFixSGPRLiveness.cpp - Fix SGPR Liveness --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Consider a simple example: +/// BB1: %1:SGPR = COPY 1 +/// | \ +/// | BB2: %2:SGPR = IMPLICIT_DEF +/// | / +/// BB3: %3 = phi (%2, BB2), (%1, BB1) +/// +/// BB1 ends with a divergent branch. The virtual registers shown in above +/// example are dead in BB2. After register allocation, they may end up +/// being put in the same physical register, and the liveness of the register +/// does not cover the range of BB2. If the corresponding physical register +/// is reused in BB2, the content of the physical register will be overwritten. +/// The idea in this pass is to extend the liveness of %1 through BB2. 
+/// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/InitializePasses.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +#define DEBUG_TYPE "si-fix-sgpr-liveness" + +namespace { + +class SIFixSGPRLiveness : public MachineFunctionPass { + MachineDominatorTree *MDT; + const SIInstrInfo *TII; + +public: + static char ID; + + MachineRegisterInfo *MRI; + const SIRegisterInfo *TRI; + + SIFixSGPRLiveness() : MachineFunctionPass(ID) {} + + bool extendSGPRLiveRangeForPHI(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { return "SI Fix SGPR Liveness"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; + +} // end anonymous namespace + +INITIALIZE_PASS_BEGIN(SIFixSGPRLiveness, DEBUG_TYPE, "SI Fix SGPR Liveness", + false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(SIFixSGPRLiveness, DEBUG_TYPE, "SI Fix SGPR Liveness", + false, false) + +char SIFixSGPRLiveness::ID = 0; + +char &llvm::SIFixSGPRLivenessID = SIFixSGPRLiveness::ID; + +FunctionPass *llvm::createSIFixSGPRLivenessPass() { + return new SIFixSGPRLiveness(); +} + +bool SIFixSGPRLiveness::extendSGPRLiveRangeForPHI(MachineFunction &MF) { + bool Changed = false; + for (auto &MBB : MF) { + for (auto &MI : MBB.phis()) { + Register DstReg = MI.getOperand(0).getReg(); + if (!TRI->isSGPRClass(MRI->getRegClass(DstReg))) + continue; + SmallVector, 4> Undefs; + MachineOperand *SingleOp = nullptr; + MachineBasicBlock *DominateMBB = nullptr; + for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { + MachineOperand *SrcOp = &MI.getOperand(i); + 
MachineBasicBlock *SrcMBB = MI.getOperand(i + 1).getMBB(); + + if (SrcOp->isReg() && SrcOp->getReg().isVirtual()) { + MachineInstr *Def = MRI->getVRegDef(SrcOp->getReg()); + if (Def && Def->isImplicitDef()) { + Undefs.push_back(std::make_pair(SrcOp, SrcMBB)); + continue; + } + } + + if (!SingleOp) { + SingleOp = SrcOp; + DominateMBB = SrcMBB; + } else if (SrcOp->isIdenticalTo(*SingleOp)) { + // If there are several predecessors with the same incoming value, we + // need to find the dominate predecessor. + if (MDT->dominates(SrcMBB, DominateMBB)) + DominateMBB = SrcMBB; + } else { + SingleOp = nullptr; + break; + } + } + + if (!SingleOp || Undefs.empty() || !TII->hasDivergentBranch(DominateMBB)) + continue; + + // If we have one non-undef incoming value that dominates all the + // other predecessors with undef incoming value, make it alive through all + // paths from the DominateMBB to MBB. + if (all_of(Undefs, [&](auto UD) { + return MDT->dominates(DominateMBB, UD.second); + })) { + assert(SingleOp->isReg()); + for (auto UndefOp : Undefs) { + // Bypass predecessor which is dominated by MBB to exclude backedge + // case. 
+ if (MDT->dominates(&MBB, UndefOp.second)) + continue; + + UndefOp.first->setIsUndef(false); + UndefOp.first->setReg(SingleOp->getReg()); + UndefOp.first->setSubReg(SingleOp->getSubReg()); + } + Changed = true; + } + } + } + return Changed; +} + +bool SIFixSGPRLiveness::runOnMachineFunction(MachineFunction &MF) { + const GCNSubtarget &ST = MF.getSubtarget(); + MRI = &MF.getRegInfo(); + TRI = ST.getRegisterInfo(); + TII = ST.getInstrInfo(); + MDT = &getAnalysis(); + + return extendSGPRLiveRangeForPHI(MF); +} diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-liveness.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-liveness.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-liveness.mir @@ -0,0 +1,233 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -run-pass=si-fix-sgpr-liveness -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s + +--- +name: extend_sgpr_liveness_basic +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: extend_sgpr_liveness_basic + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; GCN-NEXT: liveins: $sgpr0_sgpr1, $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY1]], 1, implicit $exec + ; GCN-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.2(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_64 = IMPLICIT_DEF + ; GCN-NEXT: S_BRANCH %bb.2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: [[PHI:%[0-9]+]]:sgpr_64 = PHI [[COPY]], %bb.0, [[COPY]], %bb.1 + ; GCN-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN-NEXT: $sgpr0_sgpr1 = COPY 
[[PHI]] + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0 + + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:vgpr_32 = COPY $vgpr0 + %2:sreg_64 = V_CMP_EQ_U32_e64 %1, 1, implicit $exec + %3:sreg_64 = SI_IF %2, %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + S_BRANCH %bb.1 + + bb.1: + %4:sgpr_64 = IMPLICIT_DEF + S_BRANCH %bb.2 + + bb.2: + %5:sgpr_64 = PHI %0, %bb.0, %4, %bb.1 + SI_END_CF %3, implicit-def $exec, implicit-def $scc, implicit $exec + $sgpr0_sgpr1 = COPY %5 +... + +# The SGPR liveness needs extension through all dominated undef predecessors. +# Currently we do not generate such CFG, this is just used to show the pass +# works fine for such situation. +--- +name: uniform_branch_nested_inside +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: uniform_branch_nested_inside + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.5(0x40000000), %bb.1(0x40000000) + ; GCN-NEXT: liveins: $sgpr0_sgpr1, $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY1]], 1, implicit $exec + ; GCN-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_CBRANCH_SCC1 %bb.3, implicit undef $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: successors: %bb.5(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_64 = IMPLICIT_DEF + ; GCN-NEXT: S_BRANCH %bb.5 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.3: + ; GCN-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_CBRANCH_SCC1 %bb.5, implicit undef $scc + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.4: + ; GCN-NEXT: successors: %bb.5(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: 
[[DEF1:%[0-9]+]]:sgpr_64 = IMPLICIT_DEF + ; GCN-NEXT: S_BRANCH %bb.5 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.5: + ; GCN-NEXT: [[PHI:%[0-9]+]]:sgpr_64 = PHI [[COPY]], %bb.3, [[COPY]], %bb.0, [[COPY]], %bb.2, [[COPY]], %bb.4 + ; GCN-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[PHI]] + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0 + + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:vgpr_32 = COPY $vgpr0 + %2:sreg_64 = V_CMP_EQ_U32_e64 %1, 1, implicit $exec + %3:sreg_64 = SI_IF %2, %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + S_BRANCH %bb.1 + + bb.1: + S_CBRANCH_SCC1 %bb.3, implicit undef $scc + + bb.2: + %4:sgpr_64 = IMPLICIT_DEF + S_BRANCH %bb.5 + + bb.3: + S_CBRANCH_SCC1 %bb.5, implicit undef $scc + + bb.4: + %5:sgpr_64 = IMPLICIT_DEF + S_BRANCH %bb.5 + + bb.5: + %6:sgpr_64 = PHI %0, %bb.3, %0, %bb.0, %4, %bb.2, %5, %bb.4 + SI_END_CF %3, implicit-def $exec, implicit-def $scc, implicit $exec + $sgpr0_sgpr1 = COPY %6 +... + +# No need to extend SGPR liveness for backedge case. 
+ +--- +name: no_liveness_extension_for_backedge +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: no_liveness_extension_for_backedge + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: liveins: $sgpr0_sgpr1, $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, %3, %bb.1 + ; GCN-NEXT: [[PHI1:%[0-9]+]]:sgpr_64 = PHI [[COPY]], %bb.0, %5, %bb.1 + ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; GCN-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_64 = SI_IF_BREAK [[DEF]], [[PHI]], implicit-def dead $scc + ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; GCN-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: [[PHI2:%[0-9]+]]:sreg_64 = PHI [[SI_IF_BREAK]], %bb.1 + ; GCN-NEXT: SI_END_CF [[PHI2]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0 + + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64 = S_MOV_B64 0 + + bb.1: + %2:sreg_64 = PHI %1, %bb.0, %3, %bb.1 + %7:sgpr_64 = PHI %0, %bb.0, %8, %bb.1 + %5:sreg_64 = IMPLICIT_DEF + %3:sreg_64 = SI_IF_BREAK %5, %2, implicit-def dead $scc + %8:sreg_64 = IMPLICIT_DEF + SI_LOOP %3, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.2 + + bb.2: + %4:sreg_64 = PHI %3, %bb.1 + SI_END_CF %4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_ENDPGM 0 +... +# No SGPR liveness extension for backedge of loop inside if-then. 
+--- +name: backedge_of_loop_inside_if_then +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: backedge_of_loop_inside_if_then + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; GCN-NEXT: liveins: $sgpr0_sgpr1, $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1 + ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY1]], 1, implicit $exec + ; GCN-NEXT: [[SI_IF:%[0-9]+]]:sreg_64 = SI_IF [[V_CMP_EQ_U32_e64_]], %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, %6, %bb.1 + ; GCN-NEXT: [[PHI1:%[0-9]+]]:sgpr_64 = PHI [[COPY]], %bb.0, %8, %bb.1 + ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; GCN-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_64 = SI_IF_BREAK [[DEF]], [[PHI]], implicit-def dead $scc + ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; GCN-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: successors: %bb.3(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[PHI2:%[0-9]+]]:sreg_64 = PHI [[SI_IF_BREAK]], %bb.1 + ; GCN-NEXT: SI_END_CF [[PHI2]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.3: + ; GCN-NEXT: SI_END_CF [[SI_IF]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0 + + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64 = S_MOV_B64 0 + %10:vgpr_32 = COPY $vgpr0 + %11:sreg_64 = V_CMP_EQ_U32_e64 %10, 1, implicit $exec + %12:sreg_64 = SI_IF %11, %bb.3, implicit-def $exec, 
implicit-def $scc, implicit $exec + + bb.1: + %2:sreg_64 = PHI %1, %bb.0, %3, %bb.1 + %7:sgpr_64 = PHI %0, %bb.0, %8, %bb.1 + %5:sreg_64 = IMPLICIT_DEF + %3:sreg_64 = SI_IF_BREAK %5, %2, implicit-def dead $scc + %8:sreg_64 = IMPLICIT_DEF + SI_LOOP %3, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.2 + + bb.2: + %4:sreg_64 = PHI %3, %bb.1 + SI_END_CF %4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.3 + + bb.3: + SI_END_CF %12, implicit-def $exec, implicit-def $scc, implicit $exec + S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -93,6 +93,7 @@ ; GCN-O0-NEXT: Legacy Divergence Analysis ; GCN-O0-NEXT: AMDGPU DAG->DAG Pattern Instruction Selection ; GCN-O0-NEXT: MachineDominator Tree Construction +; GCN-O0-NEXT: SI Fix SGPR Liveness ; GCN-O0-NEXT: SI Fix SGPR copies ; GCN-O0-NEXT: MachinePostDominator Tree Construction ; GCN-O0-NEXT: SI Lower i1 Copies @@ -276,6 +277,7 @@ ; GCN-O1-NEXT: Lazy Block Frequency Analysis ; GCN-O1-NEXT: AMDGPU DAG->DAG Pattern Instruction Selection ; GCN-O1-NEXT: MachineDominator Tree Construction +; GCN-O1-NEXT: SI Fix SGPR Liveness ; GCN-O1-NEXT: SI Fix SGPR copies ; GCN-O1-NEXT: MachinePostDominator Tree Construction ; GCN-O1-NEXT: SI Lower i1 Copies @@ -557,6 +559,7 @@ ; GCN-O1-OPTS-NEXT: Lazy Block Frequency Analysis ; GCN-O1-OPTS-NEXT: AMDGPU DAG->DAG Pattern Instruction Selection ; GCN-O1-OPTS-NEXT: MachineDominator Tree Construction +; GCN-O1-OPTS-NEXT: SI Fix SGPR Liveness ; GCN-O1-OPTS-NEXT: SI Fix SGPR copies ; GCN-O1-OPTS-NEXT: MachinePostDominator Tree Construction ; GCN-O1-OPTS-NEXT: SI Lower i1 Copies @@ -847,6 +850,7 @@ ; GCN-O2-NEXT: Lazy Block Frequency Analysis ; GCN-O2-NEXT: AMDGPU DAG->DAG Pattern Instruction Selection ; GCN-O2-NEXT: MachineDominator Tree Construction +; GCN-O2-NEXT: SI Fix 
SGPR Liveness ; GCN-O2-NEXT: SI Fix SGPR copies ; GCN-O2-NEXT: MachinePostDominator Tree Construction ; GCN-O2-NEXT: SI Lower i1 Copies @@ -1151,6 +1155,7 @@ ; GCN-O3-NEXT: Lazy Block Frequency Analysis ; GCN-O3-NEXT: AMDGPU DAG->DAG Pattern Instruction Selection ; GCN-O3-NEXT: MachineDominator Tree Construction +; GCN-O3-NEXT: SI Fix SGPR Liveness ; GCN-O3-NEXT: SI Fix SGPR copies ; GCN-O3-NEXT: MachinePostDominator Tree Construction ; GCN-O3-NEXT: SI Lower i1 Copies diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-liveness.ll b/llvm/test/CodeGen/AMDGPU/sgpr-liveness.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sgpr-liveness.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN +; +; This is a test to show that we need to extend the liveness of an SGPR register to +; stop it from being overwritten. The example IR has a divergent if-then, and the phi %c2 +; only has a meaningful incoming value from %entry. To achieve the goal of keeping +; more values as uniform, the divergence/uniform analysis will mark %c2 as uniform. +; For the AMDGPU backend, we will assign an SGPR for %c2 and its incoming values. Without +; making the value alive in block %if, it might be overwritten in block %if. +; Usually this issue does not happen because middle-end optimization tends to +; simplify %c2 as %c. But this case happens after subsequent structurizer change.
+; +define amdgpu_ps float @sgpr_live_through_all_paths(float inreg %c, float %v, i32 %x, i32 %y) #0 { +; GCN-LABEL: sgpr_live_through_all_paths: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: v_cmp_lt_i32_e64 s2, v2, v1 +; GCN-NEXT: s_mov_b32 s1, exec_lo +; GCN-NEXT: s_and_b32 s2, s1, s2 +; GCN-NEXT: s_mov_b32 exec_lo, s2 +; GCN-NEXT: s_cbranch_execz .LBB0_2 +; GCN-NEXT: ; %bb.1: ; %if +; GCN-NEXT: s_mov_b32 s2, 2.0 +; GCN-NEXT: v_div_scale_f32 v1, s3, s2, s2, v0 +; GCN-NEXT: v_rcp_f32_e64 v2, v1 +; GCN-NEXT: s_mov_b32 s3, 1.0 +; GCN-NEXT: v_fma_f32 v3, -v1, v2, s3 +; GCN-NEXT: v_fmac_f32_e64 v2, v3, v2 +; GCN-NEXT: v_div_scale_f32 v3, vcc_lo, v0, s2, v0 +; GCN-NEXT: v_mul_f32_e64 v4, v3, v2 +; GCN-NEXT: v_fma_f32 v5, -v1, v4, v3 +; GCN-NEXT: v_fmac_f32_e64 v4, v5, v2 +; GCN-NEXT: v_fma_f32 v1, -v1, v4, v3 +; GCN-NEXT: v_div_fmas_f32 v1, v1, v2, v4 +; GCN-NEXT: v_div_fixup_f32 v0, v1, s2, v0 +; GCN-NEXT: .LBB0_2: ; %end +; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GCN-NEXT: v_add_f32_e64 v0, v0, s0 +; GCN-NEXT: ; return to shader part epilog +entry: + %cc = icmp slt i32 %y, %x + br i1 %cc, label %if, label %end + +if: + %v.if = fdiv float %v, 2.0 + br label %end + +end: + %v2 = phi float [ %v.if, %if ], [ %v, %entry ] + %c2 = phi float [ undef, %if ], [ %c, %entry ] + %r = fadd float %v2, %c2 + ret float %r +} + +attributes #0 = { nounwind optnone noinline }