diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1636,6 +1636,22 @@
     return false;
   }
 
+  /// Return the iterator PHIElimination treats as the position of the last
+  /// PHI in \p MBB. The default is the instruction immediately preceding the
+  /// first one that MachineBasicBlock::SkipPHIsAndLabels does not skip.
+  ///
+  /// The default applies std::prev to that position, so \p MBB must begin
+  /// with at least one instruction that SkipPHIsAndLabels skips.
+  /// PHIElimination only calls this after checking that the block's first
+  /// instruction is a PHI.
+  ///
+  /// Overrides are not required to be read-only: the AMDGPU override may
+  /// rewrite registers and erase a PHI before returning.
+  virtual MachineBasicBlock::iterator
+  getPHILoweringPoint(MachineBasicBlock &MBB) const {
+    return std::prev(MBB.SkipPHIsAndLabels(MBB.begin()));
+  }
+
   /// Returns a \p outliner::OutlinedFunction struct containing target-specific
   /// information for a set of outlining candidates.
   virtual outliner::OutlinedFunction getOutliningCandidateInfo(
diff --git a/llvm/lib/CodeGen/PHIElimination.cpp b/llvm/lib/CodeGen/PHIElimination.cpp
--- a/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/llvm/lib/CodeGen/PHIElimination.cpp
@@ -199,10 +199,9 @@
   if (MBB.empty() || !MBB.front().isPHI())
     return false; // Quick exit for basic blocks without PHIs.
 
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
   // Get an iterator to the last PHI node.
-  MachineBasicBlock::iterator LastPHIIt =
-    std::prev(MBB.SkipPHIsAndLabels(MBB.begin()));
-
+  MachineBasicBlock::iterator LastPHIIt = TII->getPHILoweringPoint(MBB);
   while (MBB.front().isPHI())
     LowerPHINode(MBB, LastPHIIt);
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -954,6 +954,9 @@
 
   bool isBasicBlockPrologue(const MachineInstr &MI) const override;
 
+  MachineBasicBlock::iterator
+  getPHILoweringPoint(MachineBasicBlock &MBB) const override;
+
   /// Return a partially built integer add instruction without carry.
   /// Caller must add source operands.
   /// For pre-GFX9 it will generate unused carry destination operand.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6054,6 +6054,42 @@
          MI.modifiesRegister(AMDGPU::EXEC, &RI);
 }
 
+// AMDGPU override of the PHI lowering point hook. If the first non-PHI
+// instruction of MBB is an SI_END_CF whose operand 0 is defined by a PHI,
+// every use of that PHI's result is redirected to the PHI's operand 1
+// register and the PHI is erased. The generic TargetInstrInfo lowering point
+// is returned in all cases.
+MachineBasicBlock::iterator
+SIInstrInfo::getPHILoweringPoint(MachineBasicBlock &MBB) const {
+  // Canonicalize PHI usage in SI_END_CF.
+  auto MII = MBB.getFirstNonPHI();
+  if (MII != MBB.end() && MII->getOpcode() == AMDGPU::SI_END_CF) {
+    MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+    unsigned Reg = MII->getOperand(0).getReg();
+    assert(MII->getOperand(0).getSubReg() == 0 && "Unexpected subregister");
+    // Check whether it's defined from PHI.
+    MachineInstr *Def = MRI.getVRegDef(Reg);
+    if (Def && Def->isPHI()) {
+      // It should be a trivial PHI. Such a PHI node may be inserted as the
+      // last LCSSA before isel. It's meaningless and should be eliminated.
+      assert(Def->getNumOperands() == 3 && "Unexpected PHI fed into SI_END_CF");
+      // Eliminate this trivial PHI.
+      unsigned SrcReg = Def->getOperand(1).getReg();
+      assert(Def->getOperand(1).getSubReg() == 0 &&
+             "Unexpected source subregister");
+      // constrainRegClass() updates SrcReg's register class, so it must not
+      // live inside assert(): NDEBUG builds discard the whole expression and
+      // would silently skip the constraint.
+      const TargetRegisterClass *RC =
+          MRI.constrainRegClass(SrcReg, MRI.getRegClass(Reg));
+      (void)RC; // Only read by the assert below.
+      assert(RC && "Unexpected source register");
+      MRI.replaceRegWith(Reg, SrcReg);
+      Def->eraseFromParent();
+    }
+  }
+  return TargetInstrInfo::getPHILoweringPoint(MBB);
+}
+
 MachineInstrBuilder
 SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator I,
diff --git a/llvm/test/CodeGen/AMDGPU/lcssa-optnone.ll b/llvm/test/CodeGen/AMDGPU/lcssa-optnone.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lcssa-optnone.ll
@@ -0,0 +1,26 @@
+; RUN: llc -verify-machineinstrs -march=amdgcn -O0 -o - %s | FileCheck %s
+
+; CHECK-LABEL: non_uniform_loop
+; CHECK: s_endpgm
+define amdgpu_kernel void @non_uniform_loop(float addrspace(1)* %array) {
+entry:
+  %w = tail call i32 @llvm.amdgcn.workitem.id.x()
+  br label %for.cond
+
+for.cond:
+  %i = phi i32 [0, %entry], [%i.next, %for.inc]
+  %cmp = icmp ult i32 %i, %w
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  br label %for.inc
+
+for.inc:
+  %i.next = add i32 %i, 1
+  br label %for.cond
+
+for.end:
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x()