Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.h =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -17,6 +17,7 @@ #include "AMDGPUArgumentUsageInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/IR/InstrTypes.h" @@ -59,6 +60,8 @@ }; bool isInstrUniform(const MachineInstr &MI) const; + bool isVCC(Register Reg, const MachineRegisterInfo &MRI) const; + /// tblgen-erated 'select' implementation. bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -59,8 +59,9 @@ const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; } -static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) { - assert(!TargetRegisterInfo::isPhysicalRegister(Reg)); +static bool isSCC(Register Reg, const MachineRegisterInfo &MRI) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return Reg == AMDGPU::SCC; auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); const TargetRegisterClass *RC = @@ -76,15 +77,16 @@ return RB->getID() == AMDGPU::SCCRegBankID; } -static bool isVCC(unsigned Reg, const MachineRegisterInfo &MRI, - const SIRegisterInfo &TRI) { - assert(!TargetRegisterInfo::isPhysicalRegister(Reg)); +bool AMDGPUInstructionSelector::isVCC(Register Reg, + const MachineRegisterInfo &MRI) const { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return Reg == TRI.getVCC(); auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); const TargetRegisterClass *RC = RegClassOrBank.dyn_cast(); if (RC) { - return RC == TRI.getWaveMaskRegClass() && + return RC->hasSuperClassEq(TRI.getBoolRC()) 
&& MRI.getType(Reg).getSizeInBits() == 1; } @@ -991,27 +993,41 @@ Register CondReg = CondOp.getReg(); const DebugLoc &DL = I.getDebugLoc(); + unsigned BrOpcode; + Register CondPhysReg; + const TargetRegisterClass *ConstrainRC; + + // In SelectionDAG, we inspect the IR block for uniformity metadata to decide + // whether the branch is uniform when selecting the instruction. In + // GlobalISel, we should push that decision into RegBankSelect. Assume for now + // RegBankSelect knows what it's doing if the branch condition is scc, even + // though it currently does not. if (isSCC(CondReg, MRI)) { - // In SelectionDAG, we inspect the IR block for uniformity metadata to decide - // whether the branch is uniform when selecting the instruction. In - // GlobalISel, we should push that decision into RegBankSelect. Assume for now - // RegBankSelect knows what it's doing if the branch condition is scc, even - // though it currently does not. - BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC) - .addReg(CondReg); - if (!MRI.getRegClassOrNull(CondReg)) { - const TargetRegisterClass *RC - = TRI.getConstrainedRegClassForOperand(CondOp, MRI); - MRI.setRegClass(CondReg, RC); - } + CondPhysReg = AMDGPU::SCC; + BrOpcode = AMDGPU::S_CBRANCH_SCC1; + ConstrainRC = &AMDGPU::SReg_32_XM0RegClass; + } else if (isVCC(CondReg, MRI)) { + // FIXME: Do we have to insert an and with exec here, like in SelectionDAG? + // We sort of know, based on the register bank, that a VCC producer ands + // inactive lanes with 0. What if there was a logical operation with vcc + // producers in different blocks/with different exec masks? + // FIXME: Should scc->vcc copies and with exec? 
+ CondPhysReg = TRI.getVCC(); + BrOpcode = AMDGPU::S_CBRANCH_VCCNZ; + ConstrainRC = TRI.getBoolRC(); + } else + return false; - BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_CBRANCH_SCC1)) - .addMBB(I.getOperand(1).getMBB()); - I.eraseFromParent(); - return true; - } + if (!MRI.getRegClassOrNull(CondReg)) + MRI.setRegClass(CondReg, ConstrainRC); - return false; + BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg) + .addReg(CondReg); + BuildMI(*BB, &I, DL, TII.get(BrOpcode)) + .addMBB(I.getOperand(1).getMBB()); + + I.eraseFromParent(); + return true; } bool AMDGPUInstructionSelector::selectG_FRAME_INDEX(MachineInstr &I) const { Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir +++ test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir @@ -4,7 +4,7 @@ # ERR-NOT: remark: # ERR: remark: :0:0: cannot select: G_BRCOND %1:sgpr(s1), %bb.1 (in function: brcond_sgpr) -# ERR-NEXT: remark: :0:0: cannot select: G_BRCOND %2:vcc(s1), %bb.1 (in function: brcond_vcc) +# ERR-NEXT: remark: :0:0: cannot select: G_BRCOND %1:vgpr(s1), %bb.1 (in function: brcond_vgpr) # ERR-NOT: remark: --- @@ -117,6 +117,34 @@ ... +--- + +name: brcond_vcc +legalized: true +regBankSelected: true + +body: | + ; GCN-LABEL: name: brcond_vcc + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec + ; GCN: $vcc = COPY [[V_CMP_EQ_U32_e64_]] + ; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc + ; GCN: bb.1: + bb.0: + liveins: $vgpr0, $vgpr1 + + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vcc(s1) = G_ICMP intpred(eq), %0, %1 + G_BRCOND %2, %bb.1 + + bb.1: + +... + # Don't try to select this. --- @@ -143,30 +171,27 @@ ... - -# Don't try to select this for now. 
+# Don't try to select this. --- -name: brcond_vcc +name: brcond_vgpr legalized: true regBankSelected: true body: | - ; GCN-LABEL: name: brcond_vcc + ; GCN-LABEL: name: brcond_vgpr ; GCN: bb.0: ; GCN: successors: %bb.1(0x80000000) ; GCN: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GCN: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GCN: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; GCN: G_BRCOND [[ICMP]](s1), %bb.1 + ; GCN: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; GCN: G_BRCOND [[TRUNC]](s1), %bb.1 ; GCN: bb.1: bb.0: liveins: $vgpr0, $vgpr1 %0:vgpr(s32) = COPY $vgpr0 - %1:vgpr(s32) = COPY $vgpr1 - %2:vcc(s1) = G_ICMP intpred(eq), %0, %1 - G_BRCOND %2, %bb.1 + %1:vgpr(s1) = G_TRUNC %0 + G_BRCOND %1, %bb.1 bb.1: