Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2465,6 +2465,39 @@
   return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
 }
 
+/// Return true if the value in \p Reg is produced exclusively by compare-like
+/// instructions (G_ICMP, G_FCMP or the llvm.amdgcn.class intrinsic), looking
+/// through COPY and through G_AND / G_OR / G_XOR whose inputs both qualify.
+///
+/// selectG_BRCOND only skips the explicit AND with exec when this returns
+/// true, so every case that cannot be proven must answer false.
+static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI) {
+  if (!Reg.isVirtual())
+    return false;
+
+  // A register without exactly one definition cannot be proven to be a compare
+  // result; answer conservatively instead of dereferencing a null
+  // MachineInstr.
+  const MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
+  if (!MI)
+    return false;
+
+  const unsigned Opcode = MI->getOpcode();
+
+  if (Opcode == AMDGPU::COPY)
+    return isVCmpResult(MI->getOperand(1).getReg(), MRI);
+
+  if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
+      Opcode == AMDGPU::G_XOR)
+    return isVCmpResult(MI->getOperand(1).getReg(), MRI) &&
+           isVCmpResult(MI->getOperand(2).getReg(), MRI);
+
+  if (Opcode == TargetOpcode::G_INTRINSIC)
+    return MI->getIntrinsicID() == Intrinsic::amdgcn_class;
+
+  return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
+}
+
 bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
   MachineOperand &CondOp = I.getOperand(0);
@@ -2488,11 +2521,24 @@
     BrOpcode = AMDGPU::S_CBRANCH_SCC1;
     ConstrainRC = &AMDGPU::SReg_32RegClass;
   } else {
-    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
-    // We sort of know that a VCC producer based on the register bank, that ands
-    // inactive lanes with 0. What if there was a logical operation with vcc
-    // producers in different blocks/with different exec masks?
     // FIXME: Should scc->vcc copies and with exec?
+
+    // Unless the value of CondReg is a result of a V_CMP* instruction then we
+    // need to insert an and with exec.
+    if (!isVCmpResult(CondReg, *MRI)) {
+      const bool Is64 = STI.isWave64();
+      const TargetRegisterClass *RC =
+          Is64 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;
+      const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
+      const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
+
+      Register TmpReg = MRI->createVirtualRegister(RC);
+      BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
+          .addReg(CondReg)
+          .addReg(Exec);
+      CondReg = TmpReg;
+    }
+
     CondPhysReg = TRI.getVCC();
     BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
     ConstrainRC = TRI.getBoolRC();
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir
@@ -174,3 +174,138 @@
   bb.1:
 
 ...
+
+---
+
+name: brcond_class_intrinsic # llvm.amdgcn.class result feeds the branch directly; checks expect no S_AND with $exec
+legalized: true
+regBankSelected: true
+
+body: |
+  ; GCN-LABEL: name: brcond_class_intrinsic
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x80000000)
+  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GCN:   [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
+  ; GCN:   $vcc = COPY [[V_CMP_CLASS_F32_e64_]]
+  ; GCN:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+  ; GCN: bb.1:
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0:vgpr(s32), %1:vgpr(s32)
+    G_BRCOND %2(s1), %bb.1
+
+  bb.1:
+
+...
+
+---
+name: brcond_cmp_logic # G_AND of a G_ICMP and a G_FCMP result; checks expect no extra S_AND with $exec
+legalized: true
+regBankSelected: true
+
+body: |
+  ; GCN-LABEL: name: brcond_cmp_logic
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x80000000)
+  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GCN:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GCN:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+  ; GCN:   %5:sreg_64 = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
+  ; GCN:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], %5, implicit-def dead $scc
+  ; GCN:   $vcc = COPY [[S_AND_B64_]]
+  ; GCN:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+  ; GCN: bb.1:
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = COPY $vgpr3
+    %4:vcc(s1) = G_ICMP intpred(eq), %0, %1
+    %5:vcc(s1) = G_FCMP floatpred(oeq), %2, %3
+    %6:vcc(s1) = G_AND %4, %5
+    G_BRCOND %6(s1), %bb.1
+
+  bb.1:
+
+...
+
+---
+name: brcond_logic # G_AND of a G_ICMP with a copy of an sgpr G_TRUNC; checks expect an S_AND_B64 with $exec before the branch
+legalized: true
+regBankSelected: true
+
+body: |
+  ; GCN-LABEL: name: brcond_logic
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x80000000)
+  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GCN:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GCN:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY2]], implicit-def $scc
+  ; GCN:   [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
+  ; GCN:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+  ; GCN:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
+  ; GCN:   [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_AND_B64_]], $exec, implicit-def $scc
+  ; GCN:   $vcc = COPY [[S_AND_B64_1]]
+  ; GCN:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+  ; GCN: bb.1:
+  bb.0:
+    liveins: $sgpr0, $vgpr0, $vgpr1
+
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:sgpr(s32) = COPY $sgpr0
+    %3:sgpr(s1) = G_TRUNC %2(s32)
+    %4:vcc(s1) = COPY %3(s1)
+    %5:vcc(s1) = G_ICMP intpred(eq), %0, %1
+    %6:vcc(s1) = G_AND %5, %4
+    G_BRCOND %6(s1), %bb.1
+
+  bb.1:
+
+...
+
+---
+name: brcond_logic_const # G_XOR of a G_ICMP with a copied G_CONSTANT true; checks expect an S_AND_B64 with $exec before the branch
+legalized: true
+regBankSelected: true
+
+body: |
+  ; GCN-LABEL: name: brcond_logic_const
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x80000000)
+  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GCN:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+  ; GCN:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
+  ; GCN:   [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[V_CMP_EQ_U32_e64_]], [[S_MOV_B64_]], implicit-def dead $scc
+  ; GCN:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_XOR_B64_]], $exec, implicit-def $scc
+  ; GCN:   $vcc = COPY [[S_AND_B64_]]
+  ; GCN:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+  ; GCN: bb.1:
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vcc(s1) = G_ICMP intpred(eq), %0, %1
+    %3:sgpr(s1) = G_CONSTANT i1 true
+    %4:vcc(s1) = COPY %3(s1)
+    %5:vcc(s1) = G_XOR %2, %4
+    G_BRCOND %5(s1), %bb.1
+
+  bb.1:
+
+...