Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2488,11 +2488,31 @@
     BrOpcode = AMDGPU::S_CBRANCH_SCC1;
     ConstrainRC = &AMDGPU::SReg_32RegClass;
   } else {
-    // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
-    // We sort of know that a VCC producer based on the register bank, that ands
-    // inactive lanes with 0. What if there was a logical operation with vcc
-    // producers in different blocks/with different exec masks?
     // FIXME: Should scc->vcc copies and with exec?
+
+    // Only a compare is relied on to produce 0 in inactive lanes. Any other
+    // producer of the condition (e.g. a logical op on lane masks) gives no
+    // such guarantee, so AND the condition with exec before branching on it.
+    const MachineInstr *CondDef = MRI->getUniqueVRegDef(CondReg);
+    const bool IsCmpResult =
+        CondDef && (CondDef->getOpcode() == AMDGPU::G_ICMP ||
+                    CondDef->getOpcode() == AMDGPU::G_FCMP);
+    if (!IsCmpResult) {
+      const bool Is64 = STI.isWave64();
+      const TargetRegisterClass *RC =
+          Is64 ? &AMDGPU::SReg_64RegClass : &AMDGPU::SReg_32RegClass;
+      const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
+      const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
+
+      Register TmpReg = MRI->createVirtualRegister(RC);
+      MachineInstr *And = BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
+                              .addReg(CondReg)
+                              .addReg(Exec);
+      // The implicit scc def (operand 3) is never read here; mark it dead.
+      And->getOperand(3).setIsDead();
+      CondReg = TmpReg;
+    }
+
     CondPhysReg = TRI.getVCC();
     BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
     ConstrainRC = TRI.getBoolRC();
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-brcond.mir
@@ -174,3 +174,42 @@
   bb.1:
 
 ...
+
+# A vcc condition that is not defined directly by a compare (here a G_AND of
+# two compares) must be ANDed with exec before it is copied to $vcc.
+---
+
+name: brcond_vcc_not_cmp
+legalized: true
+regBankSelected: true
+
+body: |
+  ; GCN-LABEL: name: brcond_vcc_not_cmp
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x80000000)
+  ; GCN:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+  ; GCN:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+  ; GCN:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+  ; GCN:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+  ; GCN:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+  ; GCN:   [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY2]], [[COPY3]], implicit $exec
+  ; GCN:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
+  ; GCN:   [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_AND_B64_]], $exec, implicit-def dead $scc
+  ; GCN:   $vcc = COPY [[S_AND_B64_1]]
+  ; GCN:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
+  ; GCN: bb.1:
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = COPY $vgpr3
+    %4:vcc(s1) = G_ICMP intpred(eq), %0, %1
+    %5:vcc(s1) = G_ICMP intpred(eq), %2, %3
+    %6:vcc(s1) = G_AND %4, %5
+    G_BRCOND %6(s1), %bb.1
+
+  bb.1:
+
+...