Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2348,14 +2348,17 @@ unsigned DstSize = RI.getRegSizeInBits(*DstRC); if (DstSize == 32) { - unsigned SelOp = Pred == SCC_TRUE ? - AMDGPU::S_CSELECT_B32 : AMDGPU::V_CNDMASK_B32_e32; - - // Instruction's operands are backwards from what is expected. - MachineInstr *Select = - BuildMI(MBB, I, DL, get(SelOp), DstReg) - .addReg(FalseReg) - .addReg(TrueReg); + MachineInstr *Select; + if (Pred == SCC_TRUE) { + Select = BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B32), DstReg) + .addReg(TrueReg) + .addReg(FalseReg); + } else { + // Instruction's operands are backwards from what is expected. + Select = BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e32), DstReg) + .addReg(FalseReg) + .addReg(TrueReg); + } preserveCondRegFlags(Select->getOperand(3), Cond[1]); return; @@ -2364,8 +2367,8 @@ if (DstSize == 64 && Pred == SCC_TRUE) { MachineInstr *Select = BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg) - .addReg(FalseReg) - .addReg(TrueReg); + .addReg(TrueReg) + .addReg(FalseReg); preserveCondRegFlags(Select->getOperand(3), Cond[1]); return; @@ -2416,10 +2419,19 @@ unsigned SubIdx = SubIndices[Idx]; - MachineInstr *Select = - BuildMI(MBB, I, DL, get(SelOp), DstElt) - .addReg(FalseReg, 0, SubIdx) - .addReg(TrueReg, 0, SubIdx); + MachineInstr *Select; + if (SelOp == AMDGPU::V_CNDMASK_B32_e32) { + Select = + BuildMI(MBB, I, DL, get(SelOp), DstElt) + .addReg(FalseReg, 0, SubIdx) + .addReg(TrueReg, 0, SubIdx); + } else { + Select = + BuildMI(MBB, I, DL, get(SelOp), DstElt) + .addReg(TrueReg, 0, SubIdx) + .addReg(FalseReg, 0, SubIdx); + } + preserveCondRegFlags(Select->getOperand(3), Cond[1]); fixImplicitOperands(*Select); Index: llvm/test/CodeGen/AMDGPU/early-if-convert.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/early-if-convert.ll +++ llvm/test/CodeGen/AMDGPU/early-if-convert.ll @@ -246,7 +246,7 @@ ; GCN: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x0 ; GCN: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], [[VAL]] ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 1 -; GCN-NEXT: s_cselect_b32 [[SELECT:s[0-9]+]], [[ADD]], [[VAL]] +; GCN-NEXT: s_cselect_b32 [[SELECT:s[0-9]+]], [[VAL]], [[ADD]] define amdgpu_kernel void @test_scc1_sgpr_ifcvt_triangle(i32 addrspace(4)* %in, i32 %cond) #0 { entry: %v = load i32, i32 addrspace(4)* %in @@ -362,7 +362,7 @@ ; GCN-LABEL: {{^}}uniform_if_swap_br_targets_scc_constant_select: ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0 -; GCN: s_cselect_b32 s{{[0-9]+}}, 1, 0{{$}} +; GCN: s_cselect_b32 s{{[0-9]+}}, 0, 1{{$}} define amdgpu_kernel void @uniform_if_swap_br_targets_scc_constant_select(i32 %cond, i32 addrspace(1)* %out) { entry: %cmp0 = icmp eq i32 %cond, 0 @@ -383,7 +383,7 @@ ; GCN-LABEL: {{^}}ifcvt_undef_scc: ; GCN: {{^}}; %bb.0: ; GCN-NEXT: s_load_dwordx2 -; GCN-NEXT: s_cselect_b32 s{{[0-9]+}}, 1, 0 +; GCN-NEXT: s_cselect_b32 s{{[0-9]+}}, 0, 1{{$}} define amdgpu_kernel void @ifcvt_undef_scc(i32 %cond, i32 addrspace(1)* %out) { entry: br i1 undef, label %else, label %if