Index: lib/Target/AMDGPU/SIInsertSkips.cpp =================================================================== --- lib/Target/AMDGPU/SIInsertSkips.cpp +++ lib/Target/AMDGPU/SIInsertSkips.cpp @@ -210,65 +210,73 @@ switch (MI.getOperand(2).getImm()) { case ISD::SETOEQ: case ISD::SETEQ: - Opcode = AMDGPU::V_CMPX_EQ_F32_e32; + Opcode = AMDGPU::V_CMPX_EQ_F32_e64; break; case ISD::SETOGT: case ISD::SETGT: - Opcode = AMDGPU::V_CMPX_LT_F32_e32; + Opcode = AMDGPU::V_CMPX_LT_F32_e64; break; case ISD::SETOGE: case ISD::SETGE: - Opcode = AMDGPU::V_CMPX_LE_F32_e32; + Opcode = AMDGPU::V_CMPX_LE_F32_e64; break; case ISD::SETOLT: case ISD::SETLT: - Opcode = AMDGPU::V_CMPX_GT_F32_e32; + Opcode = AMDGPU::V_CMPX_GT_F32_e64; break; case ISD::SETOLE: case ISD::SETLE: - Opcode = AMDGPU::V_CMPX_GE_F32_e32; + Opcode = AMDGPU::V_CMPX_GE_F32_e64; break; case ISD::SETONE: case ISD::SETNE: - Opcode = AMDGPU::V_CMPX_LG_F32_e32; + Opcode = AMDGPU::V_CMPX_LG_F32_e64; break; case ISD::SETO: - Opcode = AMDGPU::V_CMPX_O_F32_e32; + Opcode = AMDGPU::V_CMPX_O_F32_e64; break; case ISD::SETUO: - Opcode = AMDGPU::V_CMPX_U_F32_e32; + Opcode = AMDGPU::V_CMPX_U_F32_e64; break; case ISD::SETUEQ: - Opcode = AMDGPU::V_CMPX_NLG_F32_e32; + Opcode = AMDGPU::V_CMPX_NLG_F32_e64; break; case ISD::SETUGT: - Opcode = AMDGPU::V_CMPX_NGE_F32_e32; + Opcode = AMDGPU::V_CMPX_NGE_F32_e64; break; case ISD::SETUGE: - Opcode = AMDGPU::V_CMPX_NGT_F32_e32; + Opcode = AMDGPU::V_CMPX_NGT_F32_e64; break; case ISD::SETULT: - Opcode = AMDGPU::V_CMPX_NLE_F32_e32; + Opcode = AMDGPU::V_CMPX_NLE_F32_e64; break; case ISD::SETULE: - Opcode = AMDGPU::V_CMPX_NLT_F32_e32; + Opcode = AMDGPU::V_CMPX_NLT_F32_e64; break; case ISD::SETUNE: - Opcode = AMDGPU::V_CMPX_NEQ_F32_e32; + Opcode = AMDGPU::V_CMPX_NEQ_F32_e64; break; default: llvm_unreachable("invalid ISD:SET cond code"); } - // TODO: Allow this: - if (!MI.getOperand(0).isReg() || - !TRI->isVGPR(MBB.getParent()->getRegInfo(), - MI.getOperand(0).getReg())) - llvm_unreachable("SI_KILL operand should be a VGPR"); - - BuildMI(MBB, &MI, DL, TII->get(Opcode)) - .add(MI.getOperand(1)) - .add(MI.getOperand(0)); + assert(MI.getOperand(0).isReg()); + + if (TRI->isVGPR(MBB.getParent()->getRegInfo(), + MI.getOperand(0).getReg())) { + Opcode = AMDGPU::getVOPe32(Opcode); + BuildMI(MBB, &MI, DL, TII->get(Opcode)) + .add(MI.getOperand(1)) + .add(MI.getOperand(0)); + } else { + BuildMI(MBB, &MI, DL, TII->get(Opcode)) + .addReg(AMDGPU::VCC, RegState::Define) + .addImm(0) // src0 modifiers + .add(MI.getOperand(1)) + .addImm(0) // src1 modifiers + .add(MI.getOperand(0)) + .addImm(0); // omod + } break; } case AMDGPU::SI_KILL_I1_TERMINATOR: { Index: test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll @@ -234,6 +234,23 @@ ret void } +; This checks that we use the 64-bit encoding when the operand is a SGPR. +; SI-LABEL: {{^}}test_sgpr: +; SI: v_cmpx_ge_f32_e64 +define amdgpu_ps void @test_sgpr(float inreg %a) #0 { + %c = fcmp ole float %a, 1.000000e+00 + call void @llvm.amdgcn.kill(i1 %c) #1 + ret void +} + +; SI-LABEL: {{^}}test_non_inline_imm_sgpr: +; SI-NOT: v_cmpx_ge_f32_e64 +define amdgpu_ps void @test_non_inline_imm_sgpr(float inreg %a) #0 { + %c = fcmp ole float %a, 1.500000e+00 + call void @llvm.amdgcn.kill(i1 %c) #1 + ret void +} + declare void @llvm.amdgcn.kill(i1) #0 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 declare i1 @llvm.amdgcn.wqm.vote(i1)