Index: lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -303,6 +303,7 @@
   INTERP_P1,
   INTERP_P2,
   PC_ADD_REL_OFFSET,
+  KILL,
   FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
   STORE_MSKOR,
   LOAD_CONSTANT,
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2871,6 +2871,7 @@
   NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
   NODE_NAME_CASE(CONST_DATA_PTR)
   NODE_NAME_CASE(PC_ADD_REL_OFFSET)
+  NODE_NAME_CASE(KILL)
   case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
   NODE_NAME_CASE(SENDMSG)
   NODE_NAME_CASE(INTERP_MOV)
Index: lib/Target/AMDGPU/AMDGPUInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -40,6 +40,8 @@
   [SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<4>]
 >;
 
+def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
 //===----------------------------------------------------------------------===//
 // AMDGPU DAG Nodes
 //
@@ -245,6 +247,9 @@
                             SDTypeProfile<1, 4, [SDTCisFP<0>]>,
                             [SDNPInGlue]>;
 
+def AMDGPUkill : SDNode<"AMDGPUISD::KILL", AMDGPUKillSDT,
+  [SDNPHasChain, SDNPSideEffect]>;
+
 //===----------------------------------------------------------------------===//
 // Flow Control Profile Types
 //===----------------------------------------------------------------------===//
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1961,12 +1961,18 @@
                                    Op->getVTList(), Ops, VT, MMO);
   }
   case AMDGPUIntrinsic::AMDGPU_kill: {
-    if (const ConstantFPSDNode *K = dyn_cast<ConstantFPSDNode>(Op.getOperand(2))) {
+    SDValue Src = Op.getOperand(2);
+    if (const ConstantFPSDNode *K = dyn_cast<ConstantFPSDNode>(Src)) {
       if (!K->isNegative())
         return Chain;
+
+      return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other,
+                         Chain,
+                         DAG.getTargetConstant(-1, DL, MVT::i32));
     }
 
-    return Op;
+    SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Src);
+    return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, Cast);
   }
   default:
     return SDValue();
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -1983,7 +1983,7 @@
 let Uses = [EXEC], Defs = [EXEC,VCC] in {
 def SI_KILL : InstSI <
   (outs), (ins VSrc_32:$src), "",
-  [(int_AMDGPU_kill f32:$src)]> {
+  [(AMDGPUkill i32:$src)]> {
   let isConvergent = 1;
   let usesCustomInserter = 1;
 }
Index: test/CodeGen/AMDGPU/skip-if-dead.ll
===================================================================
--- test/CodeGen/AMDGPU/skip-if-dead.ll
+++ test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -18,6 +18,20 @@
   ret void
 }
 
+; FIXME: Ideally only one would be emitted
+; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg_x2:
+; CHECK-NEXT: ; BB#0:
+; CHECK-NEXT: s_mov_b64 exec, 0
+; CHECK-NEXT: ; BB#1:
+; CHECK-NEXT: s_mov_b64 exec, 0
+; CHECK-NEXT: ; BB#2:
+; CHECK-NEXT: s_endpgm
+define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
+  call void @llvm.AMDGPU.kill(float -0.0)
+  call void @llvm.AMDGPU.kill(float -1.0)
+  ret void
+}
+
 ; CHECK-LABEL: {{^}}test_kill_depth_var:
 ; CHECK-NEXT: ; BB#0:
 ; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
@@ -28,6 +42,48 @@
   ret void
 }
 
+; FIXME: Ideally only one would be emitted
+; CHECK-LABEL: {{^}}test_kill_depth_var_x2_same:
+; CHECK-NEXT: ; BB#0:
+; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
+; CHECK-NEXT: ; BB#1:
+; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
+; CHECK-NEXT: ; BB#2:
+; CHECK-NEXT: s_endpgm
+define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
+  call void @llvm.AMDGPU.kill(float %x)
+  call void @llvm.AMDGPU.kill(float %x)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_kill_depth_var_x2:
+; CHECK-NEXT: ; BB#0:
+; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
+; CHECK-NEXT: ; BB#1:
+; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v1
+; CHECK-NEXT: ; BB#2:
+; CHECK-NEXT: s_endpgm
+define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
+  call void @llvm.AMDGPU.kill(float %x)
+  call void @llvm.AMDGPU.kill(float %y)
+  ret void
+}
+
+; CHECK-LABEL: {{^}}test_kill_depth_var_x2_instructions:
+; CHECK-NEXT: ; BB#0:
+; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
+; CHECK-NEXT: ; BB#1:
+; CHECK: v_mov_b32_e64 v7, -1
+; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
+; CHECK-NEXT: ; BB#2:
+; CHECK-NEXT: s_endpgm
+define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
+  call void @llvm.AMDGPU.kill(float %x)
+  %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={VGPR7}"()
+  call void @llvm.AMDGPU.kill(float %y)
+  ret void
+}
+
 ; FIXME: why does the skip depend on the asm length in the same block?
 ; CHECK-LABEL: {{^}}test_kill_control_flow: