Index: lib/Target/AMDGPU/AMDGPU.td
===================================================================
--- lib/Target/AMDGPU/AMDGPU.td
+++ lib/Target/AMDGPU/AMDGPU.td
@@ -764,7 +764,6 @@
 include "SISchedule.td"
 include "GCNProcessors.td"
 include "AMDGPUInstrInfo.td"
-include "AMDGPUIntrinsics.td"
 include "SIIntrinsics.td"
 include "AMDGPURegisterInfo.td"
 include "AMDGPURegisterBanks.td"
Index: lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h
+++ lib/Target/AMDGPU/AMDGPUIntrinsicInfo.h
@@ -20,7 +20,7 @@
 namespace llvm {
 class TargetMachine;
 
-namespace AMDGPUIntrinsic {
+namespace SIIntrinsic {
 enum ID {
   last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1,
 #define GET_INTRINSIC_ENUM_VALUES
Index: lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
+++ lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
@@ -40,7 +40,7 @@
   if (IntrID < Intrinsic::num_intrinsics)
     return StringRef();
 
-  assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics &&
+  assert(IntrID < SIIntrinsic::num_AMDGPU_intrinsics &&
          "Invalid intrinsic ID");
 
   return IntrinsicNameTable[IntrID - Intrinsic::num_intrinsics];
@@ -91,7 +91,7 @@
     = cast<Function>(M->getOrInsertFunction(getName(IntrID, Tys), FTy));
 
   AttributeList AS =
-      getAttributes(M->getContext(), static_cast<AMDGPUIntrinsic::ID>(IntrID));
+      getAttributes(M->getContext(), static_cast<SIIntrinsic::ID>(IntrID));
   F->setAttributes(AS);
   return F;
 }
Index: lib/Target/AMDGPU/AMDGPUIntrinsics.td
===================================================================
--- lib/Target/AMDGPU/AMDGPUIntrinsics.td
+++ /dev/null
@@ -1,16 +0,0 @@
-//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines intrinsics that are used by all hw codegen targets.
-//
-//===----------------------------------------------------------------------===//
-
-let TargetPrefix = "AMDGPU", isTarget = 1 in {
-  def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
-}
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5065,7 +5065,7 @@
     return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
                           SDLoc(DAG.getEntryNode()),
                           MFI->getArgInfo().WorkItemIDZ);
-  case AMDGPUIntrinsic::SI_load_const: {
+  case SIIntrinsic::SI_load_const: {
     SDValue Load =
         lowerSBuffer(MVT::i32, DL, Op.getOperand(1), Op.getOperand(2),
                      DAG.getTargetConstant(0, DL, MVT::i1), DAG);
@@ -5808,19 +5808,6 @@
     return DAG.getNode(AMDGPUISD::INIT_EXEC_FROM_INPUT, DL, MVT::Other, Chain,
                        Op.getOperand(2), Op.getOperand(3));
   }
-  case AMDGPUIntrinsic::AMDGPU_kill: {
-    SDValue Src = Op.getOperand(2);
-    if (const ConstantFPSDNode *K = dyn_cast<ConstantFPSDNode>(Src)) {
-      if (!K->isNegative())
-        return Chain;
-
-      SDValue NegOne = DAG.getTargetConstant(FloatToBits(-1.0f), DL, MVT::i32);
-      return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, NegOne);
-    }
-
-    SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Src);
-    return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, Cast);
-  }
   case Intrinsic::amdgcn_s_barrier: {
     if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
       const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -5831,7 +5818,7 @@
     }
     return SDValue();
   };
-  case AMDGPUIntrinsic::SI_tbuffer_store: {
+  case SIIntrinsic::SI_tbuffer_store: {
 
     // Extract vindex and voffset from vaddr as appropriate
     const ConstantSDNode *OffEn = cast<ConstantSDNode>(Op.getOperand(10));
Index: test/CodeGen/AMDGPU/default-fp-mode.ll
===================================================================
--- test/CodeGen/AMDGPU/default-fp-mode.ll
+++ test/CodeGen/AMDGPU/default-fp-mode.ll
@@ -86,12 +86,10 @@
 ; GCN: IeeeMode: 0
 define amdgpu_gs void @kill_gs_const() {
 main_body:
-  %0 = icmp ule i32 0, 3
-  %1 = select i1 %0, float 1.000000e+00, float -1.000000e+00
-  call void @llvm.AMDGPU.kill(float %1)
-  %2 = icmp ule i32 3, 0
-  %3 = select i1 %2, float 1.000000e+00, float -1.000000e+00
-  call void @llvm.AMDGPU.kill(float %3)
+  %cmp0 = icmp ule i32 0, 3
+  call void @llvm.amdgcn.kill(i1 %cmp0)
+  %cmp1 = icmp ule i32 3, 0
+  call void @llvm.amdgcn.kill(i1 %cmp1)
   ret void
 }
 
@@ -100,12 +98,12 @@
 define amdgpu_ps float @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(4)* byval, [17 x <16 x i8>] addrspace(4)* byval, [17 x <4 x i32>] addrspace(4)* byval, [34 x <8 x i32>] addrspace(4)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) {
 entry:
   %tmp0 = fcmp olt float %13, 0.0
-  call void @llvm.AMDGPU.kill(float %14)
+  call void @llvm.amdgcn.kill(i1 %tmp0)
   %tmp1 = select i1 %tmp0, float 1.0, float 0.0
   ret float %tmp1
 }
 
-declare void @llvm.AMDGPU.kill(float)
+declare void @llvm.amdgcn.kill(i1)
 
 attributes #0 = { nounwind "target-cpu"="tahiti" }
 attributes #1 = { nounwind "target-cpu"="fiji" }
Index: test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.AMDGPU.kill.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-; SI-LABEL: {{^}}kill_gs_const:
-; SI-NOT: v_cmpx_le_f32
-; SI: s_mov_b64 exec, 0
-define amdgpu_gs void @kill_gs_const() {
-main_body:
-  %tmp = icmp ule i32 0, 3
-  %tmp1 = select i1 %tmp, float 1.000000e+00, float -1.000000e+00
-  call void @llvm.AMDGPU.kill(float %tmp1)
-  %tmp2 = icmp ule i32 3, 0
-  %tmp3 = select i1 %tmp2, float 1.000000e+00, float -1.000000e+00
-  call void @llvm.AMDGPU.kill(float %tmp3)
-  ret void
-}
-
-; SI-LABEL: {{^}}kill_vcc_implicit_def:
-; SI-NOT: v_cmp_gt_f32_e32 vcc,
-; SI: v_cmp_gt_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 0, v{{[0-9]+}}
-; SI: v_cmpx_le_f32_e32 vcc, 0, v{{[0-9]+}}
-; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0, [[CMP]]
-define amdgpu_ps void @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(4)* byval %arg, [17 x <16 x i8>] addrspace(4)* byval %arg1, [17 x <4 x i32>] addrspace(4)* byval %arg2, [34 x <8 x i32>] addrspace(4)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) {
-entry:
-  %tmp0 = fcmp olt float %arg13, 0.000000e+00
-  call void @llvm.AMDGPU.kill(float %arg14)
-  %tmp1 = select i1 %tmp0, float 1.000000e+00, float 0.000000e+00
-  call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
-  ret void
-}
-
-declare void @llvm.AMDGPU.kill(float) #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-
-attributes #0 = { nounwind }
Index: test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll
@@ -33,20 +33,17 @@
 ;CHECK-LABEL: {{^}}kill:
 ;CHECK: v_cmp_eq_u32_e32 [[CMP:[^,]+]], v0, v1
 ;CHECK: s_wqm_b64 [[WQM:[^,]+]], [[CMP]]
-;FIXME: This could just be: s_and_b64 exec, exec, [[WQM]]
-;CHECK: v_cndmask_b32_e64 [[KILL:[^,]+]], -1.0, 1.0, [[WQM]]
-;CHECK: v_cmpx_le_f32_e32 {{[^,]+}}, 0, [[KILL]]
+;CHECK: s_and_b64 exec, exec, [[WQM]]
 ;CHECK: s_endpgm
 define amdgpu_ps void @kill(i32 %v0, i32 %v1) #1 {
 main_body:
   %c = icmp eq i32 %v0, %v1
   %w = call i1 @llvm.amdgcn.wqm.vote(i1 %c)
-  %r = select i1 %w, float 1.0, float -1.0
-  call void @llvm.AMDGPU.kill(float %r)
+  call void @llvm.amdgcn.kill(i1 %w)
   ret void
 }
 
-declare void @llvm.AMDGPU.kill(float) #1
+declare void @llvm.amdgcn.kill(i1) #1
 declare i1 @llvm.amdgcn.wqm.vote(i1)
 
 attributes #1 = { nounwind }
Index: test/CodeGen/AMDGPU/skip-if-dead.ll
===================================================================
--- test/CodeGen/AMDGPU/skip-if-dead.ll
+++ test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -2,9 +2,10 @@
 
 ; CHECK-LABEL: {{^}}test_kill_depth_0_imm_pos:
 ; CHECK-NEXT: ; %bb.0:
+; CHECK-NEXT: ; %bb.1:
 ; CHECK-NEXT: s_endpgm
 define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
-  call void @llvm.AMDGPU.kill(float 0.0)
+  call void @llvm.amdgcn.kill(i1 true)
   ret void
 }
 
@@ -14,7 +15,7 @@
 ; CHECK-NEXT: ; %bb.1:
 ; CHECK-NEXT: s_endpgm
 define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
-  call void @llvm.AMDGPU.kill(float -0.0)
+  call void @llvm.amdgcn.kill(i1 false)
   ret void
 }
 
@@ -27,58 +28,62 @@
 ; CHECK-NEXT: ; %bb.2:
 ; CHECK-NEXT: s_endpgm
 define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
-  call void @llvm.AMDGPU.kill(float -0.0)
-  call void @llvm.AMDGPU.kill(float -1.0)
+  call void @llvm.amdgcn.kill(i1 false)
+  call void @llvm.amdgcn.kill(i1 false)
   ret void
 }
 
 ; CHECK-LABEL: {{^}}test_kill_depth_var:
 ; CHECK-NEXT: ; %bb.0:
-; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
+; CHECK-NEXT: v_cmpx_gt_f32_e32 vcc, 0, v0
 ; CHECK-NEXT: ; %bb.1:
 ; CHECK-NEXT: s_endpgm
 define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
-  call void @llvm.AMDGPU.kill(float %x)
+  %cmp = fcmp olt float %x, 0.0
+  call void @llvm.amdgcn.kill(i1 %cmp)
   ret void
 }
 
 ; FIXME: Ideally only one would be emitted
 ; CHECK-LABEL: {{^}}test_kill_depth_var_x2_same:
 ; CHECK-NEXT: ; %bb.0:
-; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
+; CHECK-NEXT: v_cmpx_gt_f32_e32 vcc, 0, v0
 ; CHECK-NEXT: ; %bb.1:
-; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
+; CHECK-NEXT: v_cmpx_gt_f32_e32 vcc, 0, v0
 ; CHECK-NEXT: ; %bb.2:
 ; CHECK-NEXT: s_endpgm
 define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
-  call void @llvm.AMDGPU.kill(float %x)
-  call void @llvm.AMDGPU.kill(float %x)
+  %cmp = fcmp olt float %x, 0.0
+  call void @llvm.amdgcn.kill(i1 %cmp)
+  call void @llvm.amdgcn.kill(i1 %cmp)
   ret void
 }
 
 ; CHECK-LABEL: {{^}}test_kill_depth_var_x2:
 ; CHECK-NEXT: ; %bb.0:
-; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
+; CHECK-NEXT: v_cmpx_gt_f32_e32 vcc, 0, v0
 ; CHECK-NEXT: ; %bb.1:
-; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v1
+; CHECK-NEXT: v_cmpx_gt_f32_e32 vcc, 0, v1
 ; CHECK-NEXT: ; %bb.2:
 ; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
-  call void @llvm.AMDGPU.kill(float %x)
-  call void @llvm.AMDGPU.kill(float %y)
+  %cmp.x = fcmp olt float %x, 0.0
+  call void @llvm.amdgcn.kill(i1 %cmp.x)
+  %cmp.y = fcmp olt float %y, 0.0
+  call void @llvm.amdgcn.kill(i1 %cmp.y)
   ret void
 }
 
 ; CHECK-LABEL: {{^}}test_kill_depth_var_x2_instructions:
 ; CHECK-NEXT: ; %bb.0:
-; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
+; CHECK-NEXT: v_cmpx_gt_f32_e32 vcc, 0, v0
 ; CHECK-NEXT: s_cbranch_execnz BB6_2
 ; CHECK-NEXT: ; %bb.1:
 ; CHECK-NEXT: exp
 ; CHECK-NEXT: s_endpgm
 ; CHECK-NEXT: BB6_2:
 ; CHECK: v_mov_b32_e64 v7, -1
-; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
+; CHECK: v_cmpx_gt_f32_e32 vcc, 0, v7
 ; CHECK-NEXT: s_cbranch_execnz BB6_4
 ; CHECK-NEXT: ; %bb.3:
 ; CHECK-NEXT: exp
@@ -86,9 +91,11 @@
 ; CHECK-NEXT: BB6_4:
 ; CHECK-NEXT: s_endpgm
 define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
-  call void @llvm.AMDGPU.kill(float %x)
+  %cmp.x = fcmp olt float %x, 0.0
+  call void @llvm.amdgcn.kill(i1 %cmp.x)
   %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"()
-  call void @llvm.AMDGPU.kill(float %y)
+  %cmp.y = fcmp olt float %y, 0.0
+  call void @llvm.amdgcn.kill(i1 %cmp.y)
   ret void
 }
 
@@ -111,7 +118,7 @@
 ; CHECK: v_nop_e64
 ; CHECK: v_nop_e64
 
-; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
+; CHECK: v_cmpx_gt_f32_e32 vcc, 0, v7
 ; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]
 ; CHECK-NEXT: ; %bb.2:
 ; CHECK-NEXT: exp null off, off, off, off done vm
@@ -137,7 +144,8 @@
     v_nop_e64
     v_nop_e64
     v_nop_e64", "={v7}"()
-  call void @llvm.AMDGPU.kill(float %var)
+  %cmp.var = fcmp olt float %var, 0.0
+  call void @llvm.amdgcn.kill(i1 %cmp.var)
   br label %exit
 
 exit:
@@ -162,7 +170,7 @@
 ; CHECK: ;;#ASMEND
 ; CHECK: v_mov_b32_e64 v8, -1
 ; CHECK: ;;#ASMEND
-; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
+; CHECK: v_cmpx_gt_f32_e32 vcc, 0, v7
 ; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]
 
 ; CHECK-NEXT: ; %bb.2:
@@ -196,7 +204,8 @@
     v_nop_e64
     v_nop_e64", "={v7}"()
   %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"()
-  call void @llvm.AMDGPU.kill(float %var)
+  %cmp.var = fcmp olt float %var, 0.0
+  call void @llvm.amdgcn.kill(i1 %cmp.var)
   store volatile float %live.across, float addrspace(1)* undef
   %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"()
   br label %exit
@@ -221,7 +230,7 @@
 ; CHECK: v_mov_b32_e64 v7, -1
 ; CHECK: v_nop_e64
 
-; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
+; CHECK: v_cmpx_gt_f32_e32 vcc, 0, v7
 
 ; CHECK-NEXT: ; %bb.3:
 ; CHECK: buffer_load_dword [[LOAD:v[0-9]+]]
@@ -251,7 +260,8 @@
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={v7}"()
-  call void @llvm.AMDGPU.kill(float %var)
+  %cmp.var = fcmp olt float %var, 0.0
+  call void @llvm.amdgcn.kill(i1 %cmp.var)
   %vgpr = load volatile i32, i32 addrspace(1)* undef
   %loop.cond = icmp eq i32 %vgpr, 0
   br i1 %loop.cond, label %bb, label %exit
@@ -264,7 +274,7 @@
 ; bug 28550
 ; CHECK-LABEL: {{^}}phi_use_def_before_kill:
 ; CHECK: v_cndmask_b32_e64 [[PHIREG:v[0-9]+]], 0, -1.0,
-; CHECK: v_cmpx_le_f32_e32 vcc, 0,
+; CHECK: v_cmpx_lt_f32_e32 vcc, 0,
 ; CHECK-NEXT: s_cbranch_execnz [[BB4:BB[0-9]+_[0-9]+]]
 
 ; CHECK: exp
@@ -288,7 +298,8 @@
   %tmp = fadd float %x, 1.000000e+00
   %tmp1 = fcmp olt float 0.000000e+00, %tmp
   %tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00
-  call void @llvm.AMDGPU.kill(float %tmp2)
+  %cmp.tmp2 = fcmp olt float %tmp2, 0.0
+  call void @llvm.amdgcn.kill(i1 %cmp.tmp2)
   br i1 undef, label %phibb, label %bb8
 
 phibb:
@@ -335,7 +346,7 @@
   unreachable
 
 bb6:                                              ; preds = %bb
-  call void @llvm.AMDGPU.kill(float -1.000000e+00)
+  call void @llvm.amdgcn.kill(i1 false)
   unreachable
 
 bb7:                                              ; preds = %bb4
@@ -348,7 +359,7 @@
 ; CHECK: s_xor_b64
 ; CHECK-NEXT: mask branch [[BB4:BB[0-9]+_[0-9]+]]
 
-; CHECK: v_cmpx_le_f32_e32 vcc, 0,
+; CHECK: v_cmpx_gt_f32_e32 vcc, 0,
 ; CHECK: [[BB4]]:
 ; CHECK: s_or_b64 exec, exec
 ; CHECK: image_sample_c
@@ -369,7 +380,8 @@
   br i1 %tmp, label %bb3, label %bb4
 
 bb3:                                              ; preds = %bb
-  call void @llvm.AMDGPU.kill(float %arg)
+  %cmp.arg = fcmp olt float %arg, 0.0
+  call void @llvm.amdgcn.kill(i1 %cmp.arg)
   br label %bb4
 
 bb4:                                              ; preds = %bb3, %bb
@@ -387,7 +399,7 @@
 }
 
 declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare void @llvm.AMDGPU.kill(float) #0
+declare void @llvm.amdgcn.kill(i1) #0
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
Index: test/CodeGen/AMDGPU/wqm.ll
===================================================================
--- test/CodeGen/AMDGPU/wqm.ll
+++ test/CodeGen/AMDGPU/wqm.ll
@@ -586,7 +586,8 @@
   %data.0 = extractelement <2 x float> %data, i32 0
   call void @llvm.amdgcn.buffer.store.f32(float %data.0, <4 x i32> undef, i32 %idx.0, i32 0, i1 0, i1 0)
 
-  call void @llvm.AMDGPU.kill(float %z)
+  %z.cmp = fcmp olt float %z, 0.0
+  call void @llvm.amdgcn.kill(i1 %z.cmp)
 
   %idx.1 = extractelement <2 x i32> %idx, i32 1
   %data.1 = extractelement <2 x float> %data, i32 1
@@ -619,7 +620,8 @@
 
   call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
 
-  call void @llvm.AMDGPU.kill(float %z)
+  %z.cmp = fcmp olt float %z, 0.0
+  call void @llvm.amdgcn.kill(i1 %z.cmp)
 
   ret <4 x float> %dtex
 }
@@ -826,7 +828,7 @@
 declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #3
 declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
 declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
-declare void @llvm.AMDGPU.kill(float) #1
+declare void @llvm.amdgcn.kill(i1) #1
 declare float @llvm.amdgcn.wqm.f32(float) #3
 declare i32 @llvm.amdgcn.wqm.i32(i32) #3
 declare float @llvm.amdgcn.wwm.f32(float) #3