Index: include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- include/llvm/IR/IntrinsicsAMDGPU.td
+++ include/llvm/IR/IntrinsicsAMDGPU.td
@@ -747,6 +747,12 @@
   [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
 >;
 
+// Return true if at least one thread within the pixel quad passes true into
+// the function.
+def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty],
+  [llvm_i1_ty], [IntrNoMem, IntrSpeculatable, IntrConvergent]
+>;
+
 // Copies the active channels of the source value to the destination value,
 // with the guarantee that the source value is computed as if the entire
 // program were executed in Whole Wavefront Mode, i.e. with all channels
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -1206,6 +1206,11 @@
 >;
 
 def : Pat <
+  (i1 (int_amdgcn_wqm_vote i1:$src0)),
+  (S_WQM_B64 $src0)
+>;
+
+def : Pat <
   (f32 (sint_to_fp i1:$src)),
   (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src)
 >;
Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3532,6 +3532,14 @@
 
     break;
   }
+  case Intrinsic::amdgcn_wqm_vote: {
+    // wqm_vote is identity when the argument is constant: every thread in the
+    // quad passes the same value, so the vote result equals the argument.
+    if (!isa<ConstantInt>(II->getArgOperand(0)))
+      break;
+
+    return replaceInstUsesWith(*II, II->getArgOperand(0));
+  }
   case Intrinsic::stackrestore: {
     // If the save is right next to the restore, remove the restore.  This can
     // happen when variable allocas are DCE'd.
# New regression test for the llvm.amdgcn.wqm.vote intrinsic.
# The RUN line pipes the IR through `opt -instcombine` and then `llc`
# (-mcpu=tonga), and FileCheck inspects the resulting assembly:
#   ret                    - non-constant argument; expects s_wqm_b64 applied
#                            to the result of the compare.
#   true / false           - constant argument; expects no s_wqm_b64 and a
#                            plain v_mov_b32 of the selected constant.
#   kill                   - vote result feeds llvm.AMDGPU.kill through a
#                            select; the FIXME records a shorter sequence
#                            that could be emitted instead.
#   kill_true / kill_false - constant vote feeding kill; expects a bare
#                            s_endpgm, or exec cleared before s_endpgm.
Index: test/CodeGen/AMDGPU/wqm.vote.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/wqm.vote.ll
@@ -0,0 +1,79 @@
+; RUN: opt -S -mtriple=amdgcn-- -instcombine < %s | llc -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=CHECK %s
+
+;CHECK-LABEL: {{^}}ret:
+;CHECK: v_cmp_eq_u32_e32 [[CMP:[^,]+]], v0, v1
+;CHECK: s_wqm_b64 [[WQM:[^,]+]], [[CMP]]
+;CHECK: v_cndmask_b32_e64 v0, 0, 1.0, [[WQM]]
+define amdgpu_ps float @ret(i32 %v0, i32 %v1) #1 {
+main_body:
+  %c = icmp eq i32 %v0, %v1
+  %w = call i1 @llvm.amdgcn.wqm.vote(i1 %c)
+  %r = select i1 %w, float 1.0, float 0.0
+  ret float %r
+}
+
+;CHECK-LABEL: {{^}}true:
+;CHECK-NOT: s_wqm_b64
+;CHECK: v_mov_b32_e32 v0, 1.0
+define amdgpu_ps float @true() #1 {
+main_body:
+  %w = call i1 @llvm.amdgcn.wqm.vote(i1 true)
+  %r = select i1 %w, float 1.0, float 0.0
+  ret float %r
+}
+
+;CHECK-LABEL: {{^}}false:
+;CHECK-NOT: s_wqm_b64
+;CHECK: v_mov_b32_e32 v0, 0
+define amdgpu_ps float @false() #1 {
+main_body:
+  %w = call i1 @llvm.amdgcn.wqm.vote(i1 false)
+  %r = select i1 %w, float 1.0, float 0.0
+  ret float %r
+}
+
+;CHECK-LABEL: {{^}}kill:
+;CHECK: v_cmp_eq_u32_e32 [[CMP:[^,]+]], v0, v1
+;CHECK: s_wqm_b64 [[WQM:[^,]+]], [[CMP]]
+;FIXME: This could just be: s_and_b64 exec, exec, [[WQM]]
+;CHECK: v_cndmask_b32_e64 [[KILL:[^,]+]], -1.0, 1.0, [[WQM]]
+;CHECK: v_cmpx_le_f32_e32 {{[^,]+}}, 0, [[KILL]]
+;CHECK: s_endpgm
+define amdgpu_ps void @kill(i32 %v0, i32 %v1) #1 {
+main_body:
+  %c = icmp eq i32 %v0, %v1
+  %w = call i1 @llvm.amdgcn.wqm.vote(i1 %c)
+  %r = select i1 %w, float 1.0, float -1.0
+  call void @llvm.AMDGPU.kill(float %r)
+  ret void
+}
+
+;CHECK-LABEL: {{^}}kill_true:
+;CHECK-NEXT: ; BB#
+;CHECK-NEXT: s_endpgm
+define amdgpu_ps void @kill_true(i32 %v0, i32 %v1) #1 {
+main_body:
+  %c = icmp eq i32 %v0, %v1
+  %w = call i1 @llvm.amdgcn.wqm.vote(i1 true)
+  %r = select i1 %w, float 1.0, float -1.0
+  call void @llvm.AMDGPU.kill(float %r)
+  ret void
+}
+
+;CHECK-LABEL: {{^}}kill_false:
+;CHECK: s_mov_b64 exec, 0
+;CHECK: s_endpgm
+define amdgpu_ps void @kill_false(i32 %v0, i32 %v1) #1 {
+main_body:
+  %c = icmp eq i32 %v0, %v1
+  %w = call i1 @llvm.amdgcn.wqm.vote(i1 false)
+  %r = select i1 %w, float 1.0, float -1.0
+  call void @llvm.AMDGPU.kill(float %r)
+  ret void
+}
+
+declare void @llvm.AMDGPU.kill(float) #1
+declare i1 @llvm.amdgcn.wqm.vote(i1) #0
+
+attributes #0 = { nounwind readnone speculatable convergent }
+attributes #1 = { nounwind }