diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -145,9 +145,7 @@ [(set i64:$sdst, (not i64:$src0))] >; def S_WQM_B32 : SOP1_32 <"s_wqm_b32">; - def S_WQM_B64 : SOP1_64 <"s_wqm_b64", - [(set i1:$sdst, (int_amdgcn_wqm_vote i1:$src0))] - >; + def S_WQM_B64 : SOP1_64 <"s_wqm_b64">; } // End Defs = [SCC] @@ -1033,6 +1031,15 @@ (S_GETREG_B32 (as_i16imm $simm16)) >; +//===----------------------------------------------------------------------===// +// WQM Intrinsic Pattern. +//===----------------------------------------------------------------------===// +def : GCNPat< + (i1 (int_amdgcn_wqm_vote i1:$src0)), + (S_WQM_B64 (S_AND_B64 $src0, (i64 EXEC))) +>; + + //===----------------------------------------------------------------------===// // SOP1 Patterns //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll @@ -225,7 +225,8 @@ ; SI-LABEL: {{^}}wqm: ; SI: v_cmp_neq_f32_e32 vcc, 0 -; SI: s_wqm_b64 s[0:1], vcc +; SI: s_and_b64 s[0:1], vcc, exec +; SI: s_wqm_b64 s[0:1], s[0:1] ; SI: s_and_b64 exec, exec, s[0:1] define amdgpu_ps void @wqm(float %a) { %c1 = fcmp une float %a, 0.0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll @@ -2,7 +2,8 @@ ;CHECK-LABEL: {{^}}ret: ;CHECK: v_cmp_eq_u32_e32 [[CMP:[^,]+]], v0, v1 -;CHECK: s_wqm_b64 [[WQM:[^,]+]], [[CMP]] +;CHECK: s_and_b64 [[AND:[^,]+]], [[CMP]], exec +;CHECK: s_wqm_b64 [[WQM:[^,]+]], [[AND]] ;CHECK: v_cndmask_b32_e64 v0, 0, 1.0, [[WQM]] define amdgpu_ps float @ret(i32 %v0, i32 %v1) #1 { main_body: @@ -32,7 +33,8 @@ ;CHECK-LABEL: {{^}}kill: ;CHECK: v_cmp_eq_u32_e32 [[CMP:[^,]+]], v0, v1 -;CHECK: s_wqm_b64 [[WQM:[^,]+]], [[CMP]] +;CHECK: s_and_b64 [[AND:[^,]+]], [[CMP]], exec +;CHECK: s_wqm_b64 [[WQM:[^,]+]], [[AND]] ;CHECK: s_and_b64 exec, exec, [[WQM]] ;CHECK: s_endpgm define amdgpu_ps void @kill(i32 %v0, i32 %v1) #1 { @@ -43,6 +45,32 @@ ret void } +;CHECK-LABEL: {{^}}phi_not: +;CHECK: s_xor_b64 [[NOT:[^,]+]], {{[^,]+}}, -1 +;CHECK-NEXT: s_and_b64 [[AND:[^,]+]], [[NOT]], exec +;CHECK-NEXT: s_wqm_b64 [[WQM:[^,]+]], [[AND]] +;CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, [[WQM]] +define amdgpu_ps float @phi_not(i32 %v0, i32 %v1, i32 %v2) { + %cc = icmp eq i32 %v0, 0 + + br i1 %cc, label %if, label %else + + if: + %tmp1 = icmp ne i32 %v1, 1 + br label %endif + + else: + %tmp2 = icmp eq i32 %v2, 2 + br label %endif + + endif: + %sel = phi i1 [ %tmp1, %if ], [ %tmp2, %else ] + %not = xor i1 %sel, true + %w = call i1 @llvm.amdgcn.wqm.vote(i1 %not) + %r = select i1 %w, float 1.0, float 0.0 + ret float %r +} + declare void @llvm.amdgcn.kill(i1) #1 declare i1 @llvm.amdgcn.wqm.vote(i1)