diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1795,6 +1795,8 @@
   (i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE))
 >;
 
+def : FPToI1Pat;
+def : FPToI1Pat;
 def : FPToI1Pat;
 def : FPToI1Pat;
 def : FPToI1Pat;
diff --git a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
--- a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
@@ -132,3 +132,16 @@
   store <2 x i64> %r.val, <2 x i64> addrspace(1)* %r
   ret void
 }
+
+; GCN-LABEL: {{^}}fptosi_f16_to_i1:
+; SI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; SI: v_cmp_eq_f32_e32 vcc, -1.0, v{{[0-9]+}}
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
+; VI: v_cmp_eq_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, 0xbc00, s{{[0-9]+}}
+; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s[0:1]
+define amdgpu_kernel void @fptosi_f16_to_i1(i1 addrspace(1)* %out, half %in) {
+entry:
+  %conv = fptosi half %in to i1
+  store i1 %conv, i1 addrspace(1)* %out
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll b/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll
--- a/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll
@@ -130,3 +130,16 @@
   store <2 x i64> %r.val, <2 x i64> addrspace(1)* %r
   ret void
 }
+
+; GCN-LABEL: {{^}}fptoui_f16_to_i1:
+; SI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; SI: v_cmp_eq_f32_e32 vcc, 1.0, v{{[0-9]+}}
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
+; VI: v_cmp_eq_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{[0-9]+}}
+; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s[0:1]
+define amdgpu_kernel void @fptoui_f16_to_i1(i1 addrspace(1)* %out, half %in) {
+entry:
+  %conv = fptoui half %in to i1
+  store i1 %conv, i1 addrspace(1)* %out
+  ret void
+}