diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1795,6 +1795,8 @@
   (i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE))
 >;
 
+def : FPToI1Pat;
+def : FPToI1Pat;
 def : FPToI1Pat;
 def : FPToI1Pat;
 def : FPToI1Pat;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
@@ -130,3 +130,75 @@
     %3:vgpr(s32) = G_FPTOSI %2
     $vgpr0 = COPY %3
 ...
+
+---
+name: fptosi_s16_to_s1_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: fptosi_s16_to_s1_vv
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+    ; GCN: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit %2
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s16) = G_TRUNC %0
+    %2:vgpr(s32) = G_FPTOSI %1
+    %3:vgpr(s1) = G_TRUNC %2
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: fptosi_s16_to_s1_vs
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: fptosi_s16_to_s1_vs
+    ; GCN: liveins: $sgpr0
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+    ; GCN: %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %4, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit %2
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s16) = G_TRUNC %0
+    %2:vgpr(s32) = G_FPTOSI %1
+    %3:vgpr(s1) = G_TRUNC %2
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: fptosi_s16_to_s1_fneg_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: fptosi_s16_to_s1_fneg_vv
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+    ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
+    ; GCN: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec
+    ; GCN: %3:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 %5, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit %3
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s16) = G_TRUNC %0
+    %2:vgpr(s16) = G_FNEG %1
+    %3:vgpr(s32) = G_FPTOSI %2
+    %4:vgpr(s1) = G_TRUNC %3
+    S_ENDPGM 0, implicit %4
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
@@ -103,3 +103,75 @@
     %3:vgpr(s32) = G_FPTOUI %2
     $vgpr0 = COPY %3
 ...
+
+---
+name: fptoui_s16_to_s1_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: fptoui_s16_to_s1_vv
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+    ; GCN: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit %2
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s16) = G_TRUNC %0
+    %2:vgpr(s32) = G_FPTOUI %1
+    %3:vgpr(s1) = G_TRUNC %2
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: fptoui_s16_to_s1_vs
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: fptoui_s16_to_s1_vs
+    ; GCN: liveins: $sgpr0
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: %4:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[COPY]], implicit $mode, implicit $exec
+    ; GCN: %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %4, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit %2
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s16) = G_TRUNC %0
+    %2:vgpr(s32) = G_FPTOUI %1
+    %3:vgpr(s1) = G_TRUNC %2
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: fptoui_s16_to_s1_fneg_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: fptoui_s16_to_s1_fneg_vv
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+    ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
+    ; GCN: %5:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 [[V_XOR_B32_e32_]], implicit $mode, implicit $exec
+    ; GCN: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 %5, implicit $mode, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit %3
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s16) = G_TRUNC %0
+    %2:vgpr(s16) = G_FNEG %1
+    %3:vgpr(s32) = G_FPTOUI %2
+    %4:vgpr(s1) = G_TRUNC %3
+    S_ENDPGM 0, implicit %4
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir
@@ -644,3 +644,27 @@
     %1:_(<2 x s64>) = G_FPTOSI %0
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
 ...
+
+---
+name: test_fptosi_s16_to_s1
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; SI-LABEL: name: test_fptosi_s16_to_s1
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
+    ; SI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[FPEXT]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOSI]](s32)
+    ; SI: S_ENDPGM 0, implicit [[TRUNC1]](s1)
+    ; VI-LABEL: name: test_fptosi_s16_to_s1
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; VI: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[TRUNC]](s16)
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOSI]](s32)
+    ; VI: S_ENDPGM 0, implicit [[TRUNC1]](s1)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s16) = G_TRUNC %0
+    %2:_(s1) = G_FPTOSI %1
+    S_ENDPGM 0, implicit %2
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir
@@ -814,3 +814,27 @@
     %1:_(<2 x s64>) = G_FPTOUI %0
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
 ...
+
+---
+name: test_fptoui_s16_to_s1
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; SI-LABEL: name: test_fptoui_s16_to_s1
+    ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
+    ; SI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FPEXT]](s32)
+    ; SI: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOUI]](s32)
+    ; SI: S_ENDPGM 0, implicit [[TRUNC1]](s1)
+    ; VI-LABEL: name: test_fptoui_s16_to_s1
+    ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+    ; VI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[TRUNC]](s16)
+    ; VI: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOUI]](s32)
+    ; VI: S_ENDPGM 0, implicit [[TRUNC1]](s1)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s16) = G_TRUNC %0
+    %2:_(s1) = G_FPTOUI %1
+    S_ENDPGM 0, implicit %2
+...
diff --git a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
--- a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
@@ -132,3 +132,16 @@
   store <2 x i64> %r.val, <2 x i64> addrspace(1)* %r
   ret void
 }
+
+; GCN-LABEL: {{^}}fptosi_f16_to_i1:
+; SI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; SI: v_cmp_eq_f32_e32 vcc, -1.0, v{{[0-9]+}}
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
+; VI: v_cmp_eq_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, 0xbc00, s{{[0-9]+}}
+; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s[0:1]
+define amdgpu_kernel void @fptosi_f16_to_i1(i1 addrspace(1)* %out, half %in) {
+entry:
+  %conv = fptosi half %in to i1
+  store i1 %conv, i1 addrspace(1)* %out
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll b/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll
--- a/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoui.f16.ll
@@ -130,3 +130,16 @@
   store <2 x i64> %r.val, <2 x i64> addrspace(1)* %r
   ret void
 }
+
+; GCN-LABEL: {{^}}fptoui_f16_to_i1:
+; SI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; SI: v_cmp_eq_f32_e32 vcc, 1.0, v{{[0-9]+}}
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
+; VI: v_cmp_eq_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{[0-9]+}}
+; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, s[0:1]
+define amdgpu_kernel void @fptoui_f16_to_i1(i1 addrspace(1)* %out, half %in) {
+entry:
+  %conv = fptoui half %in to i1
+  store i1 %conv, i1 addrspace(1)* %out
+  ret void
+}