Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9676,7 +9676,8 @@
   SDValue X = LHS.getOperand(0);
   SDValue Y = RHS.getOperand(0);
 
-  if (Y.getOpcode() != ISD::FABS || Y.getOperand(0) != X)
+  if (Y.getOpcode() != ISD::FABS || Y.getOperand(0) != X ||
+      !isTypeLegal(X.getValueType()))
     return SDValue();
 
   if (LCC == ISD::SETO) {
@@ -11438,8 +11439,8 @@
     }
   }
 
-  if (VT != MVT::f32 && VT != MVT::f64 && (Subtarget->has16BitInsts() &&
-                                           VT != MVT::f16))
+  if (VT != MVT::f32 && VT != MVT::f64 &&
+      (!Subtarget->has16BitInsts() || VT != MVT::f16))
     return SDValue();
 
   // Match isinf/isfinite pattern
Index: llvm/test/CodeGen/AMDGPU/fp-classify.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/fp-classify.ll
+++ llvm/test/CodeGen/AMDGPU/fp-classify.ll
@@ -180,5 +180,68 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}test_isinf_pattern_f16:
+; SI-DAG: s_mov_b32 [[INF:s[0-9]+]], 0x7f800000
+; SI-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], |s{{[0-9]+}}|
+; SI: v_cmp_eq_f32_e32 vcc, [[INF]], [[CVT]]
+; SI-NEXT: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
+
+; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x204{{$}}
+; VI: v_cmp_class_f16_e32 vcc, s{{[0-9]+}}, [[MASK]]
+; VI-NOT: v_cmp
+
+; GCN: s_endpgm
+define amdgpu_kernel void @test_isinf_pattern_f16(i32 addrspace(1)* nocapture %out, half %x) #0 {
+  %fabs = tail call half @llvm.fabs.f16(half %x) #1
+  %cmp = fcmp oeq half %fabs, 0xH7C00
+  %ext = zext i1 %cmp to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_isfinite_pattern_0_f16:
+; SI-DAG: s_movk_i32 [[MASK:s[0-9]+]], 0x1f8
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
+; SI: v_cmp_class_f32_e64 [[CLASS:s\[[0-9]+:[0-9]+\]]], [[CVT]], [[MASK]]
+; SI-NEXT: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CLASS]]
+
+; VI-NOT: v_cmp
+; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1f8{{$}}
+; VI: v_cmp_class_f16_e32 vcc, s{{[0-9]+}}, [[MASK]]
+; VI-NOT: v_cmp
+
+; GCN: s_endpgm
+define amdgpu_kernel void @test_isfinite_pattern_0_f16(i32 addrspace(1)* nocapture %out, half %x) #0 {
+  %ord = fcmp ord half %x, 0.0
+  %x.fabs = tail call half @llvm.fabs.f16(half %x) #1
+  %ninf = fcmp une half %x.fabs, 0xH7C00
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_isfinite_pattern_4_f16:
+; SI-DAG: s_movk_i32 [[MASK:s[0-9]+]], 0x1f8
+; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], s{{[0-9]+}}
+; SI: v_cmp_class_f32_e64 [[CLASS:s\[[0-9]+:[0-9]+\]]], [[CVT]], [[MASK]]
+; SI-NEXT: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CLASS]]
+
+; VI-DAG: s_load_dword [[X:s[0-9]+]]
+; VI-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1f8
+; VI: v_cmp_class_f16_e32 vcc, [[X]], [[MASK]]
+; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
+define amdgpu_kernel void @test_isfinite_pattern_4_f16(i32 addrspace(1)* nocapture %out, half %x) #0 {
+  %ord = fcmp ord half %x, 0.0
+  %x.fabs = tail call half @llvm.fabs.f16(half %x) #1
+  %ninf = fcmp one half %x.fabs, 0xH7C00
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+declare half @llvm.fabs.f16(half) #1
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }