# Patch overview: adds handling of the f16 FCOPYSIGN operation to the AMDGPU
# backend. It consists of three parts: an operation-action tweak in
# SIISelLowering.cpp, a new selection pattern in SIInstructions.td, and a new
# FileCheck test (fcopysign.f16.ll).
#
# Part 1 (SIISelLowering.cpp): next to the other f16 operation actions, marks
# ISD::FCOPYSIGN on MVT::f16 as Expand, but only when Subtarget->hasBFI()
# returns false. When hasBFI() is true no action is set by this hunk.
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -310,6 +310,8 @@
     setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
     setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
     setOperationAction(ISD::FDIV, MVT::f16, Custom);
+    if (!Subtarget->hasBFI())
+      setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
 
     // F16 - VOP3 Actions.
     setOperationAction(ISD::FMA, MVT::f16, Legal);
# Part 2 (SIInstructions.td): adds a pattern that selects
# (fcopysign f16, f16) to V_BFI_B32, with the constant 0x00007fff materialized
# through S_MOV_B32 as the first operand and $src0 / $src1 as the remaining
# operands. Only the case where both operands are f16 is matched.
# NOTE(review): 0x7fff presumably acts as the "take these bits from $src0"
# mask, so the magnitude comes from $src0 and the remaining bits (including the
# f16 sign bit 15) come from $src1 -- confirm against the V_BFI_B32 ISA
# description.
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -678,6 +678,11 @@
 >;
 
 def : Pat <
+  (fcopysign f16:$src0, f16:$src1),
+  (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1)
+>;
+
+def : Pat <
   (fneg f16:$src),
   (V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x00008000)))
 >;
# Part 3 (fcopysign.f16.ll): new test, run once with -mcpu=SI (prefixes SI,
# GCN, FUNC) and once with -mcpu=tonga (prefixes VI, GCN, FUNC).
#  * test_copysign_f16: the SI checks expect both loaded halves to be converted
#    with v_cvt_f32_f16, combined by v_bfi_b32 using a constant produced by
#    s_brev_b32 ..., -2, and converted back with v_cvt_f16_f32. The VI checks
#    expect s_movk_i32 0x7fff followed by one v_bfi_b32 applied directly to the
#    loaded values.
#  * test_copysign_v2f16 / test_copysign_v4f16: only check, under the common
#    GCN prefix, that two / four v_bfi_b32 instructions appear before s_endpgm;
#    operands and constants are not checked for the vector cases.
Index: test/CodeGen/AMDGPU/fcopysign.f16.ll
===================================================================
--- test/CodeGen/AMDGPU/fcopysign.f16.ll
+++ test/CodeGen/AMDGPU/fcopysign.f16.ll
@@ -0,0 +1,50 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
+
+declare half @llvm.copysign.f16(half, half) nounwind readnone
+declare <2 x half> @llvm.copysign.v2f16(<2 x half>, <2 x half>) nounwind readnone
+declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>) nounwind readnone
+
+; FUNC-LABEL: {{^}}test_copysign_f16:
+; GCN: buffer_load_ushort v[[VMAG:[0-9]+]]
+; GCN: buffer_load_ushort v[[VSIGN:[0-9]+]]
+; SI: s_brev_b32 s[[SCONST:[0-9]+]], -2
+; SI: v_cvt_f32_f16_e32 v[[VMAG_F32:[0-9]+]], v[[VMAG]]
+; SI: v_cvt_f32_f16_e32 v[[VSIGN_F32:[0-9]+]], v[[VSIGN]]
+; SI: v_bfi_b32 v[[VDST_F32:[0-9]+]], s[[SCONST]], v[[VMAG_F32]], v[[VSIGN_F32]]
+; SI: v_cvt_f16_f32_e32 v[[VDST:[0-9]+]], v[[VDST_F32]]
+; VI: s_movk_i32 s[[SCONST:[0-9]+]], 0x7fff
+; VI: v_bfi_b32 v[[VDST:[0-9]+]], s[[SCONST]], v[[VMAG]], v[[VSIGN]]
+; GCN: buffer_store_short v[[VDST]]
+; GCN: s_endpgm
+define void @test_copysign_f16(half addrspace(1)* %mag_ptr,
+                               half addrspace(1)* %sign_ptr,
+                               half addrspace(1)* %out) nounwind {
+  %mag = load half, half addrspace(1)* %mag_ptr
+  %sign = load half, half addrspace(1)* %sign_ptr
+  %result = call half @llvm.copysign.f16(half %mag, half %sign)
+  store half %result, half addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_copysign_v2f16:
+; GCN: v_bfi_b32
+; GCN: v_bfi_b32
+; GCN: s_endpgm
+define void @test_copysign_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %mag, <2 x half> %sign) nounwind {
+  %result = call <2 x half> @llvm.copysign.v2f16(<2 x half> %mag, <2 x half> %sign)
+  store <2 x half> %result, <2 x half> addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_copysign_v4f16:
+; GCN: v_bfi_b32
+; GCN: v_bfi_b32
+; GCN: v_bfi_b32
+; GCN: v_bfi_b32
+; GCN: s_endpgm
+define void @test_copysign_v4f16(<4 x half> addrspace(1)* %out, <4 x half> %mag, <4 x half> %sign) nounwind {
+  %result = call <4 x half> @llvm.copysign.v4f16(<4 x half> %mag, <4 x half> %sign)
+  store <4 x half> %result, <4 x half> addrspace(1)* %out, align 16
+  ret void
+}