R600: select fcopysign through BFI_INT.

FCOPYSIGN on f32/f64 is marked Expand only when the subtarget lacks BFI
(hasBFI() is true from EVERGREEN on), and is added to the existing per-VT
Expand list. BFIPatterns gains f32/f64 fcopysign patterns that feed the
0x7fffffff mask through a caller-supplied immediate-load instruction.

NOTE(review): the template parameter list on the `multiclass BFIPatterns`
lines was missing from the collapsed text and has been reconstructed from the
names the patterns use (BFI_INT, LoadImm32); leading whitespace inside the
hunks is likewise reconstructed. Confirm both against the tree before applying.

Index: lib/Target/R600/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/R600/AMDGPUISelLowering.cpp
+++ lib/Target/R600/AMDGPUISelLowering.cpp
@@ -217,6 +217,12 @@
   setOperationAction(ISD::FREM, MVT::f64, Custom);
   setOperationAction(ISD::FRINT, MVT::f64, Custom);
 
+  if (!Subtarget->hasBFI()) {
+    // fcopysign can be done in a single instruction with BFI.
+    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+  }
+
   if (!Subtarget->hasBCNT(32))
     setOperationAction(ISD::CTPOP, MVT::i32, Expand);
 
@@ -279,6 +285,7 @@
     setOperationAction(ISD::FNEG, VT, Expand);
     setOperationAction(ISD::SELECT, VT, Expand);
     setOperationAction(ISD::VSELECT, VT, Expand);
+    setOperationAction(ISD::FCOPYSIGN, VT, Expand);
   }
 
   setTargetDAGCombine(ISD::MUL);
Index: lib/Target/R600/AMDGPUInstructions.td
===================================================================
--- lib/Target/R600/AMDGPUInstructions.td
+++ lib/Target/R600/AMDGPUInstructions.td
@@ -365,7 +365,7 @@
 
 // BFI_INT patterns
 
-multiclass BFIPatterns <Instruction BFI_INT> {
+multiclass BFIPatterns <Instruction BFI_INT, Instruction LoadImm32> {
 
   // Definition from ISA doc:
   // (y & x) | (z & ~x)
@@ -381,6 +381,19 @@
     (BFI_INT $x, $y, $z)
   >;
 
+  def : Pat <
+    (fcopysign f32:$src0, f32:$src1),
+    (BFI_INT (LoadImm32 0x7fffffff), $src0, $src1)
+  >;
+
+  def : Pat <
+    (f64 (fcopysign f64:$src0, f64:$src1)),
+    (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+    (i32 (EXTRACT_SUBREG $src0, sub0)), sub0),
+    (BFI_INT (LoadImm32 0x7fffffff),
+             (i32 (EXTRACT_SUBREG $src0, sub1)),
+             (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
+  >;
 }
 
 // SHA-256 Ma patterns
Index: lib/Target/R600/AMDGPUSubtarget.h
===================================================================
--- lib/Target/R600/AMDGPUSubtarget.h
+++ lib/Target/R600/AMDGPUSubtarget.h
@@ -72,6 +72,10 @@
     return (getGeneration() >= EVERGREEN);
   }
 
+  bool hasBFI() const {
+    return (getGeneration() >= EVERGREEN);
+  }
+
   bool hasBFM() const {
     return hasBFE();
   }
NOTE(review): the <...> argument lists on the `defm : BFIPatterns` and
`def : ROTRPattern` lines were missing from the collapsed text and have been
reconstructed (BFI_INT_eg/MOV_IMM_I32 and V_BFI_B32/S_MOV_B32 appear elsewhere
in this patch; V_ALIGNBIT_B32 is presumed). Confirm against the tree.

Index: lib/Target/R600/EvergreenInstructions.td
===================================================================
--- lib/Target/R600/EvergreenInstructions.td
+++ lib/Target/R600/EvergreenInstructions.td
@@ -295,7 +295,7 @@
 def : Pat<(i32 (sext_inreg i32:$src, i16)),
   (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 16))>;
 
-defm : BFIPatterns <BFI_INT_eg>;
+defm : BFIPatterns <BFI_INT_eg, MOV_IMM_I32>;
 
 def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT",
   [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))],
Index: lib/Target/R600/SIInstructions.td
===================================================================
--- lib/Target/R600/SIInstructions.td
+++ lib/Target/R600/SIInstructions.td
@@ -2140,7 +2140,7 @@
   (V_MUL_HI_I32 $src0, $src1, (i32 0))
 >;
 
-defm : BFIPatterns <V_BFI_B32>;
+defm : BFIPatterns <V_BFI_B32, S_MOV_B32>;
 def : ROTRPattern <V_ALIGNBIT_B32>;
 
 /********** ======================= **********/
Index: test/CodeGen/R600/fcopysign.f32.ll
===================================================================
--- /dev/null
+++ test/CodeGen/R600/fcopysign.f32.ll
@@ -0,0 +1,50 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+
+declare float @llvm.copysign.f32(float, float) nounwind readnone
+declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+; Try to identify arg based on higher address.
+; FUNC-LABEL: @test_copysign_f32:
+; SI: S_LOAD_DWORD [[SSIGN:s[0-9]+]], {{.*}} 0xc
+; SI: V_MOV_B32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
+; SI-DAG: S_LOAD_DWORD [[SMAG:s[0-9]+]], {{.*}} 0xb
+; SI-DAG: V_MOV_B32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
+; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff
+; SI: V_BFI_B32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
+; SI: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+
+; EG: BFI_INT
+define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign) nounwind {
+  %result = call float @llvm.copysign.f32(float %mag, float %sign)
+  store float %result, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @test_copysign_v2f32:
+; SI: S_ENDPGM
+
+; EG: BFI_INT
+; EG: BFI_INT
+define void @test_copysign_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %mag, <2 x float> %sign) nounwind {
+  %result = call <2 x float> @llvm.copysign.v2f32(<2 x float> %mag, <2 x float> %sign)
+  store <2 x float> %result, <2 x float> addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @test_copysign_v4f32:
+; SI: S_ENDPGM
+
+; EG: BFI_INT
+; EG: BFI_INT
+; EG: BFI_INT
+; EG: BFI_INT
+define void @test_copysign_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %mag, <4 x float> %sign) nounwind {
+  %result = call <4 x float> @llvm.copysign.v4f32(<4 x float> %mag, <4 x float> %sign)
+  store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
+  ret void
+}
+
Index: test/CodeGen/R600/fcopysign.f64.ll
===================================================================
--- /dev/null
+++ test/CodeGen/R600/fcopysign.f64.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare double @llvm.copysign.f64(double, double) nounwind readnone
+declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nounwind readnone
+declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind readnone
+
+; FUNC-LABEL: @test_copysign_f64:
+; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI: V_MOV_B32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
+; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: V_MOV_B32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
+; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff
+; SI: V_BFI_B32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
+; SI: V_MOV_B32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
+; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
+; SI: S_ENDPGM
+define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %sign) nounwind {
+  %result = call double @llvm.copysign.f64(double %mag, double %sign)
+  store double %result, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @test_copysign_v2f64:
+; SI: S_ENDPGM
+define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %mag, <2 x double> %sign) nounwind {
+  %result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign)
+  store <2 x double> %result, <2 x double> addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @test_copysign_v4f64:
+; SI: S_ENDPGM
+define void @test_copysign_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %mag, <4 x double> %sign) nounwind {
+  %result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign)
+  store <4 x double> %result, <4 x double> addrspace(1)* %out, align 8
+  ret void
+}