R600: add an f32 FMA instruction for Evergreen and split the f64 FMA tests.

Summary of what the hunks below do:
  * R600Instructions.td: add class FMA_Common, an R600_3OP whose pattern
    selects (fma f32, f32, f32).
  * EvergreenInstructions.td: instantiate FMA_eg = FMA_Common<0x7> and add a
    commented-out (disabled) FMA_64 definition.
  * test/CodeGen/R600/fma.f64.ll: new file holding the f64 / v2f64 / v4f64
    tests that are removed from fma.ll.
  * test/CodeGen/R600/fma.ll: add a -mcpu=redwood RUN line using the EG
    check prefix.

NOTE(review): no "EG:" check lines are added in the hunks shown here, so the
redwood RUN line only matches the FUNC-LABEL lines -- consider adding EG
checks for the FMA instruction.
NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Leading indentation and the position of blank context lines are reconstructed
by convention; if a hunk is rejected, retry with `patch -l` -- TODO confirm
against the target tree.

Index: lib/Target/R600/EvergreenInstructions.td
===================================================================
--- lib/Target/R600/EvergreenInstructions.td
+++ lib/Target/R600/EvergreenInstructions.td
@@ -256,6 +256,11 @@
 
 let Predicates = [isEGorCayman] in {
 
+// def FMA_64 : R600_3OP <
+// 0xA, "FMA_64",
+// [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))]
+// >;
+
 // BFE_UINT - bit_extract, an optimization for mask and shift
 // Src0 = Input
 // Src1 = Offset
@@ -312,6 +317,7 @@
 def : ROTRPattern <BIT_ALIGN_INT_eg>;
 def MULADD_eg : MULADD_Common<0x14>;
 def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
+def FMA_eg : FMA_Common<0x7>;
 def ASHR_eg : ASHR_Common<0x15>;
 def LSHR_eg : LSHR_Common<0x16>;
 def LSHL_eg : LSHL_Common<0x17>;
Index: lib/Target/R600/R600Instructions.td
===================================================================
--- lib/Target/R600/R600Instructions.td
+++ lib/Target/R600/R600Instructions.td
@@ -915,6 +915,11 @@
   [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]
 >;
 
+class FMA_Common <bits<5> inst> : R600_3OP <
+  inst, "FMA",
+  [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))]
+>;
+
 class CNDE_Common <bits<5> inst> : R600_3OP <
   inst, "CNDE",
   [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))]
Index: test/CodeGen/R600/fma.f64.ll
===================================================================
--- /dev/null
+++ test/CodeGen/R600/fma.f64.ll
@@ -0,0 +1,46 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare double @llvm.fma.f64(double, double, double) nounwind readnone
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
+
+
+; FUNC-LABEL: @fma_f64
+; SI: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
+   %r0 = load double addrspace(1)* %in1
+   %r1 = load double addrspace(1)* %in2
+   %r2 = load double addrspace(1)* %in3
+   %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2)
+   store double %r3, double addrspace(1)* %out
+   ret void
+}
+
+; FUNC-LABEL: @fma_v2f64
+; SI: V_FMA_F64
+; SI: V_FMA_F64
+define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
+                       <2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) {
+   %r0 = load <2 x double> addrspace(1)* %in1
+   %r1 = load <2 x double> addrspace(1)* %in2
+   %r2 = load <2 x double> addrspace(1)* %in3
+   %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2)
+   store <2 x double> %r3, <2 x double> addrspace(1)* %out
+   ret void
+}
+
+; FUNC-LABEL: @fma_v4f64
+; SI: V_FMA_F64
+; SI: V_FMA_F64
+; SI: V_FMA_F64
+; SI: V_FMA_F64
+define void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
+                       <4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) {
+   %r0 = load <4 x double> addrspace(1)* %in1
+   %r1 = load <4 x double> addrspace(1)* %in2
+   %r2 = load <4 x double> addrspace(1)* %in3
+   %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2)
+   store <4 x double> %r3, <4 x double> addrspace(1)* %out
+   ret void
+}
Index: test/CodeGen/R600/fma.ll
===================================================================
--- test/CodeGen/R600/fma.ll
+++ test/CodeGen/R600/fma.ll
@@ -1,13 +1,10 @@
 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 declare float @llvm.fma.f32(float, float, float) nounwind readnone
 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 
-declare double @llvm.fma.f64(double, double, double) nounwind readnone
-declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
-
 ; FUNC-LABEL: @fma_f32
 ; SI: V_FMA_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
 define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
@@ -47,43 +44,3 @@
    store <4 x float> %r3, <4 x float> addrspace(1)* %out
    ret void
 }
-
-; FUNC-LABEL: @fma_f64
-; SI: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
-                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
-   %r2 = load double addrspace(1)* %in3
-   %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2)
-   store double %r3, double addrspace(1)* %out
-   ret void
-}
-
-; FUNC-LABEL: @fma_v2f64
-; SI: V_FMA_F64
-; SI: V_FMA_F64
-define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
-                       <2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) {
-   %r0 = load <2 x double> addrspace(1)* %in1
-   %r1 = load <2 x double> addrspace(1)* %in2
-   %r2 = load <2 x double> addrspace(1)* %in3
-   %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2)
-   store <2 x double> %r3, <2 x double> addrspace(1)* %out
-   ret void
-}
-
-; FUNC-LABEL: @fma_v4f64
-; SI: V_FMA_F64
-; SI: V_FMA_F64
-; SI: V_FMA_F64
-; SI: V_FMA_F64
-define void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
-                       <4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) {
-   %r0 = load <4 x double> addrspace(1)* %in1
-   %r1 = load <4 x double> addrspace(1)* %in2
-   %r2 = load <4 x double> addrspace(1)* %in3
-   %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2)
-   store <4 x double> %r3, <4 x double> addrspace(1)* %out
-   ret void
-}