Index: lib/Target/AMDGPU/AMDGPU.td =================================================================== --- lib/Target/AMDGPU/AMDGPU.td +++ lib/Target/AMDGPU/AMDGPU.td @@ -19,6 +19,12 @@ "Enable double precision operations" >; +def FeatureFMA : SubtargetFeature<"fmaf", + "FMA", + "true", + "Enable single precision FMA (not as fast as mul+add, but fused)" +>; + def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf", "FastFMAF32", "true", Index: lib/Target/AMDGPU/AMDGPUInstructions.td =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructions.td +++ lib/Target/AMDGPU/AMDGPUInstructions.td @@ -49,6 +49,7 @@ def NoFP32Denormals : Predicate<"!Subtarget->hasFP32Denormals()">; def NoFP64Denormals : Predicate<"!Subtarget->hasFP64Denormals()">; def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">; +def FMA : Predicate<"Subtarget->hasFMA()">; def InstFlag : OperandWithDefaultOps ; def ADDRIndirect : ComplexPattern; Index: lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- lib/Target/AMDGPU/AMDGPUSubtarget.h +++ lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -140,6 +140,7 @@ // Subtarget statically properties set by tablegen bool FP64; + bool FMA; bool IsGCN; bool GCN3Encoding; bool CIInsts; @@ -261,7 +262,7 @@ return HasVOP3PInsts; } - bool hasHWFP64() const { + bool hasFP64() const { return FP64; } @@ -348,6 +349,10 @@ return CaymanISA; } + bool hasFMA() const { + return FMA; + } + TrapHandlerAbi getTrapHandlerAbi() const { return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone; } Index: lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/R600ISelLowering.cpp +++ lib/Target/AMDGPU/R600ISelLowering.cpp @@ -211,6 +211,11 @@ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); + if (!Subtarget->hasFMA()) { + setOperationAction(ISD::FMA, MVT::f32, Expand); + setOperationAction(ISD::FMA, MVT::f64, Expand); + } + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; Index: lib/Target/AMDGPU/R600Instructions.td =================================================================== --- lib/Target/AMDGPU/R600Instructions.td +++ lib/Target/AMDGPU/R600Instructions.td @@ -986,10 +986,12 @@ [(set f32:$dst, (fmad f32:$src0, f32:$src1, f32:$src2))] >; +let OtherPredicates = [FMA] in { class FMA_Common inst> : R600_3OP < inst, "FMA", [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))], VecALU >; +} class CNDE_Common inst> : R600_3OP < inst, "CNDE", Index: lib/Target/AMDGPU/R600Processors.td =================================================================== --- lib/Target/AMDGPU/R600Processors.td +++ lib/Target/AMDGPU/R600Processors.td @@ -53,7 +53,7 @@ >; def : Processor<"cypress", R600_VLIW5_Itin, - [FeatureEvergreen, FeatureWavefrontSize64, FeatureVertexCache] + [FeatureEvergreen, FeatureWavefrontSize64, FeatureVertexCache, FeatureFMA] >; def : Processor<"juniper", R600_VLIW5_Itin, @@ -82,7 +82,7 @@ >; def : Processor<"cayman", R600_VLIW4_Itin, - [FeatureNorthernIslands, FeatureCaymanISA] + [FeatureNorthernIslands, FeatureCaymanISA, FeatureFMA] >; def : Processor<"turks", R600_VLIW5_Itin, Index: test/CodeGen/AMDGPU/fma.ll =================================================================== --- test/CodeGen/AMDGPU/fma.ll +++ test/CodeGen/AMDGPU/fma.ll @@ -1,5 +1,12 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; XUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cedar -verify-machineinstrs < %s +; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=juniper -verify-machineinstrs < %s +; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s +; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=sumo -verify-machineinstrs < %s +; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=barts -verify-machineinstrs < %s +; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=caicos -verify-machineinstrs < %s +; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=turks -verify-machineinstrs < %s declare float @llvm.fma.f32(float, float, float) nounwind readnone declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone