AMDGPU cost model: widen the "free fmul feeding fadd/fsub" estimate.

What the patch does:
  * An fmul whose single user is an fadd or fsub is costed as TCC_Free, on
    the assumption that the user is charged for the whole fused operation.
  * Before: only f32, and only when !HasFP32Denormals.
  * After: f32, f64 and f16. The fmul is free when !HasFP32Denormals, or
    when both the fmul and its fadd/fsub user carry the allow-contract flag.
  * The test gains f16 and f64 variants plus "contract" variants that expect
    a zero-cost fmul under both denormal modes.

NOTE(review): the f16 and f64 paths are still gated on HasFP32Denormals.
Confirm that this is intended and that no separate f64/f16 denormal setting
should be consulted for those types.
NOTE(review): leading whitespace of context lines was reconstructed; verify
it against the target file before applying.

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -510,12 +510,14 @@
     // Check possible fuse {fadd|fsub}(a,fmul(b,c)) and return zero cost for
     // fmul(b,c) supposing the fadd|fsub will get estimated cost for the whole
     // fused operation.
-    if (!HasFP32Denormals && SLT == MVT::f32 && CxtI && CxtI->hasOneUse())
+    if ((SLT == MVT::f32 || SLT == MVT::f64 || SLT == MVT::f16) && CxtI &&
+        CxtI->hasOneUse())
       if (const auto *FAdd = dyn_cast<BinaryOperator>(*CxtI->user_begin())) {
         const int OPC = TLI->InstructionOpcodeToISD(FAdd->getOpcode());
-        if (OPC == ISD::FADD || OPC == ISD::FSUB) {
+        if ((OPC == ISD::FADD || OPC == ISD::FSUB) &&
+            (!HasFP32Denormals ||
+             (CxtI->hasAllowContract() && FAdd->hasAllowContract())))
           return TargetTransformInfo::TCC_Free;
-        }
       }
     LLVM_FALLTHROUGH;
   case ISD::FADD:
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll b/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll
--- a/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fused_costs.ll
@@ -1,48 +1,155 @@
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=FUSED,ALL %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=SLOW,ALL %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=FUSED,ALL %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=SLOW,ALL %s
-
-target triple = "amdgcn--"
-
-; ALL-LABEL: 'fmul_fadd_f32':
-; FUSED: estimated cost of 0 for instruction: %mul = fmul float
-; SLOW: estimated cost of 1 for instruction: %mul = fmul float
-; ALL: estimated cost of 1 for instruction: %add = fadd float
-define float @fmul_fadd_f32(float %r0, float %r1, float %r2) #0 {
-  %mul = fmul float %r0, %r1
-  %add = fadd float %mul, %r2
-  ret float %add
-}
-
-; ALL-LABEL: 'fmul_fadd_v2f32':
-; FUSED: estimated cost of 0 for instruction: %mul = fmul <2 x float>
-; SLOW: estimated cost of 2 for instruction: %mul = fmul <2 x float>
-; ALL: estimated cost of 2 for instruction: %add = fadd <2 x float>
-define <2 x float> @fmul_fadd_v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2) #0 {
-  %mul = fmul <2 x float> %r0, %r1
-  %add = fadd <2 x float> %mul, %r2
-  ret <2 x float> %add
-}
-
-; ALL-LABEL: 'fmul_fsub_f32':
-; FUSED: estimated cost of 0 for instruction: %mul = fmul float
-; SLOW: estimated cost of 1 for instruction: %mul = fmul float
-; ALL: estimated cost of 1 for instruction: %sub = fsub float
-define float @fmul_fsub_f32(float %r0, float %r1, float %r2) #0 {
-  %mul = fmul float %r0, %r1
-  %sub = fsub float %mul, %r2
-  ret float %sub
-}
-
-; ALL-LABEL: 'fmul_fsub_v2f32':
-; FUSED: estimated cost of 0 for instruction: %mul = fmul <2 x float>
-; SLOW: estimated cost of 2 for instruction: %mul = fmul <2 x float>
-; ALL: estimated cost of 2 for instruction: %sub = fsub <2 x float>
-define <2 x float> @fmul_fsub_v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2) #0 {
-  %mul = fmul <2 x float> %r0, %r1
-  %sub = fsub <2 x float> %mul, %r2
-  ret <2 x float> %sub
-}
-
-attributes #0 = { nounwind }
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=FUSED,ALL %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=SLOW,ALL %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=FUSED,ALL %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=SLOW,ALL %s
+
+target triple = "amdgcn--"
+
+; ALL-LABEL: 'fmul_fadd_f32':
+; FUSED: estimated cost of 0 for instruction: %mul = fmul float
+; SLOW: estimated cost of 1 for instruction: %mul = fmul float
+; ALL: estimated cost of 1 for instruction: %add = fadd float
+define float @fmul_fadd_f32(float %r0, float %r1, float %r2) #0 {
+  %mul = fmul float %r0, %r1
+  %add = fadd float %mul, %r2
+  ret float %add
+}
+
+; ALL-LABEL: 'fmul_fadd_contract_f32':
+; ALL: estimated cost of 0 for instruction: %mul = fmul contract float
+; ALL: estimated cost of 1 for instruction: %add = fadd contract float
+define float @fmul_fadd_contract_f32(float %r0, float %r1, float %r2) #0 {
+  %mul = fmul contract float %r0, %r1
+  %add = fadd contract float %mul, %r2
+  ret float %add
+}
+
+; ALL-LABEL: 'fmul_fadd_v2f32':
+; FUSED: estimated cost of 0 for instruction: %mul = fmul <2 x float>
+; SLOW: estimated cost of 2 for instruction: %mul = fmul <2 x float>
+; ALL: estimated cost of 2 for instruction: %add = fadd <2 x float>
+define <2 x float> @fmul_fadd_v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2) #0 {
+  %mul = fmul <2 x float> %r0, %r1
+  %add = fadd <2 x float> %mul, %r2
+  ret <2 x float> %add
+}
+
+; ALL-LABEL: 'fmul_fsub_f32':
+; FUSED: estimated cost of 0 for instruction: %mul = fmul float
+; SLOW: estimated cost of 1 for instruction: %mul = fmul float
+; ALL: estimated cost of 1 for instruction: %sub = fsub float
+define float @fmul_fsub_f32(float %r0, float %r1, float %r2) #0 {
+  %mul = fmul float %r0, %r1
+  %sub = fsub float %mul, %r2
+  ret float %sub
+}
+
+; ALL-LABEL: 'fmul_fsub_v2f32':
+; FUSED: estimated cost of 0 for instruction: %mul = fmul <2 x float>
+; SLOW: estimated cost of 2 for instruction: %mul = fmul <2 x float>
+; ALL: estimated cost of 2 for instruction: %sub = fsub <2 x float>
+define <2 x float> @fmul_fsub_v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2) #0 {
+  %mul = fmul <2 x float> %r0, %r1
+  %sub = fsub <2 x float> %mul, %r2
+  ret <2 x float> %sub
+}
+
+; ALL-LABEL: 'fmul_fadd_f16':
+; FUSED: estimated cost of 0 for instruction: %mul = fmul half
+; SLOW: estimated cost of 1 for instruction: %mul = fmul half
+; ALL: estimated cost of 1 for instruction: %add = fadd half
+define half @fmul_fadd_f16(half %r0, half %r1, half %r2) #0 {
+  %mul = fmul half %r0, %r1
+  %add = fadd half %mul, %r2
+  ret half %add
+}
+
+; ALL-LABEL: 'fmul_fadd_contract_f16':
+; ALL: estimated cost of 0 for instruction: %mul = fmul contract half
+; ALL: estimated cost of 1 for instruction: %add = fadd contract half
+define half @fmul_fadd_contract_f16(half %r0, half %r1, half %r2) #0 {
+  %mul = fmul contract half %r0, %r1
+  %add = fadd contract half %mul, %r2
+  ret half %add
+}
+
+; ALL-LABEL: 'fmul_fadd_v2f16':
+; FUSED: estimated cost of 0 for instruction: %mul = fmul <2 x half>
+; SLOW: estimated cost of 1 for instruction: %mul = fmul <2 x half>
+; ALL: estimated cost of 1 for instruction: %add = fadd <2 x half>
+define <2 x half> @fmul_fadd_v2f16(<2 x half> %r0, <2 x half> %r1, <2 x half> %r2) #0 {
+  %mul = fmul <2 x half> %r0, %r1
+  %add = fadd <2 x half> %mul, %r2
+  ret <2 x half> %add
+}
+
+; ALL-LABEL: 'fmul_fsub_f16':
+; FUSED: estimated cost of 0 for instruction: %mul = fmul half
+; SLOW: estimated cost of 1 for instruction: %mul = fmul half
+; ALL: estimated cost of 1 for instruction: %sub = fsub half
+define half @fmul_fsub_f16(half %r0, half %r1, half %r2) #0 {
+  %mul = fmul half %r0, %r1
+  %sub = fsub half %mul, %r2
+  ret half %sub
+}
+
+; ALL-LABEL: 'fmul_fsub_v2f16':
+; FUSED: estimated cost of 0 for instruction: %mul = fmul <2 x half>
+; SLOW: estimated cost of 1 for instruction: %mul = fmul <2 x half>
+; ALL: estimated cost of 1 for instruction: %sub = fsub <2 x half>
+define <2 x half> @fmul_fsub_v2f16(<2 x half> %r0, <2 x half> %r1, <2 x half> %r2) #0 {
+  %mul = fmul <2 x half> %r0, %r1
+  %sub = fsub <2 x half> %mul, %r2
+  ret <2 x half> %sub
+}
+
+; ALL-LABEL: 'fmul_fadd_f64':
+; FUSED: estimated cost of 0 for instruction: %mul = fmul double
+; SLOW: estimated cost of 3 for instruction: %mul = fmul double
+; ALL: estimated cost of 3 for instruction: %add = fadd double
+define double @fmul_fadd_f64(double %r0, double %r1, double %r2) #0 {
+  %mul = fmul double %r0, %r1
+  %add = fadd double %mul, %r2
+  ret double %add
+}
+
+; ALL-LABEL: 'fmul_fadd_contract_f64':
+; ALL: estimated cost of 0 for instruction: %mul = fmul contract double
+; ALL: estimated cost of 3 for instruction: %add = fadd contract double
+define double @fmul_fadd_contract_f64(double %r0, double %r1, double %r2) #0 {
+  %mul = fmul contract double %r0, %r1
+  %add = fadd contract double %mul, %r2
+  ret double %add
+}
+
+; ALL-LABEL: 'fmul_fadd_v2f64':
+; FUSED: estimated cost of 0 for instruction: %mul = fmul <2 x double>
+; SLOW: estimated cost of 6 for instruction: %mul = fmul <2 x double>
+; ALL: estimated cost of 6 for instruction: %add = fadd <2 x double>
+define <2 x double> @fmul_fadd_v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2) #0 {
+  %mul = fmul <2 x double> %r0, %r1
+  %add = fadd <2 x double> %mul, %r2
+  ret <2 x double> %add
+}
+
+; ALL-LABEL: 'fmul_fsub_f64':
+; FUSED: estimated cost of 0 for instruction: %mul = fmul double
+; SLOW: estimated cost of 3 for instruction: %mul = fmul double
+; ALL: estimated cost of 3 for instruction: %sub = fsub double
+define double @fmul_fsub_f64(double %r0, double %r1, double %r2) #0 {
+  %mul = fmul double %r0, %r1
+  %sub = fsub double %mul, %r2
+  ret double %sub
+}
+
+; ALL-LABEL: 'fmul_fsub_v2f64':
+; FUSED: estimated cost of 0 for instruction: %mul = fmul <2 x double>
+; SLOW: estimated cost of 6 for instruction: %mul = fmul <2 x double>
+; ALL: estimated cost of 6 for instruction: %sub = fsub <2 x double>
+define <2 x double> @fmul_fsub_v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2) #0 {
+  %mul = fmul <2 x double> %r0, %r1
+  %sub = fsub <2 x double> %mul, %r2
+  ret <2 x double> %sub
+}
+
+attributes #0 = { nounwind }