Index: lib/Basic/Targets/AMDGPU.h =================================================================== --- lib/Basic/Targets/AMDGPU.h +++ lib/Basic/Targets/AMDGPU.h @@ -94,77 +94,78 @@ bool HasLDEXPF; bool HasFP64; bool HasFastFMA; + bool HasFullRateF32Denorms; }; static constexpr GPUInfo InvalidGPU = - {{""}, {""}, GK_NONE, false, false, false, false, false}; + {{""}, {""}, GK_NONE, false, false, false, false, false, false}; static constexpr GPUInfo R600GPUs[26] = { - // Name Canonical Kind Has Has Has Has Has - // Name FMAF Fast LDEXPF FP64 Fast - // FMAF FMA - {{"r600"}, {"r600"}, GK_R600, false, false, false, false, false}, - {{"rv630"}, {"r600"}, GK_R600, false, false, false, false, false}, - {{"rv635"}, {"r600"}, GK_R600, false, false, false, false, false}, - {{"r630"}, {"r630"}, GK_R630, false, false, false, false, false}, - {{"rs780"}, {"rs880"}, GK_RS880, false, false, false, false, false}, - {{"rs880"}, {"rs880"}, GK_RS880, false, false, false, false, false}, - {{"rv610"}, {"rs880"}, GK_RS880, false, false, false, false, false}, - {{"rv620"}, {"rs880"}, GK_RS880, false, false, false, false, false}, - {{"rv670"}, {"rv670"}, GK_RV670, false, false, false, false, false}, - {{"rv710"}, {"rv710"}, GK_RV710, false, false, false, false, false}, - {{"rv730"}, {"rv730"}, GK_RV730, false, false, false, false, false}, - {{"rv740"}, {"rv770"}, GK_RV770, false, false, false, false, false}, - {{"rv770"}, {"rv770"}, GK_RV770, false, false, false, false, false}, - {{"cedar"}, {"cedar"}, GK_CEDAR, false, false, false, false, false}, - {{"palm"}, {"cedar"}, GK_CEDAR, false, false, false, false, false}, - {{"cypress"}, {"cypress"}, GK_CYPRESS, true, false, false, false, false}, - {{"hemlock"}, {"cypress"}, GK_CYPRESS, true, false, false, false, false}, - {{"juniper"}, {"juniper"}, GK_JUNIPER, false, false, false, false, false}, - {{"redwood"}, {"redwood"}, GK_REDWOOD, false, false, false, false, false}, - {{"sumo"}, {"sumo"}, GK_SUMO, false, false, false, false, false}, - {{"sumo2"}, {"sumo"}, GK_SUMO, false, false, false, false, false}, - {{"barts"}, {"barts"}, GK_BARTS, false, false, false, false, false}, - {{"caicos"}, {"caicos"}, GK_BARTS, false, false, false, false, false}, - {{"aruba"}, {"cayman"}, GK_CAYMAN, true, false, false, false, false}, - {{"cayman"}, {"cayman"}, GK_CAYMAN, true, false, false, false, false}, - {{"turks"}, {"turks"}, GK_TURKS, false, false, false, false, false}, + // Name Canonical Kind Has Has Has Has Has Has + // Name FMAF Fast LDEXPF FP64 Fast Fast + // FMAF FMA Denorm + {{"r600"}, {"r600"}, GK_R600, false, false, false, false, false, false}, + {{"rv630"}, {"r600"}, GK_R600, false, false, false, false, false, false}, + {{"rv635"}, {"r600"}, GK_R600, false, false, false, false, false, false}, + {{"r630"}, {"r630"}, GK_R630, false, false, false, false, false, false}, + {{"rs780"}, {"rs880"}, GK_RS880, false, false, false, false, false, false}, + {{"rs880"}, {"rs880"}, GK_RS880, false, false, false, false, false, false}, + {{"rv610"}, {"rs880"}, GK_RS880, false, false, false, false, false, false}, + {{"rv620"}, {"rs880"}, GK_RS880, false, false, false, false, false, false}, + {{"rv670"}, {"rv670"}, GK_RV670, false, false, false, false, false, false}, + {{"rv710"}, {"rv710"}, GK_RV710, false, false, false, false, false, false}, + {{"rv730"}, {"rv730"}, GK_RV730, false, false, false, false, false, false}, + {{"rv740"}, {"rv770"}, GK_RV770, false, false, false, false, false, false}, + {{"rv770"}, {"rv770"}, GK_RV770, false, false, false, false, false, false}, + {{"cedar"}, {"cedar"}, GK_CEDAR, false, false, false, false, false, false}, + {{"palm"}, {"cedar"}, GK_CEDAR, false, false, false, false, false, false}, + {{"cypress"}, {"cypress"}, GK_CYPRESS, true, false, false, false, false, false}, + {{"hemlock"}, {"cypress"}, GK_CYPRESS, true, false, false, false, false, false}, + {{"juniper"}, {"juniper"}, GK_JUNIPER, false, false, false, false, false, false}, + {{"redwood"}, {"redwood"}, GK_REDWOOD, false, false, false, false, false, false}, + {{"sumo"}, {"sumo"}, GK_SUMO, false, false, false, false, false, false}, + {{"sumo2"}, {"sumo"}, GK_SUMO, false, false, false, false, false, false}, + {{"barts"}, {"barts"}, GK_BARTS, false, false, false, false, false, false}, + {{"caicos"}, {"caicos"}, GK_BARTS, false, false, false, false, false, false}, + {{"aruba"}, {"cayman"}, GK_CAYMAN, true, false, false, false, false, false}, + {{"cayman"}, {"cayman"}, GK_CAYMAN, true, false, false, false, false, false}, + {{"turks"}, {"turks"}, GK_TURKS, false, false, false, false, false, false}, }; static constexpr GPUInfo AMDGCNGPUs[32] = { - // Name Canonical Kind Has Has Has Has Has - // Name FMAF Fast LDEXPF FP64 Fast - // FMAF FMA - {{"gfx600"}, {"gfx600"}, GK_GFX600, true, true, true, true, true}, - {{"tahiti"}, {"gfx600"}, GK_GFX600, true, true, true, true, true}, - {{"gfx601"}, {"gfx601"}, GK_GFX601, true, false, true, true, true}, - {{"hainan"}, {"gfx601"}, GK_GFX601, true, false, true, true, true}, - {{"oland"}, {"gfx601"}, GK_GFX601, true, false, true, true, true}, - {{"pitcairn"}, {"gfx601"}, GK_GFX601, true, false, true, true, true}, - {{"verde"}, {"gfx601"}, GK_GFX601, true, false, true, true, true}, - {{"gfx700"}, {"gfx700"}, GK_GFX700, true, false, true, true, true}, - {{"kaveri"}, {"gfx700"}, GK_GFX700, true, false, true, true, true}, - {{"gfx701"}, {"gfx701"}, GK_GFX701, true, true, true, true, true}, - {{"hawaii"}, {"gfx701"}, GK_GFX701, true, true, true, true, true}, - {{"gfx702"}, {"gfx702"}, GK_GFX702, true, true, true, true, true}, - {{"gfx703"}, {"gfx703"}, GK_GFX703, true, false, true, true, true}, - {{"kabini"}, {"gfx703"}, GK_GFX703, true, false, true, true, true}, - {{"mullins"}, {"gfx703"}, GK_GFX703, true, false, true, true, true}, - {{"gfx704"}, {"gfx704"}, GK_GFX704, true, false, true, true, true}, - {{"bonaire"}, {"gfx704"}, GK_GFX704, true, false, true, true, true}, - {{"gfx801"}, {"gfx801"}, GK_GFX801, true, true, true, true, true}, - {{"carrizo"}, {"gfx801"}, GK_GFX801, true, true, true, true, true}, - {{"gfx802"}, {"gfx802"}, GK_GFX802, true, false, true, true, true}, - {{"iceland"}, {"gfx802"}, GK_GFX802, true, false, true, true, true}, - {{"tonga"}, {"gfx802"}, GK_GFX802, true, false, true, true, true}, - {{"gfx803"}, {"gfx803"}, GK_GFX803, true, false, true, true, true}, - {{"fiji"}, {"gfx803"}, GK_GFX803, true, false, true, true, true}, - {{"polaris10"}, {"gfx803"}, GK_GFX803, true, false, true, true, true}, - {{"polaris11"}, {"gfx803"}, GK_GFX803, true, false, true, true, true}, - {{"gfx810"}, {"gfx810"}, GK_GFX810, true, false, true, true, true}, - {{"stoney"}, {"gfx810"}, GK_GFX810, true, false, true, true, true}, - {{"gfx900"}, {"gfx900"}, GK_GFX900, true, true, true, true, true}, - {{"gfx902"}, {"gfx902"}, GK_GFX900, true, true, true, true, true}, - {{"gfx904"}, {"gfx904"}, GK_GFX904, true, true, true, true, true}, - {{"gfx906"}, {"gfx906"}, GK_GFX906, true, true, true, true, true}, + // Name Canonical Kind Has Has Has Has Has Has + // Name FMAF Fast LDEXPF FP64 Fast Fast + // FMAF FMA Denorm + {{"gfx600"}, {"gfx600"}, GK_GFX600, true, true, true, true, true, false}, + {{"tahiti"}, {"gfx600"}, GK_GFX600, true, true, true, true, true, false}, + {{"gfx601"}, {"gfx601"}, GK_GFX601, true, false, true, true, true, false}, + {{"hainan"}, {"gfx601"}, GK_GFX601, true, false, true, true, true, false}, + {{"oland"}, {"gfx601"}, GK_GFX601, true, false, true, true, true, false}, + {{"pitcairn"}, {"gfx601"}, GK_GFX601, true, false, true, true, true, false}, + {{"verde"}, {"gfx601"}, GK_GFX601, true, false, true, true, true, false}, + {{"gfx700"}, {"gfx700"}, GK_GFX700, true, false, true, true, true, false}, + {{"kaveri"}, {"gfx700"}, GK_GFX700, true, false, true, true, true, false}, + {{"gfx701"}, {"gfx701"}, GK_GFX701, true, true, true, true, true, false}, + {{"hawaii"}, {"gfx701"}, GK_GFX701, true, true, true, true, true, false}, + {{"gfx702"}, {"gfx702"}, GK_GFX702, true, true, true, true, true, false}, + {{"gfx703"}, {"gfx703"}, GK_GFX703, true, false, true, true, true, false}, + {{"kabini"}, {"gfx703"}, GK_GFX703, true, false, true, true, true, false}, + {{"mullins"}, {"gfx703"}, GK_GFX703, true, false, true, true, true, false}, + {{"gfx704"}, {"gfx704"}, GK_GFX704, true, false, true, true, true, false}, + {{"bonaire"}, {"gfx704"}, GK_GFX704, true, false, true, true, true, false}, + {{"gfx801"}, {"gfx801"}, GK_GFX801, true, true, true, true, true, true}, + {{"carrizo"}, {"gfx801"}, GK_GFX801, true, true, true, true, true, true}, + {{"gfx802"}, {"gfx802"}, GK_GFX802, true, false, true, true, true, true}, + {{"iceland"}, {"gfx802"}, GK_GFX802, true, false, true, true, true, true}, + {{"tonga"}, {"gfx802"}, GK_GFX802, true, false, true, true, true, true}, + {{"gfx803"}, {"gfx803"}, GK_GFX803, true, false, true, true, true, true}, + {{"fiji"}, {"gfx803"}, GK_GFX803, true, false, true, true, true, true}, + {{"polaris10"}, {"gfx803"}, GK_GFX803, true, false, true, true, true, true}, + {{"polaris11"}, {"gfx803"}, GK_GFX803, true, false, true, true, true, true}, + {{"gfx810"}, {"gfx810"}, GK_GFX810, true, false, true, true, true, true}, + {{"stoney"}, {"gfx810"}, GK_GFX810, true, false, true, true, true, true}, + {{"gfx900"}, {"gfx900"}, GK_GFX900, true, true, true, true, true, true}, + {{"gfx902"}, {"gfx902"}, GK_GFX900, true, true, true, true, true, true}, + {{"gfx904"}, {"gfx904"}, GK_GFX904, true, true, true, true, true, true}, + {{"gfx906"}, {"gfx906"}, GK_GFX906, true, true, true, true, true, true}, }; static GPUInfo parseR600Name(StringRef Name); Index: lib/Basic/Targets/AMDGPU.cpp =================================================================== --- lib/Basic/Targets/AMDGPU.cpp +++ lib/Basic/Targets/AMDGPU.cpp @@ -210,7 +210,8 @@ } if (!hasFP32Denormals) TargetOpts.Features.push_back( - (Twine(CGOptsGPU.HasFastFMAF && !CGOpts.FlushDenorm + (Twine(CGOptsGPU.HasFastFMAF && CGOptsGPU.HasFullRateF32Denorms && + !CGOpts.FlushDenorm ? '+' : '-') + Twine("fp32-denormals")) Index: test/CodeGenOpenCL/denorms-are-zero.cl =================================================================== --- test/CodeGenOpenCL/denorms-are-zero.cl +++ test/CodeGenOpenCL/denorms-are-zero.cl @@ -1,8 +1,26 @@ // RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - %s | FileCheck %s --check-prefix=DENORM-ZERO -// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck %s --check-prefix=AMDGCN -// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck %s --check-prefix=AMDGCN-DENORM -// RUN: %clang_cc1 -emit-llvm -target-feature +fp32-denormals -target-feature -fp64-fp16-denormals -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck --check-prefix=AMDGCN-FEATURE %s +// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - %s | FileCheck -check-prefix=DENORM-ZERO %s + +// Slow FMAF and slow f32 denormals +// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu pitcairn %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s +// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu pitcairn %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH-OPT %s + +// Fast FMAF, but slow f32 denormals +// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu tahiti %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s +// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu tahiti %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH-OPT %s + +// Fast F32 denormals, but slow FMAF +// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH %s +// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH-OPT %s + +// Fast F32 denormals and fast FMAF +// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu gfx900 %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-DENORM %s +// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu gfx900 %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FLUSH-OPT %s + +// RUN: %clang_cc1 -emit-llvm -target-feature +fp32-denormals -target-feature -fp64-fp16-denormals -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FEATURE %s +// RUN: %clang_cc1 -emit-llvm -target-feature +fp32-denormals -target-feature -fp64-fp16-denormals -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu pitcairn %s | FileCheck -check-prefixes=AMDGCN,AMDGCN-FEATURE %s + + // For all targets 'denorms-are-zero' attribute is set to 'true' // if '-cl-denorms-are-zero' was specified and to 'false' otherwise. @@ -17,9 +35,11 @@ // explicitly set. amdgcn target always do not flush fp64 denormals. The control for fp64 and fp16 denormals is the same. // AMDGCN-LABEL: define void @f() -// AMDGCN: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="true" {{.*}} "target-features"="{{[^"]*}}+fp64-fp16-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}" -// AMDGCN-DENORM-LABEL: define void @f() -// AMDGCN-DENORM: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="false" {{.*}} "target-features"="{{[^"]*}}+fp64-fp16-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}" -// AMDGCN-FEATURE-LABEL: define void @f() + +// AMDGCN-FLUSH: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="false" {{.*}} "target-features"="{{[^"]*}}+fp64-fp16-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}" +// AMDGCN-FLUSH-OPT: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="true" {{.*}} "target-features"="{{[^"]*}}+fp64-fp16-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}" + +// AMDGCN-DENORM: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="false" {{.*}} "target-features"="{{[^"]*}}+fp32-denormals,{{[^"]*}}+fp64-fp16-denormals{{[^"]*}}" + // AMDGCN-FEATURE: attributes #{{[0-9]*}} = {{{[^}]*}} "denorms-are-zero"="true" {{.*}} "target-features"="{{[^"]*}}+fp32-denormals,{{[^"]*}}-fp64-fp16-denormals{{[^"]*}}" void f() {}