Index: cfe/trunk/include/clang/Basic/TargetInfo.h
===================================================================
--- cfe/trunk/include/clang/Basic/TargetInfo.h
+++ cfe/trunk/include/clang/Basic/TargetInfo.h
@@ -40,6 +40,7 @@
 namespace clang {
 class DiagnosticsEngine;
 class LangOptions;
+class CodeGenOptions;
 class MacroBuilder;
 class SourceLocation;
 class SourceManager;
@@ -797,6 +798,13 @@
   /// language options which change the target configuration.
   virtual void adjust(const LangOptions &Opts);
 
+  /// \brief Adjust target options based on codegen options.
+  ///
+  /// The default implementation does nothing. A target may override it to
+  /// modify \p TargetOpts (for example its feature list) according to
+  /// \p CGOpts.
+  virtual void adjustTargetOptions(const CodeGenOptions &CGOpts,
+                                   TargetOptions &TargetOpts) const {}
+
   /// \brief Initialize the map with the default set of target features for the
   /// CPU this should include all legal feature strings on the target.
   ///
Index: cfe/trunk/include/clang/Frontend/CodeGenOptions.def
===================================================================
--- cfe/trunk/include/clang/Frontend/CodeGenOptions.def
+++ cfe/trunk/include/clang/Frontend/CodeGenOptions.def
@@ -110,6 +110,7 @@
 CODEGENOPT(NoInline          , 1, 0) ///< Set when -fno-inline is enabled.
                                      ///< Disables use of the inline keyword.
 CODEGENOPT(NoNaNsFPMath      , 1, 0) ///< Assume FP arguments, results not NaN.
+CODEGENOPT(FlushDenorm       , 1, 0) ///< Allow FP denormals to be flushed to zero.
 CODEGENOPT(NoZeroInitializedInBSS , 1, 0) ///< -fno-zero-initialized-in-bss.
 /// \brief Method of Objective-C dispatch to use.
 ENUM_CODEGENOPT(ObjCDispatchMethod, ObjCDispatchMethodKind, 2, Legacy)
Index: cfe/trunk/lib/Basic/Targets.cpp
===================================================================
--- cfe/trunk/lib/Basic/Targets.cpp
+++ cfe/trunk/lib/Basic/Targets.cpp
@@ -21,6 +21,7 @@
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/TargetOptions.h"
 #include "clang/Basic/Version.h"
+#include "clang/Frontend/CodeGenOptions.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
@@ -1960,23 +1961,28 @@
   bool hasFP64:1;
   bool hasFMAF:1;
   bool hasLDEXPF:1;
+  bool hasDenormSupport:1;
 
   static bool isAMDGCN(const llvm::Triple &TT) {
     return TT.getArch() == llvm::Triple::amdgcn;
   }
 
 public:
-  AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
+  AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
     : TargetInfo(Triple) ,
       GPU(isAMDGCN(Triple) ? GK_SOUTHERN_ISLANDS : GK_R600),
       hasFP64(false),
       hasFMAF(false),
-      hasLDEXPF(false) {
+      hasLDEXPF(false),
+      hasDenormSupport(false) {
     if (getTriple().getArch() == llvm::Triple::amdgcn) {
       hasFP64 = true;
       hasFMAF = true;
       hasLDEXPF = true;
     }
+    // Denormal-mode features are currently only added for the "fiji" CPU.
+    if (Opts.CPU == "fiji")
+      hasDenormSupport = true;
 
     resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn ?
                     DataLayoutStringSI : DataLayoutStringR600);
@@ -2025,6 +2031,32 @@
                  DiagnosticsEngine &Diags, StringRef CPU,
                  const std::vector<std::string> &FeatureVec) const override;
 
+  /// \brief Add the fp32-denormals / fp64-denormals target features.
+  ///
+  /// Does nothing unless hasDenormSupport is set. A feature that already
+  /// appears in TargetOpts.FeaturesAsWritten (with either sign) is left
+  /// alone; otherwise it is appended to TargetOpts.Features with '-' when
+  /// CGOpts.FlushDenorm is set and '+' when it is not. The fp64 feature is
+  /// only appended when hasFP64 is set.
+  void adjustTargetOptions(const CodeGenOptions &CGOpts,
+                           TargetOptions &TargetOpts) const override {
+    if (!hasDenormSupport)
+      return;
+    bool hasFP32Denormals = false;
+    bool hasFP64Denormals = false;
+    for (const auto &I : TargetOpts.FeaturesAsWritten) {
+      if (I == "+fp32-denormals" || I == "-fp32-denormals")
+        hasFP32Denormals = true;
+      if (I == "+fp64-denormals" || I == "-fp64-denormals")
+        hasFP64Denormals = true;
+    }
+    if (!hasFP32Denormals)
+      TargetOpts.Features.push_back((Twine(CGOpts.FlushDenorm ? '-' : '+') +
+          Twine("fp32-denormals")).str());
+    if (!hasFP64Denormals && hasFP64)
+      TargetOpts.Features.push_back((Twine(CGOpts.FlushDenorm ? '-' : '+') +
+          Twine("fp64-denormals")).str());
+  }
+
   ArrayRef<Builtin::Info> getTargetBuiltins() const override {
     return llvm::makeArrayRef(BuiltinInfo,
                         clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin);
Index: cfe/trunk/lib/Frontend/CompilerInstance.cpp
===================================================================
--- cfe/trunk/lib/Frontend/CompilerInstance.cpp
+++ cfe/trunk/lib/Frontend/CompilerInstance.cpp
@@ -842,6 +842,9 @@
   // created. This complexity should be lifted elsewhere.
   getTarget().adjust(getLangOpts());
 
+  // Adjust target options based on codegen options.
+  getTarget().adjustTargetOptions(getCodeGenOpts(), getTargetOpts());
+
   // rewriter project will change target built-in bool type from its default.
   if (getFrontendOpts().ProgramAction == frontend::RewriteObjC)
     getTarget().noSignedCharForObjCBool();
Index: cfe/trunk/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- cfe/trunk/lib/Frontend/CompilerInvocation.cpp
+++ cfe/trunk/lib/Frontend/CompilerInvocation.cpp
@@ -571,6 +571,7 @@
                        Args.hasArg(OPT_cl_fast_relaxed_math));
   Opts.NoSignedZeros = (Args.hasArg(OPT_fno_signed_zeros) ||
                         Args.hasArg(OPT_cl_no_signed_zeros));
+  Opts.FlushDenorm = Args.hasArg(OPT_cl_denorms_are_zero);
   Opts.ReciprocalMath = Args.hasArg(OPT_freciprocal_math);
   Opts.NoZeroInitializedInBSS = Args.hasArg(OPT_mno_zero_initialized_in_bss);
   Opts.BackendOptions = Args.getAllArgValues(OPT_backend_option);
Index: cfe/trunk/test/CodeGenOpenCL/denorms-are-zero.cl
===================================================================
--- cfe/trunk/test/CodeGenOpenCL/denorms-are-zero.cl
+++ cfe/trunk/test/CodeGenOpenCL/denorms-are-zero.cl
@@ -1,5 +1,13 @@
 // RUN: %clang_cc1 -S -cl-denorms-are-zero -o - %s 2>&1
+// RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck %s --check-prefix=CHECK-DENORM
 
-// This test just checks that the -cl-denorms-are-zero argument is accepted
+// For non-amdgcn targets, this test just checks that the -cl-denorms-are-zero argument is accepted
 // by clang. This option is currently a no-op, which is allowed by the
 // OpenCL specification.
+
+// CHECK-DENORM-LABEL: define void @f()
+// CHECK-DENORM: attributes #{{[0-9]*}} = {{{[^}]*}} "target-features"="{{[^"]*}}+fp32-denormals,+fp64-denormals{{[^"]*}}"
+// CHECK-LABEL: define void @f()
+// CHECK-NOT: attributes #{{[0-9]*}} = {{{[^}]*}} "target-features"="{{[^"]*}}+fp32-denormals,+fp64-denormals{{[^"]*}}"
+void f() {}