// Patch summary (commentary only; the diff text below is left exactly as received).
//
// Adds a switch controlling IEEE754-2008 NaN compliance for AMDGPU across five files:
//   * CodeGenOptions.def     - declares CODEGENOPT(EmitIEEENaNCompliantInsts, 1, 1).
//   * Options.td             - defines -mamdgpu-ieee and -mno-amdgpu-ieee, both flagged CC1Option.
//   * TargetInfo.cpp         - when EmitIEEENaNCompliantInsts is false, adds the function
//                              attribute "amdgpu-ieee"="false"; nothing is added otherwise.
//   * CompilerInvocation.cpp - sets the option via Args.hasFlag(mamdgpu_ieee, mno_amdgpu_ieee,
//                              !Opts.NoNaNsFPMath), so the no-NaNs setting supplies the fallback
//                              when neither flag is passed.
//   * amdgpu-ieee.cl (new)   - expects "amdgpu-ieee"="false" for -mno-amdgpu-ieee,
//                              -menable-no-nans and -cl-fast-relaxed-math, and expects the
//                              attribute to be absent by default and whenever -mamdgpu-ieee
//                              is also given.
//
// NOTE(review): this copy of the patch has its line breaks collapsed, and the hunk header for
// CompilerInvocation.cpp is split across the two physical lines below.
// NOTE(review): `Group,` / `Group;` and `MetaVarName<"">` in the Options.td hunk look like they
// lost an angle-bracket argument during extraction - confirm against the original patch.
Index: clang/include/clang/Basic/CodeGenOptions.def =================================================================== --- clang/include/clang/Basic/CodeGenOptions.def +++ clang/include/clang/Basic/CodeGenOptions.def @@ -396,6 +396,9 @@ /// Whether to not follow the AAPCS that enforce at least one read before storing to a volatile bitfield CODEGENOPT(ForceAAPCSBitfieldLoad, 1, 0) +/// Whether to emit IEEE754-2008 NaN compliant instructions if available (AMDGPU Only) +CODEGENOPT(EmitIEEENaNCompliantInsts, 1, 1) + #undef CODEGENOPT #undef ENUM_CODEGENOPT #undef VALUE_CODEGENOPT Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -2402,6 +2402,11 @@ HelpText<"Generate additional code for specified of debugger ABI (AMDGPU only)">, MetaVarName<"">; +def mamdgpu_ieee : Flag<["-"], "mamdgpu-ieee">, Flags<[CC1Option]>, + Group, HelpText<"Enable IEEE754-2008 NaN compliance in supported AMDGPU instructions">; +def mno_amdgpu_ieee : Flag<["-"], "mno-amdgpu-ieee">, Flags<[CC1Option]>, + Group; + def mcode_object_v3 : Flag<["-"], "mcode-object-v3">, Group, HelpText<"Enable code object v3 (AMDGPU only)">; def mno_code_object_v3 : Flag<["-"], "mno-code-object-v3">, Group, Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -8512,6 +8512,9 @@ if (NumVGPR != 0) F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); } + + if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts) + F->addFnAttr("amdgpu-ieee", "false"); } unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const { Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ 
-1430,6 +1430,11 @@ std::string(Args.getLastArgValue(OPT_fsymbol_partition_EQ)); Opts.ForceAAPCSBitfieldLoad = Args.hasArg(OPT_ForceAAPCSBitfieldLoad); + + Opts.EmitIEEENaNCompliantInsts = + Args.hasFlag(options::OPT_mamdgpu_ieee, options::OPT_mno_amdgpu_ieee, + !Opts.NoNaNsFPMath); + return Success; } Index: clang/test/CodeGenOpenCL/amdgpu-ieee.cl =================================================================== --- /dev/null +++ clang/test/CodeGenOpenCL/amdgpu-ieee.cl @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=COM,ON %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -mamdgpu-ieee | FileCheck -check-prefixes=COM,ON %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -mno-amdgpu-ieee | FileCheck -check-prefixes=COM,OFF %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -menable-no-nans | FileCheck -check-prefixes=COM,OFF %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -menable-no-nans -mamdgpu-ieee | FileCheck -check-prefixes=COM,ON %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -cl-fast-relaxed-math | FileCheck -check-prefixes=COM,OFF %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -cl-fast-relaxed-math -mamdgpu-ieee \ +// RUN: | FileCheck -check-prefixes=COM,ON %s + +kernel void kern() { +// COM: define amdgpu_kernel void @kern() [[ATTRS:#[0-9]+]] +} + +// ON-NOT: attributes [[ATTRS]] = {{.*}} "amdgpu-ieee" +// OFF: attributes [[ATTRS]] = {{.*}} "amdgpu-ieee"="false"