Index: clang/include/clang/Basic/CodeGenOptions.def =================================================================== --- clang/include/clang/Basic/CodeGenOptions.def +++ clang/include/clang/Basic/CodeGenOptions.def @@ -425,6 +425,10 @@ /// according to the field declaring type width. CODEGENOPT(AAPCSBitfieldWidth, 1, 1) +/// Whether floating-point opcodes that support exception flag gathering +/// quiet and propagate signaling NaN inputs per IEEE 754-2008 (AMDGPU only). +CODEGENOPT(EmitIEEENaNCompliantInsts, 1, 1) + #undef CODEGENOPT #undef ENUM_CODEGENOPT #undef VALUE_CODEGENOPT Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -3171,6 +3171,12 @@ Values<"command,reactor">, HelpText<"Execution model (WebAssembly only)">; +def mamdgpu_ieee : Flag<["-"], "mamdgpu-ieee">, Flags<[CC1Option]>, + Group<m_Group>, HelpText<"Floating point opcodes that support exception flag " + "gathering quiet and propagate signaling NaN inputs per IEEE 754-2008 (AMDGPU only)">; +def mno_amdgpu_ieee : Flag<["-"], "mno-amdgpu-ieee">, Flags<[CC1Option]>, + Group<m_Group>; + def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>, HelpText<"Specify code object ABI version. Defaults to 3. 
(AMDGPU only)">, MetaVarName<"<version>">, Values<"2,3,4">; Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -9166,6 +9166,9 @@ if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics()) F->addFnAttr("amdgpu-unsafe-fp-atomics", "true"); + + if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts) + F->addFnAttr("amdgpu-ieee", "false"); } unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const { Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -1943,6 +1943,13 @@ else if (Args.hasArg(options::OPT_fno_finite_loops)) Opts.FiniteLoops = CodeGenOptions::FiniteLoopsKind::Never; + // When NaNs are not honored, floating point opcodes that support exception + // flag gathering do not need to quiet or propagate signaling NaN inputs + // per IEEE 754-2008. Note that this only concerns signaling NaNs. 
+ Opts.EmitIEEENaNCompliantInsts = + Args.hasFlag(options::OPT_mamdgpu_ieee, options::OPT_mno_amdgpu_ieee, + !LangOptsRef.NoHonorNaNs); + return Diags.getNumErrors() == NumErrorsBefore; } Index: clang/test/CodeGenOpenCL/amdgpu-ieee.cl =================================================================== --- /dev/null +++ clang/test/CodeGenOpenCL/amdgpu-ieee.cl @@ -0,0 +1,48 @@ +// REQUIRES: amdgpu-registered-target +// +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=COMMON,ON %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -mamdgpu-ieee | FileCheck -check-prefixes=COMMON,ON %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -mno-amdgpu-ieee | FileCheck -check-prefixes=COMMON,OFF %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -menable-no-nans | FileCheck -check-prefixes=COMMON,OFF %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -menable-no-nans -mamdgpu-ieee | FileCheck -check-prefixes=COMMON,ON %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -cl-fast-relaxed-math | FileCheck -check-prefixes=COMMON,OFF %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -cl-fast-relaxed-math -mamdgpu-ieee \ +// RUN: | FileCheck -check-prefixes=COMMON,ON %s + +// Check AMDGCN ISA generation. 
+ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \ +// RUN: -mamdgpu-ieee \ +// RUN: | FileCheck -check-prefixes=ISA-ON %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \ +// RUN: -mno-amdgpu-ieee \ +// RUN: | FileCheck -check-prefixes=ISA-OFF %s + +// COMMON: define{{.*}} amdgpu_kernel void @kern{{.*}} [[ATTRS1:#[0-9]+]] +// ISA-ON: v_mul_f32_e64 v{{[0-9]+}}, 1.0, s{{[0-9]+}} +// ISA-ON: v_mul_f32_e64 v{{[0-9]+}}, 1.0, s{{[0-9]+}} +// ISA-ON: v_min_f32_e32 +// ISA-ON: ; IeeeMode: 1 +// ISA-OFF-NOT: v_mul_f32_e64 v{{[0-9]+}}, 1.0, s{{[0-9]+}} +// ISA-OFF-NOT: v_mul_f32_e64 v{{[0-9]+}}, 1.0, s{{[0-9]+}} +// ISA-OFF: v_min_f32_e32 +// ISA-OFF: ; IeeeMode: 0 +kernel void kern(global float *x, float y, float z) { + *x = __builtin_fmin(y, z); +} + +// COMMON: define{{.*}}void @fun() [[ATTRS2:#[0-9]+]] +void fun() { +} + +// ON-NOT: attributes [[ATTRS1]] = {{.*}} "amdgpu-ieee" +// OFF: attributes [[ATTRS1]] = {{.*}} "amdgpu-ieee"="false" +// ON-NOT: attributes [[ATTRS2]] = {{.*}} "amdgpu-ieee" +// OFF: attributes [[ATTRS2]] = {{.*}} "amdgpu-ieee"="false"