Index: clang/lib/Basic/Targets/AMDGPU.h =================================================================== --- clang/lib/Basic/Targets/AMDGPU.h +++ clang/lib/Basic/Targets/AMDGPU.h @@ -208,9 +208,6 @@ StringRef CPU, const std::vector &FeatureVec) const override; - void adjustTargetOptions(const CodeGenOptions &CGOpts, - TargetOptions &TargetOpts) const override; - ArrayRef getTargetBuiltins() const override; void getTargetDefines(const LangOptions &Opts, Index: clang/lib/Basic/Targets/AMDGPU.cpp =================================================================== --- clang/lib/Basic/Targets/AMDGPU.cpp +++ clang/lib/Basic/Targets/AMDGPU.cpp @@ -233,28 +233,6 @@ return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); } -void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts, - TargetOptions &TargetOpts) const { - bool hasFP32Denormals = false; - bool hasFP64Denormals = false; - - for (auto &I : TargetOpts.FeaturesAsWritten) { - if (I == "+fp32-denormals" || I == "-fp32-denormals") - hasFP32Denormals = true; - if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals") - hasFP64Denormals = true; - } - if (!hasFP32Denormals) - TargetOpts.Features.push_back( - (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && - CGOpts.FP32DenormalMode.Output == llvm::DenormalMode::IEEE - ? '+' : '-') + Twine("fp32-denormals")) - .str()); - // Always do not flush fp64 or fp16 denorms. 
- if (!hasFP64Denormals && hasFP64()) - TargetOpts.Features.push_back("+fp64-fp16-denormals"); -} - void AMDGPUTargetInfo::fillValidCPUList( SmallVectorImpl &Values) const { if (isAMDGCN(getTriple())) Index: clang/test/CodeGenCUDA/flush-denormals.cu =================================================================== --- clang/test/CodeGenCUDA/flush-denormals.cu +++ clang/test/CodeGenCUDA/flush-denormals.cu @@ -1,26 +1,26 @@ // RUN: %clang_cc1 -fcuda-is-device \ // RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \ -// RUN: FileCheck -check-prefix=NOFTZ %s +// RUN: FileCheck -check-prefixes=NOFTZ,PTXNOFTZ %s // RUN: %clang_cc1 -fcuda-is-device -fdenormal-fp-math-f32=ieee \ // RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \ -// RUN: FileCheck -check-prefix=NOFTZ %s +// RUN: FileCheck -check-prefixes=NOFTZ,PTXNOFTZ %s // RUN: %clang_cc1 -fcuda-is-device -fdenormal-fp-math-f32=preserve-sign \ // RUN: -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \ -// RUN: FileCheck -check-prefix=FTZ %s +// RUN: FileCheck -check-prefixes=FTZ,PTXFTZ %s // RUN: %clang_cc1 -fcuda-is-device -x hip \ // RUN: -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm -o - %s | \ -// RUN: FileCheck -check-prefix=AMDNOFTZ %s +// RUN: FileCheck -check-prefix=NOFTZ %s // RUN: %clang_cc1 -fcuda-is-device -x hip \ // RUN: -triple amdgcn-amd-amdhsa -target-cpu gfx900 -fdenormal-fp-math-f32=ieee -emit-llvm -o - %s | \ -// RUN: FileCheck -check-prefix=AMDNOFTZ %s +// RUN: FileCheck -check-prefix=NOFTZ %s // RUN: %clang_cc1 -fcuda-is-device -x hip -fdenormal-fp-math-f32=preserve-sign \ // RUN: -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm -o - %s | \ -// RUN: FileCheck -check-prefix=AMDFTZ %s +// RUN: FileCheck -check-prefix=FTZ %s #include "Inputs/cuda.h" @@ -29,10 +29,13 @@ // -fdenormal-fp-math-f32. Further, check that we reflect the presence or // absence of -fcuda-flush-denormals-to-zero in a module flag. -// AMDGCN targets always have +fp64-fp16-denormals. 
-// AMDGCN targets without fast FMAF (e.g. gfx803) always have +fp32-denormals. -// For AMDGCN target with fast FMAF (e.g. gfx900), it has +fp32-denormals -// by default and -fp32-denormals when there is option +// AMDGCN targets always have f64/f16 denormals enabled. +// +// AMDGCN targets without fast FMAF (e.g. gfx803) always have f32 denormal +// flushing by default. +// +// For AMDGCN target with fast FMAF (e.g. gfx900), it has ieee denormals by +// default and preserve-sign when used with the option // -fcuda-flush-denormals-to-zero. // CHECK-LABEL: define void @foo() #0 @@ -41,11 +44,8 @@ // FTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" // NOFTZ-NOT: "denormal-fp-math-f32" -// AMDNOFTZ: attributes #0 = {{.*}}+fp32-denormals{{.*}}+fp64-fp16-denormals -// AMDFTZ: attributes #0 = {{.*}}+fp64-fp16-denormals{{.*}}-fp32-denormals - -// FTZ:!llvm.module.flags = !{{{.*}}[[MODFLAG:![0-9]+]]} -// FTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 1} +// PTXFTZ:!llvm.module.flags = !{{{.*}}[[MODFLAG:![0-9]+]]} +// PTXFTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 1} -// NOFTZ:!llvm.module.flags = !{{{.*}}[[MODFLAG:![0-9]+]]} -// NOFTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 0} +// PTXNOFTZ:!llvm.module.flags = !{{{.*}}[[MODFLAG:![0-9]+]]} +// PTXNOFTZ:[[MODFLAG]] = !{i32 4, !"nvvm-reflect-ftz", i32 0} Index: clang/test/CodeGenOpenCL/amdgpu-features.cl =================================================================== --- clang/test/CodeGenOpenCL/amdgpu-features.cl +++ clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -14,15 +14,15 @@ // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx600 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s -// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime" -// GFX906:
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime" -// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime" -// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime" -// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime" -// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime" -// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+s-memrealtime" -// GFX700: "target-features"="+ci-insts,+flat-address-space,+fp64-fp16-denormals,-fp32-denormals" -// GFX600: "target-features"="+fp64-fp16-denormals,-fp32-denormals" -// GFX601: "target-features"="+fp64-fp16-denormals,-fp32-denormals" +// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime" +// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime" +// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime" +// GFX1010: 
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime" +// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime" +// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime" +// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime" +// GFX700: "target-features"="+ci-insts,+flat-address-space" +// GFX600-NOT: "target-features" +// GFX601-NOT: "target-features" kernel void test() {}