This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU][GFX9] Set +fp32-denormals for >=gfx900 unless -cl-denorms-are-zero is set
ClosedPublic

Authored by kzhuravl on Mar 29 2017, 3:51 PM.

Download Raw Diff

Details

Reviewers

b-sumner
arsenm

Commits

rGe668b1cd1e78: [AMDGPU][GFX9] Set +fp32-denormals for >=gfx900 unless -cl-denorms-are-zero is…
rC300306: [AMDGPU][GFX9] Set +fp32-denormals for >=gfx900 unless -cl-denorms-are-zero is…
rL300306: [AMDGPU][GFX9] Set +fp32-denormals for >=gfx900 unless -cl-denorms-are-zero is…

Diff Detail

Repository: rL LLVM

Event Timeline

kzhuravl created this revision.Mar 29 2017, 3:51 PM

Herald added subscribers: t-tye, tpr, dstuttard and 2 others. · View Herald TranscriptMar 29 2017, 3:51 PM

arsenm added inline comments.Mar 29 2017, 3:54 PM

llvm/tools/clang/lib/Basic/Targets.cpp
2114–2116 ↗	(On Diff #93420)	This is misleading since it was true on VI as well. I think just FMA rate changed

kzhuravl added inline comments.Mar 29 2017, 4:06 PM

llvm/tools/clang/lib/Basic/Targets.cpp
2114–2116 ↗	(On Diff #93420)	Yes, GFX8 supports f32 denorms at full speed too. However, it doesn't have a full speed fma, so we didn't enable it then since it caused too many mad-heavy apps to slow down.

arsenm added inline comments.Mar 29 2017, 4:11 PM

llvm/tools/clang/lib/Basic/Targets.cpp
2114–2116 ↗	(On Diff #93420)	Yes, so the name should refer to FMA rather than just fp32 denorms

Address review feedback.

LGTM with f32 clarification

lib/Basic/Targets.cpp
2114 ↗	(On Diff #93422)	FMAF32?
2115 ↗	(On Diff #93422)	We should probably add a new subtarget feature for this, but that's a separate patch

This revision is now accepted and ready to land.Mar 29 2017, 5:55 PM

arsenm added inline comments.Mar 29 2017, 5:57 PM

lib/Basic/Targets.cpp
2114 ↗	(On Diff #93422)	Actually this also needs to specify full speed FMA with denorms. Full rate FMA is already a subtarget feature when denorms are disabled

Closed by commit rL300306: [AMDGPU][GFX9] Set +fp32-denormals for >=gfx900 unless -cl-denorms-are-zero is… (authored by kzhuravl). · Explain WhyApr 13 2017, 10:46 PM

This revision was automatically updated to reflect the committed changes.

kzhuravl marked 2 inline comments as done.

kzhuravl added inline comments.Apr 13 2017, 10:49 PM

lib/Basic/Targets.cpp
2114 ↗	(On Diff #93422)	I will do it in a separate patch. Thanks.

Revision Contents

Path

Size

cfe/

trunk/

lib/

Basic/

Targets.cpp

9 lines

test/

CodeGenOpenCL/

gfx9-fp32-denorms.cl

13 lines

Diff 95270

cfe/trunk/lib/Basic/Targets.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 2,106 Lines • ▼ Show 20 Lines	enum GPUKind {
GK_GFX7,		GK_GFX7,
GK_GFX8,		GK_GFX8,
GK_GFX9		GK_GFX9
} GPU;		} GPU;

bool hasFP64:1;		bool hasFP64:1;
bool hasFMAF:1;		bool hasFMAF:1;
bool hasLDEXPF:1;		bool hasLDEXPF:1;
bool hasFullSpeedFP32Denorms:1;
const AddrSpace AS;		const AddrSpace AS;

		static bool hasFullSpeedFMAF32(StringRef GPUName) {
		return parseAMDGCNName(GPUName) >= GK_GFX9;
		}

static bool isAMDGCN(const llvm::Triple &TT) {		static bool isAMDGCN(const llvm::Triple &TT) {
return TT.getArch() == llvm::Triple::amdgcn;		return TT.getArch() == llvm::Triple::amdgcn;
}		}

static bool isGenericZero(const llvm::Triple &TT) {		static bool isGenericZero(const llvm::Triple &TT) {
return TT.getEnvironmentName() == "amdgiz" \|\|		return TT.getEnvironmentName() == "amdgiz" \|\|
TT.getEnvironmentName() == "amdgizcl";		TT.getEnvironmentName() == "amdgizcl";
}		}
public:		public:
AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)		AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: TargetInfo(Triple) ,		: TargetInfo(Triple) ,
GPU(isAMDGCN(Triple) ? GK_GFX6 : GK_R600),		GPU(isAMDGCN(Triple) ? GK_GFX6 : GK_R600),
hasFP64(false),		hasFP64(false),
hasFMAF(false),		hasFMAF(false),
hasLDEXPF(false),		hasLDEXPF(false),
hasFullSpeedFP32Denorms(false),
AS(isGenericZero(Triple)){		AS(isGenericZero(Triple)){
if (getTriple().getArch() == llvm::Triple::amdgcn) {		if (getTriple().getArch() == llvm::Triple::amdgcn) {
hasFP64 = true;		hasFP64 = true;
hasFMAF = true;		hasFMAF = true;
hasLDEXPF = true;		hasLDEXPF = true;
}		}
auto IsGenericZero = isGenericZero(Triple);		auto IsGenericZero = isGenericZero(Triple);
resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn ?		resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn ?
▲ Show 20 Lines • Show All 53 Lines • ▼ Show 20 Lines	void adjustTargetOptions(const CodeGenOptions &CGOpts,
bool hasFP64Denormals = false;		bool hasFP64Denormals = false;
for (auto &I : TargetOpts.FeaturesAsWritten) {		for (auto &I : TargetOpts.FeaturesAsWritten) {
if (I == "+fp32-denormals" \|\| I == "-fp32-denormals")		if (I == "+fp32-denormals" \|\| I == "-fp32-denormals")
hasFP32Denormals = true;		hasFP32Denormals = true;
if (I == "+fp64-fp16-denormals" \|\| I == "-fp64-fp16-denormals")		if (I == "+fp64-fp16-denormals" \|\| I == "-fp64-fp16-denormals")
hasFP64Denormals = true;		hasFP64Denormals = true;
}		}
if (!hasFP32Denormals)		if (!hasFP32Denormals)
TargetOpts.Features.push_back((Twine(hasFullSpeedFP32Denorms &&		TargetOpts.Features.push_back(
		(Twine(hasFullSpeedFMAF32(TargetOpts.CPU) &&
!CGOpts.FlushDenorm ? '+' : '-') + Twine("fp32-denormals")).str());		!CGOpts.FlushDenorm ? '+' : '-') + Twine("fp32-denormals")).str());
// Always do not flush fp64 or fp16 denorms.		// Always do not flush fp64 or fp16 denorms.
if (!hasFP64Denormals && hasFP64)		if (!hasFP64Denormals && hasFP64)
TargetOpts.Features.push_back("+fp64-fp16-denormals");		TargetOpts.Features.push_back("+fp64-fp16-denormals");
}		}

ArrayRef<Builtin::Info> getTargetBuiltins() const override {		ArrayRef<Builtin::Info> getTargetBuiltins() const override {
return llvm::makeArrayRef(BuiltinInfo,		return llvm::makeArrayRef(BuiltinInfo,
▲ Show 20 Lines • Show All 7,350 Lines • Show Last 20 Lines

cfe/trunk/test/CodeGenOpenCL/gfx9-fp32-denorms.cl

				// REQUIRES: amdgpu-registered-target

				// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - %s \| FileCheck --check-prefix=DEFAULT %s
				// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - -target-feature +fp32-denormals %s \| FileCheck --check-prefix=FEATURE_FP32_DENORMALS_ON %s
				// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - -target-feature -fp32-denormals %s \| FileCheck --check-prefix=FEATURE_FP32_DENORMALS_OFF %s
				// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - -cl-denorms-are-zero %s \| FileCheck --check-prefix=OPT_DENORMS_ARE_ZERO %s

				// DEFAULT: +fp32-denormals
				// FEATURE_FP32_DENORMALS_ON: +fp32-denormals
				// FEATURE_FP32_DENORMALS_OFF: -fp32-denormals
				// OPT_DENORMS_ARE_ZERO: -fp32-denormals

				kernel void gfx9_fp32_denorms() {}