Index: lib/Target/AMDGPU/AMDGPU.td
===================================================================
--- lib/Target/AMDGPU/AMDGPU.td
+++ lib/Target/AMDGPU/AMDGPU.td
@@ -59,6 +59,12 @@
         "Assuming f32 fma is at least as fast as mul + add",
         []>;
 
+def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops",
+        "HalfRate64Ops",
+        "true",
+        "Most fp64 instructions are half rate instead of quarter",
+        []>;
+
 // Some instructions do not support denormals despite this flag. Using
 // fp32 denormals also causes instructions to run at the double
 // precision rate for the device.
Index: lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -68,6 +68,7 @@
   bool FP64Denormals;
   bool FP32Denormals;
   bool FastFMAF32;
+  bool HalfRate64Ops;
   bool CaymanISA;
   bool FlatAddressSpace;
   bool FlatForGlobal;
@@ -157,6 +158,12 @@
     return FastFMAF32;
   }
 
+  /// True when the "half-rate-64-ops" feature is set, i.e. most fp64
+  /// instructions are half rate instead of quarter rate.
+  bool hasHalfRate64Ops() const {
+    return HalfRate64Ops;
+  }
+
   bool hasFlatAddressSpace() const {
     return FlatAddressSpace;
   }
Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -70,11 +70,14 @@
       DumpCode(false), R600ALUInst(false), HasVertexCache(false),
       TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false),
       FP64Denormals(false), FP32Denormals(false), FastFMAF32(false),
-      CaymanISA(false), FlatAddressSpace(false), FlatForGlobal(false),
-      EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true),
-      EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false),
+      HalfRate64Ops(false), CaymanISA(false), FlatAddressSpace(false),
+      FlatForGlobal(false), EnableIRStructurizer(true),
+      EnablePromoteAlloca(false),
+      EnableIfCvt(true), EnableLoadStoreOpt(false),
+      EnableUnsafeDSOffsetFolding(false),
       EnableXNACK(false),
-      WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
+      WavefrontSize(0), CFALUBug(false),
+      LocalMemorySize(0),
       EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
       GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
       IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false),
Index: lib/Target/AMDGPU/Processors.td
===================================================================
--- lib/Target/AMDGPU/Processors.td
+++ lib/Target/AMDGPU/Processors.td
@@ -84,11 +84,11 @@
 //===----------------------------------------------------------------------===//
 
 def : ProcessorModel<"SI", SIFullSpeedModel,
-  [FeatureSouthernIslands, FeatureFastFMAF32]
+  [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops]
 >;
 
-def : ProcessorModel<"tahiti", SIFullSpeedModel,
-  [FeatureSouthernIslands, FeatureFastFMAF32]
+def : ProcessorModel<"tahiti", SIFullSpeedModel,
+  [FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops]
 >;
 
 def : ProcessorModel<"pitcairn", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
@@ -116,8 +116,8 @@
 >;
 
 def : ProcessorModel<"hawaii", SIFullSpeedModel,
-  [FeatureSeaIslands, FeatureFastFMAF32, FeatureLDSBankCount32,
-   FeatureISAVersion7_0_1]
+  [FeatureSeaIslands, FeatureFastFMAF32, HalfRate64Ops,
+   FeatureLDSBankCount32, FeatureISAVersion7_0_1]
 >;
 
 def : ProcessorModel<"mullins", SIQuarterSpeedModel,