Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1571,10 +1571,21 @@ MFI->Mode.IEEE = YamlMFI.Mode.IEEE; MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp; - MFI->Mode.FP32InputDenormals = YamlMFI.Mode.FP32InputDenormals; - MFI->Mode.FP32OutputDenormals = YamlMFI.Mode.FP32OutputDenormals; - MFI->Mode.FP64FP16InputDenormals = YamlMFI.Mode.FP64FP16InputDenormals; - MFI->Mode.FP64FP16OutputDenormals = YamlMFI.Mode.FP64FP16OutputDenormals; + + // FIXME: Move proper support for denormal-fp-math into base MachineFunction + MFI->Mode.FP32Denormals.Input = YamlMFI.Mode.FP32InputDenormals + ? DenormalMode::IEEE + : DenormalMode::PreserveSign; + MFI->Mode.FP32Denormals.Output = YamlMFI.Mode.FP32OutputDenormals + ? DenormalMode::IEEE + : DenormalMode::PreserveSign; + + MFI->Mode.FP64FP16Denormals.Input = YamlMFI.Mode.FP64FP16InputDenormals + ? DenormalMode::IEEE + : DenormalMode::PreserveSign; + MFI->Mode.FP64FP16Denormals.Output = YamlMFI.Mode.FP64FP16OutputDenormals + ? DenormalMode::IEEE + : DenormalMode::PreserveSign; return false; } Index: llvm/lib/Target/AMDGPU/SIFoldOperands.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1449,10 +1449,11 @@ case AMDGPU::V_MUL_F16_t16_e64: case AMDGPU::V_MUL_F16_e64: { // If output denormals are enabled, omod is ignored. - if ((Op == AMDGPU::V_MUL_F32_e64 && MFI->getMode().FP32OutputDenormals) || + if ((Op == AMDGPU::V_MUL_F32_e64 && + MFI->getMode().FP32Denormals.Output != DenormalMode::PreserveSign) || ((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F16_e64 || Op == AMDGPU::V_MUL_F16_t16_e64) && - MFI->getMode().FP64FP16OutputDenormals)) + MFI->getMode().FP64FP16Denormals.Output != DenormalMode::PreserveSign)) return std::make_pair(nullptr, SIOutMods::NONE); const MachineOperand *RegOp = nullptr; @@ -1483,10 +1484,11 @@ case AMDGPU::V_ADD_F16_e64: case AMDGPU::V_ADD_F16_t16_e64: { // If output denormals are enabled, omod is ignored. - if ((Op == AMDGPU::V_ADD_F32_e64 && MFI->getMode().FP32OutputDenormals) || + if ((Op == AMDGPU::V_ADD_F32_e64 && + MFI->getMode().FP32Denormals.Output != DenormalMode::PreserveSign) || ((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F16_e64 || Op == AMDGPU::V_ADD_F16_t16_e64) && - MFI->getMode().FP64FP16OutputDenormals)) + MFI->getMode().FP64FP16Denormals.Output != DenormalMode::PreserveSign)) return std::make_pair(nullptr, SIOutMods::NONE); // Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -217,10 +217,13 @@ SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) { IEEE = Mode.IEEE; DX10Clamp = Mode.DX10Clamp; - FP32InputDenormals = Mode.FP32InputDenormals; - FP32OutputDenormals = Mode.FP32OutputDenormals; - FP64FP16InputDenormals = Mode.FP64FP16InputDenormals; - FP64FP16OutputDenormals = Mode.FP64FP16OutputDenormals; + FP32InputDenormals = Mode.FP32Denormals.Input != DenormalMode::PreserveSign; + FP32OutputDenormals = + Mode.FP32Denormals.Output != DenormalMode::PreserveSign; + FP64FP16InputDenormals = + Mode.FP64FP16Denormals.Input != DenormalMode::PreserveSign; + FP64FP16OutputDenormals = + Mode.FP64FP16Denormals.Output != DenormalMode::PreserveSign; } bool operator ==(const SIMode Other) const { Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -10,6 +10,7 @@ #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H #include "SIDefines.h" +#include "llvm/ADT/FloatingPointMode.h" #include "llvm/IR/CallingConv.h" #include "llvm/Support/Alignment.h" #include @@ -1295,21 +1296,17 @@ /// If this is set, neither input or output denormals are flushed for most f32 /// instructions. - bool FP32InputDenormals : 1; - bool FP32OutputDenormals : 1; + DenormalMode FP32Denormals; /// If this is set, neither input or output denormals are flushed for both f64 /// and f16/v2f16 instructions. - bool FP64FP16InputDenormals : 1; - bool FP64FP16OutputDenormals : 1; + DenormalMode FP64FP16Denormals; SIModeRegisterDefaults() : IEEE(true), DX10Clamp(true), - FP32InputDenormals(true), - FP32OutputDenormals(true), - FP64FP16InputDenormals(true), - FP64FP16OutputDenormals(true) {} + FP32Denormals(DenormalMode::getIEEE()), + FP64FP16Denormals(DenormalMode::getIEEE()) {} SIModeRegisterDefaults(const Function &F); @@ -1321,42 +1318,40 @@ bool operator ==(const SIModeRegisterDefaults Other) const { return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp && - FP32InputDenormals == Other.FP32InputDenormals && - FP32OutputDenormals == Other.FP32OutputDenormals && - FP64FP16InputDenormals == Other.FP64FP16InputDenormals && - FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals; + FP32Denormals == Other.FP32Denormals && + FP64FP16Denormals == Other.FP64FP16Denormals; } bool allFP32Denormals() const { - return FP32InputDenormals && FP32OutputDenormals; + return FP32Denormals == DenormalMode::getIEEE(); } bool allFP64FP16Denormals() const { - return FP64FP16InputDenormals && FP64FP16OutputDenormals; + return FP64FP16Denormals == DenormalMode::getIEEE(); } /// Get the encoding value for the FP_DENORM bits of the mode register for the /// FP32 denormal mode. uint32_t fpDenormModeSPValue() const { - if (FP32InputDenormals && FP32OutputDenormals) - return FP_DENORM_FLUSH_NONE; - if (FP32InputDenormals) + if (FP32Denormals == DenormalMode::getPreserveSign()) + return FP_DENORM_FLUSH_IN_FLUSH_OUT; + if (FP32Denormals.Output == DenormalMode::PreserveSign) return FP_DENORM_FLUSH_OUT; - if (FP32OutputDenormals) + if (FP32Denormals.Input == DenormalMode::PreserveSign) return FP_DENORM_FLUSH_IN; - return FP_DENORM_FLUSH_IN_FLUSH_OUT; + return FP_DENORM_FLUSH_NONE; } /// Get the encoding value for the FP_DENORM bits of the mode register for the /// FP64/FP16 denormal mode. uint32_t fpDenormModeDPValue() const { - if (FP64FP16InputDenormals && FP64FP16OutputDenormals) - return FP_DENORM_FLUSH_NONE; - if (FP64FP16InputDenormals) + if (FP64FP16Denormals == DenormalMode::getPreserveSign()) + return FP_DENORM_FLUSH_IN_FLUSH_OUT; + if (FP64FP16Denormals.Output == DenormalMode::PreserveSign) return FP_DENORM_FLUSH_OUT; - if (FP64FP16OutputDenormals) + if (FP64FP16Denormals.Input == DenormalMode::PreserveSign) return FP_DENORM_FLUSH_IN; - return FP_DENORM_FLUSH_IN_FLUSH_OUT; + return FP_DENORM_FLUSH_NONE; } /// Returns true if a flag is compatible if it's enabled in the callee, but @@ -1374,10 +1369,20 @@ return false; // Allow inlining denormals enabled into denormals flushed functions. - return oneWayCompatible(FP64FP16InputDenormals, CalleeMode.FP64FP16InputDenormals) && - oneWayCompatible(FP64FP16OutputDenormals, CalleeMode.FP64FP16OutputDenormals) && - oneWayCompatible(FP32InputDenormals, CalleeMode.FP32InputDenormals) && - oneWayCompatible(FP32OutputDenormals, CalleeMode.FP32OutputDenormals); + return oneWayCompatible(FP64FP16Denormals.Input != + DenormalMode::PreserveSign, + CalleeMode.FP64FP16Denormals.Input != + DenormalMode::PreserveSign) && + oneWayCompatible(FP64FP16Denormals.Output != + DenormalMode::PreserveSign, + CalleeMode.FP64FP16Denormals.Output != + DenormalMode::PreserveSign) && + oneWayCompatible(FP32Denormals.Input != DenormalMode::PreserveSign, + CalleeMode.FP32Denormals.Input != + DenormalMode::PreserveSign) && + oneWayCompatible(FP32Denormals.Output != DenormalMode::PreserveSign, + CalleeMode.FP32Denormals.Output != + DenormalMode::PreserveSign); } }; Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -2560,23 +2560,15 @@ DX10Clamp = DX10ClampAttr == "true"; StringRef DenormF32Attr = F.getFnAttribute("denormal-fp-math-f32").getValueAsString(); - if (!DenormF32Attr.empty()) { - DenormalMode DenormMode = parseDenormalFPAttribute(DenormF32Attr); - FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE; - FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE; - } + if (!DenormF32Attr.empty()) + FP32Denormals = parseDenormalFPAttribute(DenormF32Attr); StringRef DenormAttr = F.getFnAttribute("denormal-fp-math").getValueAsString(); if (!DenormAttr.empty()) { DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr); - - if (DenormF32Attr.empty()) { - FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE; - FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE; - } - - FP64FP16InputDenormals = DenormMode.Input == DenormalMode::IEEE; - FP64FP16OutputDenormals = DenormMode.Output == DenormalMode::IEEE; + if (DenormF32Attr.empty()) + FP32Denormals = DenormMode; + FP64FP16Denormals = DenormMode; } }