Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -49,9 +49,9 @@ using namespace llvm::AMDGPU; using namespace llvm::AMDGPU::HSAMD; -// TODO: This should get the default rounding mode from the kernel. We just set -// the default here, but this could change if the OpenCL rounding mode pragmas -// are used. +// This should get the default rounding mode from the kernel. We just set the +// default here, but this could change if the OpenCL rounding mode pragmas are +// used. // // The denormal mode here should match what is reported by the OpenCL runtime // for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but @@ -70,18 +70,10 @@ // instructions to run at the double precision rate for the device so it's // probably best to just report no single precision denormals. static uint32_t getFPMode(AMDGPU::SIModeRegisterDefaults Mode) { - - // TODO: Is there any real use for the flush in only / flush out only modes? - uint32_t FP32Denormals = - Mode.FP32Denormals ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT; - - uint32_t FP64Denormals = - Mode.FP64FP16Denormals ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT; - return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) | FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) | - FP_DENORM_MODE_SP(FP32Denormals) | - FP_DENORM_MODE_DP(FP64Denormals); + FP_DENORM_MODE_SP(Mode.fpDenormModeSPValue()) | + FP_DENORM_MODE_DP(Mode.fpDenormModeDPValue()); } static AsmPrinter * Index: llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -2109,7 +2109,7 @@ bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods); bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods); - assert((IsFMA || !Mode.FP32Denormals) && + assert((IsFMA || !Mode.allFP32Denormals()) && "fmad selected with denormals enabled"); // TODO: We can select this with f32 denormals enabled if all the sources are // converted from f16 (in which case fmad isn't legal). Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1585,7 +1585,7 @@ const AMDGPUMachineFunction *MFI = MF.getInfo(); // float fr = mad(fqneg, fb, fa); - unsigned OpCode = MFI->getMode().FP32Denormals ? + unsigned OpCode = MFI->getMode().allFP32Denormals() ? (unsigned)AMDGPUISD::FMAD_FTZ : (unsigned)ISD::FMAD; SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa); @@ -1670,7 +1670,7 @@ const SIMachineFunctionInfo *MFI = MF.getInfo(); // Compute denominator reciprocal. - unsigned FMAD = MFI->getMode().FP32Denormals ? + unsigned FMAD = MFI->getMode().allFP32Denormals() ? (unsigned)AMDGPUISD::FMAD_FTZ : (unsigned)ISD::FMAD; Index: llvm/lib/Target/AMDGPU/AMDGPUInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -101,12 +101,12 @@ PredicateControl; let RecomputePerFunction = 1 in { -def FP16Denormals : Predicate<"MF->getInfo()->getMode().FP64FP16Denormals">; -def FP32Denormals : Predicate<"MF->getInfo()->getMode().FP32Denormals">; -def FP64Denormals : Predicate<"MF->getInfo()->getMode().FP64FP16Denormals">; -def NoFP16Denormals : Predicate<"!MF->getInfo()->getMode().FP64FP16Denormals">; -def NoFP32Denormals : Predicate<"!MF->getInfo()->getMode().FP32Denormals">; -def NoFP64Denormals : Predicate<"!MF->getInfo()->getMode().FP64FP16Denormals">; +def FP16Denormals : Predicate<"MF->getInfo()->getMode().allFP64FP16Denormals()">; +def FP32Denormals : Predicate<"MF->getInfo()->getMode().allFP32Denormals()">; +def FP64Denormals : Predicate<"MF->getInfo()->getMode().allFP64FP16Denormals()">; +def NoFP16Denormals : Predicate<"!MF->getInfo()->getMode().allFP64FP16Denormals()">; +def NoFP32Denormals : Predicate<"!MF->getInfo()->getMode().allFP32Denormals()">; +def NoFP64Denormals : Predicate<"!MF->getInfo()->getMode().allFP64FP16Denormals()">; def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">; } Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1724,12 +1724,12 @@ const SIMachineFunctionInfo *MFI = MF.getInfo(); // TODO: Always legal with future ftz flag. - if (Ty == LLT::scalar(32) && !MFI->getMode().FP32Denormals) + // FIXME: Do we need just output? + if (Ty == LLT::scalar(32) && !MFI->getMode().allFP32Denormals()) return true; - if (Ty == LLT::scalar(16) && !MFI->getMode().FP64FP16Denormals) + if (Ty == LLT::scalar(16) && !MFI->getMode().allFP64FP16Denormals()) return true; - MachineIRBuilder HelperBuilder(MI); GISelObserverWrapper DummyObserver; LegalizerHelper Helper(MF, DummyObserver, HelperBuilder); @@ -1903,7 +1903,7 @@ return false; if (!Unsafe && ResTy == S32 && - MF.getInfo()->getMode().FP32Denormals) + MF.getInfo()->getMode().allFP32Denormals()) return false; if (auto CLHS = getConstantFPVRegVal(LHS, MRI)) { @@ -1984,15 +1984,13 @@ AMDGPU::SIModeRegisterDefaults Mode) { // Set SP denorm mode to this value. unsigned SPDenormMode = - Enable ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT; + Enable ? FP_DENORM_FLUSH_NONE : Mode.fpDenormModeSPValue(); if (ST.hasDenormModeInst()) { // Preserve default FP64FP16 denorm mode while updating FP32 mode. - unsigned DPDenormModeDefault = Mode.FP64FP16Denormals - ? FP_DENORM_FLUSH_NONE - : FP_DENORM_FLUSH_IN_FLUSH_OUT; + uint32_t DPDenormModeDefault = Mode.fpDenormModeDPValue(); - unsigned NewDenormModeValue = SPDenormMode | (DPDenormModeDefault << 2); + uint32_t NewDenormModeValue = SPDenormMode | (DPDenormModeDefault << 2); B.buildInstr(AMDGPU::S_DENORM_MODE) .addImm(NewDenormModeValue); @@ -2045,7 +2043,7 @@ // FIXME: Doesn't correctly model the FP mode switch, and the FP operations // aren't modeled as reading it. - if (!Mode.FP32Denormals) + if (!Mode.allFP32Denormals()) toggleSPDenormMode(true, B, ST, Mode); auto Fma0 = B.buildFMA(S32, NegDivScale0, ApproxRcp, One, Flags); @@ -2055,7 +2053,7 @@ auto Fma3 = B.buildFMA(S32, Fma2, Fma1, Mul, Flags); auto Fma4 = B.buildFMA(S32, NegDivScale0, Fma3, NumeratorScaled, Flags); - if (!Mode.FP32Denormals) + if (!Mode.allFP32Denormals()) toggleSPDenormMode(false, B, ST, Mode); auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32}, false) Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1152,8 +1152,10 @@ MFI->Mode.IEEE = YamlMFI.Mode.IEEE; MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp; - MFI->Mode.FP32Denormals = YamlMFI.Mode.FP32Denormals; - MFI->Mode.FP64FP16Denormals = YamlMFI.Mode.FP64FP16Denormals; + MFI->Mode.FP32InputDenormals = YamlMFI.Mode.FP32InputDenormals; + MFI->Mode.FP32OutputDenormals = YamlMFI.Mode.FP32OutputDenormals; + MFI->Mode.FP64FP16InputDenormals = YamlMFI.Mode.FP64FP16InputDenormals; + MFI->Mode.FP64FP16OutputDenormals = YamlMFI.Mode.FP64FP16OutputDenormals; return false; } Index: llvm/lib/Target/AMDGPU/SIFoldOperands.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1359,8 +1359,8 @@ case AMDGPU::V_MUL_F32_e64: case AMDGPU::V_MUL_F16_e64: { // If output denormals are enabled, omod is ignored. - if ((Op == AMDGPU::V_MUL_F32_e64 && MFI->getMode().FP32Denormals) || - (Op == AMDGPU::V_MUL_F16_e64 && MFI->getMode().FP64FP16Denormals)) + if ((Op == AMDGPU::V_MUL_F32_e64 && MFI->getMode().FP32OutputDenormals) || + (Op == AMDGPU::V_MUL_F16_e64 && MFI->getMode().FP64FP16OutputDenormals)) return std::make_pair(nullptr, SIOutMods::NONE); const MachineOperand *RegOp = nullptr; @@ -1389,8 +1389,8 @@ case AMDGPU::V_ADD_F32_e64: case AMDGPU::V_ADD_F16_e64: { // If output denormals are enabled, omod is ignored. - if ((Op == AMDGPU::V_ADD_F32_e64 && MFI->getMode().FP32Denormals) || - (Op == AMDGPU::V_ADD_F16_e64 && MFI->getMode().FP64FP16Denormals)) + if ((Op == AMDGPU::V_ADD_F32_e64 && MFI->getMode().FP32OutputDenormals) || + (Op == AMDGPU::V_ADD_F16_e64 && MFI->getMode().FP64FP16OutputDenormals)) return std::make_pair(nullptr, SIOutMods::NONE); // Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -102,12 +102,12 @@ static bool hasFP32Denormals(const MachineFunction &MF) { const SIMachineFunctionInfo *Info = MF.getInfo(); - return Info->getMode().FP32Denormals; + return Info->getMode().allFP32Denormals(); } static bool hasFP64FP16Denormals(const MachineFunction &MF) { const SIMachineFunctionInfo *Info = MF.getInfo(); - return Info->getMode().FP64FP16Denormals; + return Info->getMode().allFP64FP16Denormals(); } static unsigned findFirstFreeSGPR(CCState &CCInfo) { @@ -785,6 +785,7 @@ (Opcode == ISD::FMA && Subtarget->hasFmaMixInsts())) && DestVT.getScalarType() == MVT::f32 && SrcVT.getScalarType() == MVT::f16 && + // TODO: This probably only requires no input flushing? !hasFP32Denormals(DAG.getMachineFunction()); } Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -236,23 +236,29 @@ struct SIMode { bool IEEE = true; bool DX10Clamp = true; - bool FP32Denormals = true; - bool FP64FP16Denormals = true; + bool FP32InputDenormals = true; + bool FP32OutputDenormals = true; + bool FP64FP16InputDenormals = true; + bool FP64FP16OutputDenormals = true; SIMode() = default; SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) { IEEE = Mode.IEEE; DX10Clamp = Mode.DX10Clamp; - FP32Denormals = Mode.FP32Denormals; - FP64FP16Denormals = Mode.FP64FP16Denormals; + FP32InputDenormals = Mode.FP32InputDenormals; + FP32OutputDenormals = Mode.FP32OutputDenormals; + FP64FP16InputDenormals = Mode.FP64FP16InputDenormals; + FP64FP16OutputDenormals = Mode.FP64FP16OutputDenormals; } bool operator ==(const SIMode Other) const { return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp && - FP32Denormals == Other.FP32Denormals && - FP64FP16Denormals == Other.FP64FP16Denormals; + FP32InputDenormals == Other.FP32InputDenormals && + FP32OutputDenormals == Other.FP32OutputDenormals && + FP64FP16InputDenormals == Other.FP64FP16InputDenormals && + FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals; } }; @@ -260,8 +266,10 @@ static void mapping(IO &YamlIO, SIMode &Mode) { YamlIO.mapOptional("ieee", Mode.IEEE, true); YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true); - YamlIO.mapOptional("fp32-denormals", Mode.FP32Denormals, true); - YamlIO.mapOptional("fp64-fp16-denormals", Mode.FP64FP16Denormals, true); + YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true); + YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true); + YamlIO.mapOptional("fp64-fp16-input-denormals", Mode.FP64FP16InputDenormals, true); + YamlIO.mapOptional("fp64-fp16-output-denormals", Mode.FP64FP16OutputDenormals, true); } }; Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -677,20 +677,21 @@ /// If this is set, neither input or output denormals are flushed for most f32 /// instructions. - /// - /// TODO: Split into separate input and output fields if necessary like the - /// control bits really provide? - bool FP32Denormals : 1; + bool FP32InputDenormals : 1; + bool FP32OutputDenormals : 1; /// If this is set, neither input or output denormals are flushed for both f64 /// and f16/v2f16 instructions. - bool FP64FP16Denormals : 1; + bool FP64FP16InputDenormals : 1; + bool FP64FP16OutputDenormals : 1; SIModeRegisterDefaults() : IEEE(true), DX10Clamp(true), - FP32Denormals(true), - FP64FP16Denormals(true) {} + FP32InputDenormals(true), + FP32OutputDenormals(true), + FP64FP16InputDenormals(true), + FP64FP16OutputDenormals(true) {} // FIXME: Should not depend on the subtarget SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST); @@ -701,15 +702,51 @@ SIModeRegisterDefaults Mode; Mode.DX10Clamp = true; Mode.IEEE = IsCompute; - Mode.FP32Denormals = false; // FIXME: Should be on by default. - Mode.FP64FP16Denormals = true; + Mode.FP32InputDenormals = false; // FIXME: Should be on by default. + Mode.FP32OutputDenormals = false; // FIXME: Should be on by default. + Mode.FP64FP16InputDenormals = true; + Mode.FP64FP16OutputDenormals = true; return Mode; } bool operator ==(const SIModeRegisterDefaults Other) const { return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp && - FP32Denormals == Other.FP32Denormals && - FP64FP16Denormals == Other.FP64FP16Denormals; + FP32InputDenormals == Other.FP32InputDenormals && + FP32OutputDenormals == Other.FP32OutputDenormals && + FP64FP16InputDenormals == Other.FP64FP16InputDenormals && + FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals; + } + + bool allFP32Denormals() const { + return FP32InputDenormals && FP32OutputDenormals; + } + + bool allFP64FP16Denormals() const { + return FP64FP16InputDenormals && FP64FP16OutputDenormals; + } + + /// Get the encoding value for the FP_DENORM bits of the mode register for the + /// FP32 denormal mode. + uint32_t fpDenormModeSPValue() const { + if (FP32InputDenormals && FP32OutputDenormals) + return FP_DENORM_FLUSH_NONE; + if (FP32InputDenormals) + return FP_DENORM_FLUSH_OUT; + if (FP32OutputDenormals) + return FP_DENORM_FLUSH_IN; + return FP_DENORM_FLUSH_IN_FLUSH_OUT; + } + + /// Get the encoding value for the FP_DENORM bits of the mode register for the + /// FP64/FP16 denormal mode. + uint32_t fpDenormModeDPValue() const { + if (FP64FP16InputDenormals && FP64FP16OutputDenormals) + return FP_DENORM_FLUSH_NONE; + if (FP64FP16InputDenormals) + return FP_DENORM_FLUSH_OUT; + if (FP64FP16OutputDenormals) + return FP_DENORM_FLUSH_IN; + return FP_DENORM_FLUSH_IN_FLUSH_OUT; } /// Returns true if a flag is compatible if it's enabled in the callee, but @@ -727,8 +764,10 @@ return false; // Allow inlining denormals enabled into denormals flushed functions. - return oneWayCompatible(FP64FP16Denormals, CalleeMode.FP64FP16Denormals) && - oneWayCompatible(FP32Denormals, CalleeMode.FP32Denormals); + return oneWayCompatible(FP64FP16InputDenormals, CalleeMode.FP64FP16InputDenormals) && + oneWayCompatible(FP64FP16OutputDenormals, CalleeMode.FP64FP16OutputDenormals) && + oneWayCompatible(FP32InputDenormals, CalleeMode.FP32InputDenormals) && + oneWayCompatible(FP32OutputDenormals, CalleeMode.FP32OutputDenormals); } }; Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1316,8 +1316,11 @@ if (!DX10ClampAttr.empty()) DX10Clamp = DX10ClampAttr == "true"; - FP32Denormals = ST.hasFP32Denormals(F); - FP64FP16Denormals = ST.hasFP64FP16Denormals(F); + // FIXME: Split this when denormal-fp-math is used + FP32InputDenormals = ST.hasFP32Denormals(F); + FP32OutputDenormals = FP32InputDenormals; + FP64FP16InputDenormals = ST.hasFP64FP16Denormals(F); + FP64FP16OutputDenormals = FP64FP16InputDenormals; } namespace { Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir @@ -8,7 +8,8 @@ regBankSelected: true machineFunctionInfo: mode: - fp64-fp16-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true body: | bb.0: @@ -30,7 +31,8 @@ regBankSelected: true machineFunctionInfo: mode: - fp64-fp16-denormals: false + fp64-fp16-input-denormals: false + fp64-fp16-output-denormals: false body: | bb.0: @@ -52,7 +54,8 @@ regBankSelected: true machineFunctionInfo: mode: - fp32-denormals: true + fp32-input-denormals: true + fp32-output-denormals: true body: | bb.0: @@ -74,7 +77,8 @@ regBankSelected: true machineFunctionInfo: mode: - fp32-denormals: false + fp32-input-denormals: false + fp32-output-denormals: false body: | bb.0: @@ -96,7 +100,8 @@ regBankSelected: true machineFunctionInfo: mode: - fp64-fp16-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true body: | bb.0: @@ -118,7 +123,8 @@ regBankSelected: true machineFunctionInfo: mode: - fp64-fp16-denormals: false + fp64-fp16-input-denormals: false + fp64-fp16-output-denormals: false body: | bb.0: @@ -140,7 +146,8 @@ regBankSelected: true machineFunctionInfo: mode: - fp64-fp16-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true body: | bb.0: @@ -162,7 +169,8 @@ regBankSelected: true machineFunctionInfo: mode: - fp64-fp16-denormals: false + fp64-fp16-input-denormals: false + fp64-fp16-output-denormals: false body: | bb.0: @@ -184,7 +192,8 @@ regBankSelected: true machineFunctionInfo: mode: - fp32-denormals: true + fp32-input-denormals: true + fp32-output-denormals: true body: | bb.0: @@ -207,8 +216,8 @@ regBankSelected: true machineFunctionInfo: mode: - fp32-denormals: false - + fp32-input-denormals: false + fp32-output-denormals: false body: | bb.0: @@ -230,7 +239,8 @@ regBankSelected: true machineFunctionInfo: mode: - fp32-denormals: true + fp32-input-denormals: true + fp32-output-denormals: true body: | bb.0: @@ -252,7 +262,8 @@ regBankSelected: true machineFunctionInfo: mode: - fp32-denormals: false + fp32-input-denormals: false + fp32-output-denormals: false body: | bb.0: @@ -274,7 +285,8 @@ regBankSelected: true machineFunctionInfo: mode: - fp32-denormals: true + fp32-input-denormals: true + fp32-output-denormals: true body: | bb.0: @@ -299,7 +311,8 @@ regBankSelected: true machineFunctionInfo: mode: - fp32-denormals: false + fp32-input-denormals: false + fp32-output-denormals: false body: | bb.0: Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir @@ -9,8 +9,10 @@ name: test_fdiv_s16 machineFunctionInfo: mode: - fp32-denormals: true - fp64-fp16-denormals: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true body: | bb.0: @@ -100,8 +102,10 @@ name: test_fdiv_s32_denorms_on machineFunctionInfo: mode: - fp32-denormals: true - fp64-fp16-denormals: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true body: | bb.0: @@ -191,8 +195,10 @@ name: test_fdiv_s32_denorms_off machineFunctionInfo: mode: - fp32-denormals: false - fp64-fp16-denormals: true + fp32-input-denormals: false + fp32-output-denormals: false + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true body: | bb.0: @@ -290,8 +296,10 @@ name: test_fdiv_s32_denorms_off_arcp machineFunctionInfo: mode: - fp32-denormals: false - fp64-fp16-denormals: true + fp32-input-denormals: false + fp32-output-denormals: false + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true body: | bb.0: @@ -337,8 +345,10 @@ name: test_fdiv_s64 machineFunctionInfo: mode: - fp32-denormals: false - fp64-fp16-denormals: true + fp32-input-denormals: false + fp32-output-denormals: false + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true body: | bb.0: @@ -467,8 +477,10 @@ name: test_fdiv_v2s32 machineFunctionInfo: mode: - fp32-denormals: false - fp64-fp16-denormals: true + fp32-input-denormals: false + fp32-output-denormals: false + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true body: | bb.0: @@ -2223,8 +2235,10 @@ name: test_fdiv_s64_constant_one_rcp machineFunctionInfo: mode: - fp32-denormals: false - fp64-fp16-denormals: true + fp32-input-denormals: false + fp32-output-denormals: false + fp64-fp16-output-denormals: true + fp64-fp16-input-denormals: true body: | bb.0: @@ -2347,8 +2361,10 @@ name: test_fdiv_s64_constant_negative_one_rcp machineFunctionInfo: mode: - fp32-denormals: false - fp64-fp16-denormals: true + fp32-input-denormals: false + fp32-output-denormals: false + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true body: | bb.0: Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s16.mir @@ -7,7 +7,8 @@ name: test_fmad_s16_flush machineFunctionInfo: mode: - fp64-fp16-denormals: false + fp64-fp16-input-denormals: false + fp64-fp16-output-denormals: false body: | bb.0: @@ -66,7 +67,8 @@ name: test_fmad_v2s16_flush machineFunctionInfo: mode: - fp64-fp16-denormals: false + fp64-fp16-input-denormals: false + fp64-fp16-output-denormals: false body: | bb.0: @@ -162,7 +164,8 @@ name: test_fmad_v4s16_flush machineFunctionInfo: mode: - fp64-fp16-denormals: false + fp64-fp16-input-denormals: false + fp64-fp16-output-denormals: false body: | bb.0: @@ -332,7 +335,8 @@ name: test_fmad_s16_denorm machineFunctionInfo: mode: - fp64-fp16-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true body: | bb.0: @@ -392,7 +396,8 @@ name: test_fmad_s16_denorm_flags machineFunctionInfo: mode: - fp64-fp16-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true body: | bb.0: @@ -452,7 +457,8 @@ name: test_fmad_v2s16_denorm machineFunctionInfo: mode: - fp64-fp16-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true body: | bb.0: @@ -550,7 +556,8 @@ name: test_fmad_v2s16_denorm_flags machineFunctionInfo: mode: - fp64-fp16-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true body: | bb.0: @@ -648,7 +655,8 @@ name: test_fmad_v4s16_denorm machineFunctionInfo: mode: - fp64-fp16-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true body: | bb.0: @@ -822,7 +830,8 @@ name: test_fmad_v4s16_denorm_flags machineFunctionInfo: mode: - fp64-fp16-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true body: | bb.0: Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s32.mir @@ -7,7 +7,8 @@ name: test_fmad_s32_flush machineFunctionInfo: mode: - fp32-denormals: false + fp32-input-denormals: false + fp32-output-denormals: false body: | bb.0: @@ -42,7 +43,8 @@ name: test_fmad_s32_flags_flush machineFunctionInfo: mode: - fp32-denormals: false + fp32-input-denormals: false + fp32-output-denormals: false body: | bb.0: @@ -77,7 +79,8 @@ name: test_fmad_v2s32_flush machineFunctionInfo: mode: - fp32-denormals: false + fp32-input-denormals: false + fp32-output-denormals: false body: | bb.0: @@ -127,7 +130,8 @@ name: test_fmad_v3s32_flush machineFunctionInfo: mode: - fp32-denormals: false + fp32-input-denormals: false + fp32-output-denormals: false body: | bb.0: @@ -180,7 +184,8 @@ name: test_fmad_v4s32_flush machineFunctionInfo: mode: - fp32-denormals: false + fp32-input-denormals: false + fp32-output-denormals: false body: | bb.0: @@ -236,7 +241,8 @@ name: test_fmad_s32_denorm machineFunctionInfo: mode: - fp32-denormals: true + fp32-input-denormals: true + fp32-output-denormals: true body: | bb.0: @@ -274,7 +280,8 @@ name: test_fmad_s32_flags_denorm machineFunctionInfo: mode: - fp32-denormals: true + fp32-input-denormals: true + fp32-output-denormals: true body: | bb.0: @@ -312,7 +319,8 @@ name: test_fmad_v2s32_denorm machineFunctionInfo: mode: - fp32-denormals: true + fp32-input-denormals: true + fp32-output-denormals: true body: | bb.0: @@ -368,7 +376,8 @@ name: test_fmad_v3s32_denorm machineFunctionInfo: mode: - fp32-denormals: true + fp32-input-denormals: true + fp32-output-denormals: true body: | bb.0: @@ -430,7 +439,8 @@ name: test_fmad_v4s32_denorm machineFunctionInfo: mode: - fp32-denormals: true + fp32-input-denormals: true + fp32-output-denormals: true body: | bb.0: Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s64.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s64.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmad.s64.mir @@ -7,7 +7,8 @@ name: test_fmad_s64_flush machineFunctionInfo: mode: - fp64-fp16-denormals: false + fp64-fp16-output-denormals: false + fp64-fp16-input-denormals: false body: | bb.0: @@ -31,7 +32,8 @@ name: test_fmad_v2s64_flush machineFunctionInfo: mode: - fp64-fp16-denormals: false + fp64-fp16-input-denormals: false + fp64-fp16-output-denormals: false body: | bb.0: @@ -61,7 +63,8 @@ name: test_fmad_s64_denorm machineFunctionInfo: mode: - fp64-fp16-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true body: | bb.0: @@ -85,7 +88,8 @@ name: test_fmad_v2s64_denorm machineFunctionInfo: mode: - fp64-fp16-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true body: | bb.0: Index: llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir +++ llvm/test/CodeGen/AMDGPU/omod-nsz-flag.mir @@ -12,7 +12,7 @@ machineFunctionInfo: mode: ieee: false - fp32-denormals: false + fp32-output-denormals: false body: | bb.0: @@ -34,7 +34,7 @@ machineFunctionInfo: mode: ieee: false - fp32-denormals: false + fp32-output-denormals: false body: | bb.0: @@ -56,7 +56,7 @@ machineFunctionInfo: mode: ieee: false - fp32-denormals: false + fp32-output-denormals: false body: | bb.0: Index: llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir =================================================================== --- llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir +++ llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir @@ -25,8 +25,10 @@ # FULL-NEXT: mode: # FULL-NEXT: ieee: true # FULL-NEXT: dx10-clamp: true -# FULL-NEXT: fp32-denormals: true -# FULL-NEXT: fp64-fp16-denormals: true +# FULL-NEXT: fp32-input-denormals: true +# FULL-NEXT: fp32-output-denormals: true +# FULL-NEXT: fp64-fp16-input-denormals: true +# FULL-NEXT: fp64-fp16-output-denormals: true # FULL-NEXT: highBitsOf32BitAddress: 0 # FULL-NEXT: body: @@ -94,8 +96,10 @@ # FULL-NEXT: mode: # FULL-NEXT: ieee: true # FULL-NEXT: dx10-clamp: true -# FULL-NEXT: fp32-denormals: true -# FULL-NEXT: fp64-fp16-denormals: true +# FULL-NEXT: fp32-input-denormals: true +# FULL-NEXT: fp32-output-denormals: true +# FULL-NEXT: fp64-fp16-input-denormals: true +# FULL-NEXT: fp64-fp16-output-denormals: true # FULL-NEXT: highBitsOf32BitAddress: 0 # FULL-NEXT: body: @@ -133,8 +137,10 @@ # FULL-NEXT: mode: # FULL-NEXT: ieee: true # FULL-NEXT: dx10-clamp: true -# FULL-NEXT: fp32-denormals: true -# FULL-NEXT: fp64-fp16-denormals: true +# FULL-NEXT: fp32-input-denormals: true +# FULL-NEXT: fp32-output-denormals: true +# FULL-NEXT: fp64-fp16-input-denormals: true +# FULL-NEXT: fp64-fp16-output-denormals: true # FULL-NEXT: highBitsOf32BitAddress: 0 # FULL-NEXT: body: @@ -173,8 +179,10 @@ # FULL-NEXT: mode: # FULL-NEXT: ieee: true # FULL-NEXT: dx10-clamp: true -# FULL-NEXT: fp32-denormals: true -# FULL-NEXT: fp64-fp16-denormals: true +# FULL-NEXT: fp32-input-denormals: true +# FULL-NEXT: fp32-output-denormals: true +# FULL-NEXT: fp64-fp16-input-denormals: true +# FULL-NEXT: fp64-fp16-output-denormals: true # FULL-NEXT: highBitsOf32BitAddress: 0 # FULL-NEXT: body: @@ -247,16 +255,20 @@ # ALL: mode: # ALL-NEXT: ieee: false # ALL-NEXT: dx10-clamp: false -# ALL-NEXT: fp32-denormals: false -# ALL-NEXT: fp64-fp16-denormals: false +# ALL-NEXT: fp32-input-denormals: false +# ALL-NEXT: fp32-output-denormals: false +# ALL-NEXT: fp64-fp16-input-denormals: false +# ALL-NEXT: fp64-fp16-output-denormals: false name: parse_mode machineFunctionInfo: mode: ieee: false dx10-clamp: false - fp32-denormals: false - fp64-fp16-denormals: false + fp32-input-denormals: false + fp32-output-denormals: false + fp64-fp16-input-denormals: false + fp64-fp16-output-denormals: false body: | bb.0: Index: llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll =================================================================== --- llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll +++ llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll @@ -28,8 +28,10 @@ ; CHECK-NEXT: mode: ; CHECK-NEXT: ieee: true ; CHECK-NEXT: dx10-clamp: true -; CHECK-NEXT: fp32-denormals: false -; CHECK-NEXT: fp64-fp16-denormals: true +; CHECK-NEXT: fp32-input-denormals: false +; CHECK-NEXT: fp32-output-denormals: false +; CHECK-NEXT: fp64-fp16-input-denormals: true +; CHECK-NEXT: fp64-fp16-output-denormals: true ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: body: define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) { @@ -57,8 +59,10 @@ ; CHECK-NEXT: mode: ; CHECK-NEXT: ieee: false ; CHECK-NEXT: dx10-clamp: true -; CHECK-NEXT: fp32-denormals: false -; CHECK-NEXT: fp64-fp16-denormals: true +; CHECK-NEXT: fp32-input-denormals: false +; CHECK-NEXT: fp32-output-denormals: false +; CHECK-NEXT: fp64-fp16-input-denormals: true +; CHECK-NEXT: fp64-fp16-output-denormals: true ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: body: define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) { @@ -84,8 +88,10 @@ ; CHECK-NEXT: mode: ; CHECK-NEXT: ieee: true ; CHECK-NEXT: dx10-clamp: true -; CHECK-NEXT: fp32-denormals: false -; CHECK-NEXT: fp64-fp16-denormals: true +; CHECK-NEXT: fp32-input-denormals: false +; CHECK-NEXT: fp32-output-denormals: false +; CHECK-NEXT: fp64-fp16-input-denormals: true +; CHECK-NEXT: fp64-fp16-output-denormals: true ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: body: define void @function() { @@ -111,8 +117,10 @@ ; CHECK-NEXT: mode: ; CHECK-NEXT: ieee: true ; CHECK-NEXT: dx10-clamp: true -; CHECK-NEXT: fp32-denormals: false -; CHECK-NEXT: fp64-fp16-denormals: true +; CHECK-NEXT: fp32-input-denormals: false +; CHECK-NEXT: fp32-output-denormals: false +; CHECK-NEXT: fp64-fp16-input-denormals: true +; CHECK-NEXT: fp64-fp16-output-denormals: true ; CHECK-NEXT: highBitsOf32BitAddress: 0 ; CHECK-NEXT: body: define void @function_nsz() #0 { @@ -123,8 +131,10 @@ ; CHECK: mode: ; CHECK-NEXT: ieee: true ; CHECK-NEXT: dx10-clamp: false -; CHECK-NEXT: fp32-denormals: false -; CHECK-NEXT: fp64-fp16-denormals: true +; CHECK-NEXT: fp32-input-denormals: false +; CHECK-NEXT: fp32-output-denormals: false +; CHECK-NEXT: fp64-fp16-input-denormals: true +; CHECK-NEXT: fp64-fp16-output-denormals: true define void @function_dx10_clamp_off() #1 { ret void } @@ -133,8 +143,10 @@ ; CHECK: mode: ; CHECK-NEXT: ieee: false ; CHECK-NEXT: dx10-clamp: true -; CHECK-NEXT: fp32-denormals: false -; CHECK-NEXT: fp64-fp16-denormals: true +; CHECK-NEXT: fp32-input-denormals: false +; CHECK-NEXT: fp32-output-denormals: false +; CHECK-NEXT: fp64-fp16-input-denormals: true +; CHECK-NEXT: fp64-fp16-output-denormals: true define void @function_ieee_off() #2 { ret void } @@ -143,8 +155,10 @@ ; CHECK: mode: ; CHECK-NEXT: ieee: false ; CHECK-NEXT: dx10-clamp: false -; CHECK-NEXT: fp32-denormals: false -; CHECK-NEXT: fp64-fp16-denormals: true +; CHECK-NEXT: fp32-input-denormals: false +; CHECK-NEXT: fp32-output-denormals: false +; CHECK-NEXT: fp64-fp16-input-denormals: true +; CHECK-NEXT: fp64-fp16-output-denormals: true define void @function_ieee_off_dx10_clamp_off() #3 { ret void }