Index: llvm/include/llvm/CodeGen/LowLevelType.h =================================================================== --- llvm/include/llvm/CodeGen/LowLevelType.h +++ llvm/include/llvm/CodeGen/LowLevelType.h @@ -71,6 +71,22 @@ ScalarTy.isPointer() ? ScalarTy.getAddressSpace() : 0}; } + /// Get a 16-bit IEEE half value. + /// TODO: Add IEEE semantics to type - This currently returns a simple `scalar(16)`. + static constexpr LLT float16() { + return scalar(16); + } + + /// Get a 32-bit IEEE float value. + static constexpr LLT float32() { + return scalar(32); + } + + /// Get a 64-bit IEEE double value. + static constexpr LLT float64() { + return scalar(64); + } + /// Get a low-level fixed-width vector of some number of elements and element /// width. static constexpr LLT fixed_vector(unsigned NumElements, Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2979,10 +2979,10 @@ // TODO: Always legal with future ftz flag. // FIXME: Do we need just output? 
- if (Ty == LLT::scalar(32) && + if (Ty == LLT::float32() && MFI->getMode().FP32Denormals == DenormalMode::getPreserveSign()) return true; - if (Ty == LLT::scalar(16) && + if (Ty == LLT::float16() && MFI->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()) return true; @@ -3513,27 +3513,26 @@ Register Src1 = MI.getOperand(2).getReg(); unsigned Flags = MI.getFlags(); LLT Ty = B.getMRI()->getType(Dst); - const LLT S16 = LLT::scalar(16); - const LLT S32 = LLT::scalar(32); + const LLT F16 = LLT::float16(); + const LLT F32 = LLT::float32(); - if (Ty == S32) { - auto Log = B.buildFLog2(S32, Src0, Flags); - auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32}) + if (Ty == F32) { + auto Log = B.buildFLog2(F32, Src0, Flags); + auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {F32}) .addUse(Log.getReg(0)) .addUse(Src1) .setMIFlags(Flags); B.buildFExp2(Dst, Mul, Flags); - } else if (Ty == S16) { + } else if (Ty == F16) { // There's no f16 fmul_legacy, so we need to convert for it. 
- auto Log = B.buildFLog2(S16, Src0, Flags); - auto Ext0 = B.buildFPExt(S32, Log, Flags); - auto Ext1 = B.buildFPExt(S32, Src1, Flags); - auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32}) + auto Log = B.buildFLog2(F16, Src0, Flags); + auto Ext0 = B.buildFPExt(F32, Log, Flags); + auto Ext1 = B.buildFPExt(F32, Src1, Flags); + auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {F32}) .addUse(Ext0.getReg(0)) .addUse(Ext1.getReg(0)) .setMIFlags(Flags); - - B.buildFExp2(Dst, B.buildFPTrunc(S16, Mul), Flags); + B.buildFExp2(Dst, B.buildFPTrunc(F16, Mul), Flags); } else return false; @@ -3558,11 +3557,11 @@ MachineIRBuilder &B) const { const LLT S1 = LLT::scalar(1); - const LLT S64 = LLT::scalar(64); + const LLT F64 = LLT::float64(); Register Dst = MI.getOperand(0).getReg(); Register OrigSrc = MI.getOperand(1).getReg(); unsigned Flags = MI.getFlags(); - assert(ST.hasFractBug() && MRI.getType(Dst) == S64 && + assert(ST.hasFractBug() && MRI.getType(Dst) == F64 && "this should not have been custom lowered"); // V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x)) @@ -3573,7 +3572,7 @@ // // Convert floor(x) to (x - fract(x)) - auto Fract = B.buildIntrinsic(Intrinsic::amdgcn_fract, {S64}) + auto Fract = B.buildIntrinsic(Intrinsic::amdgcn_fract, {F64}) .addUse(OrigSrc) .setMIFlags(Flags); @@ -3585,9 +3584,9 @@ Register ModSrc = stripAnySourceMods(OrigSrc, MRI); auto Const = - B.buildFConstant(S64, llvm::bit_cast<double>(0x3fefffffffffffff)); + B.buildFConstant(F64, llvm::bit_cast<double>(0x3fefffffffffffff)); - Register Min = MRI.createGenericVirtualRegister(S64); + Register Min = MRI.createGenericVirtualRegister(F64); // We don't need to concern ourselves with the snan handling difference, so // use the one which will directly select.
@@ -3600,10 +3599,10 @@ Register CorrectedFract = Min; if (!MI.getFlag(MachineInstr::FmNoNans)) { auto IsNan = B.buildFCmp(CmpInst::FCMP_ORD, S1, ModSrc, ModSrc, Flags); - CorrectedFract = B.buildSelect(S64, IsNan, ModSrc, Min, Flags).getReg(0); + CorrectedFract = B.buildSelect(F64, IsNan, ModSrc, Min, Flags).getReg(0); } - auto NegFract = B.buildFNeg(S64, CorrectedFract, Flags); + auto NegFract = B.buildFNeg(F64, CorrectedFract, Flags); B.buildFAdd(Dst, OrigSrc, NegFract, Flags); MI.eraseFromParent();