Index: llvm/include/llvm/CodeGen/LowLevelType.h =================================================================== --- llvm/include/llvm/CodeGen/LowLevelType.h +++ llvm/include/llvm/CodeGen/LowLevelType.h @@ -71,6 +71,21 @@ ScalarTy.isPointer() ? ScalarTy.getAddressSpace() : 0}; } + /// Get a 16-bit IEEE half value. + static constexpr LLT float16() { + return scalar(16); + } + + /// Get a 32-bit IEEE float value. + static constexpr LLT float32() { + return scalar(32); + } + + /// Get a 64-bit IEEE double value. + static constexpr LLT float64() { + return scalar(64); + } + /// Get a low-level fixed-width vector of some number of elements and element /// width. static constexpr LLT fixed_vector(unsigned NumElements, Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2759,9 +2759,9 @@ // TODO: Always legal with future ftz flag. // FIXME: Do we need just output? 
- if (Ty == LLT::scalar(32) && !MFI->getMode().allFP32Denormals()) + if (Ty == LLT::float32() && !MFI->getMode().allFP32Denormals()) return true; - if (Ty == LLT::scalar(16) && !MFI->getMode().allFP64FP16Denormals()) + if (Ty == LLT::float16() && !MFI->getMode().allFP64FP16Denormals()) return true; MachineIRBuilder HelperBuilder(MI); @@ -2831,27 +2831,27 @@ Register Src1 = MI.getOperand(2).getReg(); unsigned Flags = MI.getFlags(); LLT Ty = B.getMRI()->getType(Dst); - const LLT S16 = LLT::scalar(16); - const LLT S32 = LLT::scalar(32); + const LLT F16 = LLT::float16(); + const LLT F32 = LLT::float32(); - if (Ty == S32) { - auto Log = B.buildFLog2(S32, Src0, Flags); - auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32}, false) + if (Ty == F32) { + auto Log = B.buildFLog2(F32, Src0, Flags); + auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {F32}, false) .addUse(Log.getReg(0)) .addUse(Src1) .setMIFlags(Flags); B.buildFExp2(Dst, Mul, Flags); - } else if (Ty == S16) { + } else if (Ty == F16) { // There's no f16 fmul_legacy, so we need to convert for it. 
- auto Log = B.buildFLog2(S16, Src0, Flags); - auto Ext0 = B.buildFPExt(S32, Log, Flags); - auto Ext1 = B.buildFPExt(S32, Src1, Flags); - auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {S32}, false) + auto Log = B.buildFLog2(F16, Src0, Flags); + auto Ext0 = B.buildFPExt(F32, Log, Flags); + auto Ext1 = B.buildFPExt(F32, Src1, Flags); + auto Mul = B.buildIntrinsic(Intrinsic::amdgcn_fmul_legacy, {F32}, false) .addUse(Ext0.getReg(0)) .addUse(Ext1.getReg(0)) .setMIFlags(Flags); - B.buildFExp2(Dst, B.buildFPTrunc(S16, Mul), Flags); + B.buildFExp2(Dst, B.buildFPTrunc(F16, Mul), Flags); } else return false; @@ -2876,11 +2876,11 @@ MachineIRBuilder &B) const { const LLT S1 = LLT::scalar(1); - const LLT S64 = LLT::scalar(64); + const LLT F64 = LLT::float64(); Register Dst = MI.getOperand(0).getReg(); Register OrigSrc = MI.getOperand(1).getReg(); unsigned Flags = MI.getFlags(); - assert(ST.hasFractBug() && MRI.getType(Dst) == S64 && + assert(ST.hasFractBug() && MRI.getType(Dst) == F64 && "this should not have been custom lowered"); // V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x)) @@ -2891,7 +2891,7 @@ // // Convert floor(x) to (x - fract(x)) - auto Fract = B.buildIntrinsic(Intrinsic::amdgcn_fract, {S64}, false) + auto Fract = B.buildIntrinsic(Intrinsic::amdgcn_fract, {F64}, false) .addUse(OrigSrc) .setMIFlags(Flags); @@ -2903,9 +2903,9 @@ Register ModSrc = stripAnySourceMods(OrigSrc, MRI); auto Const = - B.buildFConstant(S64, llvm::bit_cast<double>(0x3fefffffffffffff)); + B.buildFConstant(F64, llvm::bit_cast<double>(0x3fefffffffffffff)); - Register Min = MRI.createGenericVirtualRegister(S64); + Register Min = MRI.createGenericVirtualRegister(F64); // We don't need to concern ourselves with the snan handling difference, so // use the one which will directly select. 
@@ -2918,10 +2918,10 @@ Register CorrectedFract = Min; if (!MI.getFlag(MachineInstr::FmNoNans)) { auto IsNan = B.buildFCmp(CmpInst::FCMP_ORD, S1, ModSrc, ModSrc, Flags); - CorrectedFract = B.buildSelect(S64, IsNan, ModSrc, Min, Flags).getReg(0); + CorrectedFract = B.buildSelect(F64, IsNan, ModSrc, Min, Flags).getReg(0); } - auto NegFract = B.buildFNeg(S64, CorrectedFract, Flags); + auto NegFract = B.buildFNeg(F64, CorrectedFract, Flags); B.buildFAdd(Dst, OrigSrc, NegFract, Flags); MI.eraseFromParent();