Index: include/llvm/CodeGen/CommandFlags.h
===================================================================
--- include/llvm/CodeGen/CommandFlags.h
+++ include/llvm/CodeGen/CommandFlags.h
@@ -128,6 +128,11 @@
   cl::desc("Generate software floating point library calls"),
   cl::init(false));
 
+cl::opt<bool>
+PromoteFP16ToFP32("promote-fp16",
+  cl::desc("Promote half-precision FP operations to single precision"),
+  cl::init(false));
+
 cl::opt<llvm::FloatABI::ABIType>
 FloatABIForCalls("float-abi",
   cl::desc("Choose float ABI type"),
@@ -277,6 +282,7 @@
   Options.HonorSignDependentRoundingFPMathOption =
       EnableHonorSignDependentRoundingFPMath;
   Options.UseSoftFloat = GenerateSoftFloatCalls;
+  Options.PromoteFP16ToFP32 = PromoteFP16ToFP32;
   if (FloatABIForCalls != FloatABI::Default)
     Options.FloatABIType = FloatABIForCalls;
   Options.NoZerosInBSS = DontPlaceZerosInBSS;
Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -100,7 +100,8 @@
     TypeExpandFloat,     // Split this float into two of half the size.
     TypeScalarizeVector, // Replace this one-element vector with its element.
     TypeSplitVector,     // Split this vector into two of half the size.
-    TypeWidenVector      // This vector should be widened into a larger vector.
+    TypeWidenVector,     // This vector should be widened into a larger vector.
+    TypePromoteFloat     // Replace this float with a larger one.
   };
 
   /// LegalizeKind holds the legalization kind that needs to happen to EVT
Index: include/llvm/Target/TargetMachine.h
===================================================================
--- include/llvm/Target/TargetMachine.h
+++ include/llvm/Target/TargetMachine.h
@@ -169,6 +169,8 @@
 
   void setFastISel(bool Enable) { Options.EnableFastISel = Enable; }
 
+  bool getPromoteFP16Flag() const { return Options.PromoteFP16ToFP32; }
+
   bool shouldPrintMachineCode() const { return Options.PrintMachineCode; }
 
   /// Returns the default value of asm verbosity.
Index: include/llvm/Target/TargetOptions.h
===================================================================
--- include/llvm/Target/TargetOptions.h
+++ include/llvm/Target/TargetOptions.h
@@ -71,7 +71,8 @@
         : PrintMachineCode(false), NoFramePointerElim(false),
           LessPreciseFPMADOption(false), UnsafeFPMath(false),
           NoInfsFPMath(false), NoNaNsFPMath(false),
-          HonorSignDependentRoundingFPMathOption(false), UseSoftFloat(false),
+          HonorSignDependentRoundingFPMathOption(false),
+          UseSoftFloat(false), PromoteFP16ToFP32(false),
           NoZerosInBSS(false), JITEmitDebugInfo(false),
           JITEmitDebugInfoToDisk(false), GuaranteedTailCallOpt(false),
           DisableTailCalls(false), StackAlignmentOverride(0),
@@ -142,6 +143,12 @@
     /// target FP instructions.
     unsigned UseSoftFloat : 1;
 
+    /// PromoteFP16ToFP32 - This flag is set when -promote-fp16 is given on
+    /// the command line (it defaults to false). When it is set, half-precision
+    /// floating point (FP16) is considered as a load-store only type and
+    /// promoted to single-precision registers and operations.
+    unsigned PromoteFP16ToFP32 : 1;
+
     /// NoZerosInBSS - By default some codegens place zero-initialized data to
     /// .bss section. This flag disables such behaviour (necessary, e.g. for
     /// crt*.o compiling).
@@ -277,6 +284,7 @@
     ARE_EQUAL(NoNaNsFPMath) &&
     ARE_EQUAL(HonorSignDependentRoundingFPMathOption) &&
     ARE_EQUAL(UseSoftFloat) &&
+    ARE_EQUAL(PromoteFP16ToFP32) &&
     ARE_EQUAL(NoZerosInBSS) &&
     ARE_EQUAL(JITEmitDebugInfo) &&
     ARE_EQUAL(JITEmitDebugInfoToDisk) &&
Index: lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1579,3 +1579,478 @@
   return DAG.getTruncStore(Chain, SDLoc(N), Hi, Ptr,
                            ST->getMemoryVT(), ST->getMemOperand());
 }
+
+//===----------------------------------------------------------------------===//
+// Float Operand Promotion
+//===----------------------------------------------------------------------===//
+//
+
+static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) {
+  if (OpVT == MVT::f16) {
+    if (RetVT == MVT::f32 ||
+        RetVT == MVT::f64) {
+      // Widening: half -> float/double.
+      return ISD::FP16_TO_FP;
+    }
+  }
+  else if (RetVT == MVT::f16) {
+    if (OpVT == MVT::f32 ||
+        OpVT == MVT::f64) {
+      // Narrowing: float/double -> half.
+      return ISD::FP_TO_FP16;
+    }
+  }
+
+  assert (false && "Attempt at an invalid promotion-related conversion");
+  return ISD::DELETED_NODE;
+}
+
+static RTLIB::Libcall GetPromotionLibcall(EVT OpVT, EVT RetVT) {
+  if (OpVT == MVT::f16) {
+    if (RetVT == MVT::f32)
+      return RTLIB::FPEXT_F16_F32;
+  }
+  else if (RetVT == MVT::f16) {
+    if (OpVT == MVT::f32)
+      return RTLIB::FPROUND_F32_F16;
+    else if (OpVT == MVT::f64)
+      return RTLIB::FPROUND_F64_F16;
+    else if (OpVT == MVT::f80)
+      return RTLIB::FPROUND_F80_F16;
+  }
+
+  assert (false && "Attempt at an invalid promotion-related libcall");
+  return RTLIB::UNKNOWN_LIBCALL;
+}
+
+static SDValue GetPromotedValue(SelectionDAG &DAG, const TargetLowering &TLI,
+                                const SDValue &Operand, EVT OpVT, EVT RetVT,
+                                SDLoc dl) {
+  // The libcall path is wired up but disabled; a conversion node is emitted.
+  bool UseLibCall = false;
+  // A half-precision result is produced as an i16 value.
+  EVT VT = RetVT;
+  if (RetVT == MVT::f16)
+    VT = MVT::i16;
+  if (UseLibCall) {
+    return TLI.makeLibCall(DAG, GetPromotionLibcall(OpVT, RetVT), VT,
+                           &Operand, 1, false, dl).first;
+  }
+  else {
+    return DAG.getNode(GetPromotionOpcode(OpVT, RetVT), dl, VT, Operand);
+  }
+
+}
+
+bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
+  SDValue R = SDValue();
+
+  // Nodes that use a promotion-requiring floating point operand, but don't
+  // produce a promotion-requiring floating point result, need to be legalized
+  // to use the promoted float operand.  Nodes that produce at least one
+  // promotion-requiring floating point result have their operands legalized as
+  // a part of PromoteFloatResult.
+  switch (N->getOpcode()) {
+    default:
+      llvm_unreachable("Do not know how to promote this operator's operand!");
+
+    case ISD::BITCAST:    R = PromoteFloatOp_BITCAST(N, OpNo); break;
+    case ISD::FP_TO_SINT:
+    case ISD::FP_TO_UINT: R = PromoteFloatOp_FP_TO_XINT(N, OpNo); break;
+    case ISD::FP_EXTEND:  R = PromoteFloatOp_FP_EXTEND(N, OpNo); break;
+    case ISD::SELECT_CC:  R = PromoteFloatOp_SELECT_CC(N, OpNo); break;
+    case ISD::SETCC:      R = PromoteFloatOp_SETCC(N, OpNo); break;
+    case ISD::STORE:      R = PromoteFloatOp_STORE(N, OpNo); break;
+  }
+
+  if (R.getNode())
+    ReplaceValueWith(SDValue(N, 0), R);
+  return false;
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo) {
+  SDValue Op = N->getOperand(0);
+  EVT OpVT = Op->getValueType(0);
+
+  EVT VT = N->getValueType(0);
+  EVT IVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
+  assert (IVT == VT && "Bitcast to type of different size");
+
+  SDValue Promoted = GetPromotedFloat(N->getOperand(0));
+  // Convert the promoted float value to the desired IVT.
+  return GetPromotedValue(DAG, TLI, Promoted, Promoted.getValueType(), OpVT,
+                          SDLoc(N));
+}
+
+// Convert the promoted float value to the desired integer type, keeping the
+SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo) {
+  SDValue Op = GetPromotedFloat(N->getOperand(0));
+  return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) {
+  SDValue Op = GetPromotedFloat(N->getOperand(0));
+  EVT VT = N->getValueType(0);
+
+  // Desired VT is same as promoted type.  Use promoted float directly.
+  if (VT == Op->getValueType(0))
+    return Op;
+
+  // If the promoted value is itself a conversion from half, convert directly
+  // to VT.  For e.g., convert f16 -> f32 -> f64 to f16 -> f64.
+  if (Op.getNode()->getOpcode() == ISD::FP16_TO_FP && VT == MVT::f64) {
+    // Result type must be VT; only f32/f64 map to a promotion opcode.
+    Op = Op.getNode()->getOperand(0);
+    return GetPromotedValue(DAG, TLI, Op, MVT::f16, VT, SDLoc(N));
+  }
+
+  // Else, extend the promoted float value to the desired VT.
+  return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Op);
+}
+
+// Promote the float operands used for comparison.  The true- and false-
+// operands have the same type as the result and are promoted, if needed, by
+// PromoteFloatRes_SELECT_CC
+SDValue DAGTypeLegalizer::PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo) {
+  SDValue LHS = GetPromotedFloat(N->getOperand(0));
+  SDValue RHS = GetPromotedFloat(N->getOperand(1));
+
+  // TODO Use UpdateNodeOperands ala SoftenFloatOp_SELECT_CC
+  return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0),
+                     LHS, RHS, N->getOperand(2), N->getOperand(3),
+                     N->getOperand(4));
+}
+
+// Construct a SETCC that compares the promoted values and sets the conditional
+// code.
+SDValue DAGTypeLegalizer::PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo) {
+  EVT VT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+  SDValue Op1 = GetPromotedFloat(N->getOperand(1));
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
+  return DAG.getSetCC(SDLoc(N), NVT, Op0, Op1, CCCode);
+
+}
+
+// Lower the promoted Float down to the integer value of same size and construct
+// a STORE of the integer value.
+SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) {
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+  SDValue Val = ST->getValue();
+  SDLoc dl(N);
+
+  SDValue Promoted = GetPromotedFloat(Val);
+  EVT VT = ST->getOperand(1)->getValueType(0);
+
+  SDValue NewVal;
+  NewVal = GetPromotedValue(DAG, TLI, Promoted, Promoted.getValueType(), VT,
+                            SDLoc(N));
+
+  SDNode *Src = Val.getNode();
+  if (Src->getOpcode() == ISD::FP16_TO_FP) {
+    // Optimization: collapse FP16->FP32 followed by FP32 -> FP16.
+    NewVal = Src->getOperand(0);
+  }
+  else if (Src->getOpcode() == ISD::FP_ROUND) {
+    // Optimization: collapse FP64 -> FP32 -> FP16 to FP64 -> FP16.
+    // The second parameter to FP_ROUND is irrelevant for this store, since the
+    // rounded value is only stored to memory.
+    SDValue SrcOp0 = Src->getOperand(0);
+    NewVal = GetPromotedValue(DAG, TLI, SrcOp0, SrcOp0->getValueType(0), VT, dl);
+  }
+  return DAG.getStore(ST->getChain(), dl, NewVal, ST->getBasePtr(),
+                      ST->getMemOperand());
+}
+
+//===----------------------------------------------------------------------===//
+// Float Result Promotion
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
+  SDValue R = SDValue();
+
+  switch (N->getOpcode()) {
+    // These opcodes cannot appear if promotion of FP16 is done in the backend
+    // instead of Clang
+    case ISD::FP16_TO_FP:
+    case ISD::FP_TO_FP16:
+    default:
+      llvm_unreachable("Do not know how to promote this operator's result!");
+
+    case ISD::BITCAST:    R = PromoteFloatRes_BITCAST(N); break;
+    case ISD::ConstantFP: R = PromoteFloatRes_ConstantFP(N); break;
+    case ISD::EXTRACT_VECTOR_ELT:
+                          R = PromoteFloatRes_EXTRACT_VECTOR_ELT(N); break;
+    case ISD::FCOPYSIGN:  R = PromoteFloatRes_FCOPYSIGN(N); break;
+
+    // Unary FP Operations
+    case ISD::FABS:
+    case ISD::FCEIL:
+    case ISD::FCOS:
+    case ISD::FEXP:
+    case ISD::FEXP2:
+    case ISD::FFLOOR:
+    case ISD::FLOG:
+    case ISD::FLOG2:
+    case ISD::FLOG10:
+    case ISD::FNEARBYINT:
+    case ISD::FNEG:
+    case ISD::FRINT:
+    case ISD::FSIN:
+    case ISD::FSQRT:      R = PromoteFloatRes_UnaryOp(N); break;
+
+    // NOTE(review): FMA has three operands; the BinOp handler must cover it.
+
+    // Binary FP Operations
+    case ISD::FADD:
+    case ISD::FDIV:
+    case ISD::FMA:
+    case ISD::FMAXNUM:
+    case ISD::FMINNUM:
+    case ISD::FMUL:
+    case ISD::FPOW:
+    case ISD::FREM:
+    case ISD::FSUB:       R = PromoteFloatRes_BinOp(N); break;
+
+    case ISD::FPOWI:      R = PromoteFloatRes_FPOWI(N); break;
+    // FROUND/FTRUNC round to an integral value, so they are unary ops too.
+    case ISD::FROUND:
+    case ISD::FTRUNC:     R = PromoteFloatRes_UnaryOp(N); break;
+
+    case ISD::FP_ROUND:   R = PromoteFloatRes_FP_ROUND(N); break;
+    case ISD::LOAD:       R = PromoteFloatRes_LOAD(N); break;
+    case ISD::SELECT:     R = PromoteFloatRes_SELECT(N); break;
+    case ISD::SELECT_CC:  R = PromoteFloatRes_SELECT_CC(N); break;
+
+    case ISD::SINT_TO_FP:
+    case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break;
+    case ISD::UNDEF:      R = PromoteFloatRes_UNDEF(N); break;
+
+  }
+
+  if (R.getNode())
+    SetPromotedFloat(SDValue(N, ResNo), R);
+}
+
+// Bitcast from i16 to f16:  convert the i16 to a f32 value instead.
+// At this point, it is not possible to determine if the bitcast value is
+// eventually stored to memory or promoted to f32 or promoted to a floating
+// point at a higher precision.  Some of these cases are handled by FP_EXTEND,
+// STORE promotion handlers.
+SDValue DAGTypeLegalizer::PromoteFloatRes_BITCAST(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  return GetPromotedValue(DAG, TLI, N->getOperand(0), VT, NVT, SDLoc(N));
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_ConstantFP(SDNode *N) {
+  ConstantFPSDNode *CFPNode = cast<ConstantFPSDNode>(N);
+  EVT VT = N->getValueType(0);
+
+  // Get the (bit-cast) APInt of the APFloat and build an integer constant
+  EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+  SDValue C = DAG.getConstant(CFPNode->getValueAPF().bitcastToAPInt(),
+                              IVT);
+
+  // Convert the Constant to the desired FP type
+  // FIXME We might be able to do the conversion during compilation and get rid
+  // of it from the object code
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  return GetPromotedValue(DAG, TLI, C, VT, NVT, SDLoc(N));
+}
+
+// If the Index operand is a constant, try to redirect the extract operation to
+// the correct legalized vector.  If not, bit-convert the input vector to
+// equivalent integer vector.  Extract the element as an (bit-cast) integer
+// value and convert it to the promoted type.
+SDValue DAGTypeLegalizer::PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) {
+  SDLoc dl(N);
+
+  // If the index is constant, try to extract the value from the legalized
+  // vector type.
+  if (isa<ConstantSDNode>(N->getOperand(1))) {
+    SDValue Vec = N->getOperand(0);
+    SDValue Idx = N->getOperand(1);
+    EVT VecVT = Vec->getValueType(0);
+    EVT EltVT = VecVT.getVectorElementType();
+
+    uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+    switch (getTypeAction(VecVT)) {
+    default: break;
+    case TargetLowering::TypeScalarizeVector: {
+      SDValue Res = GetScalarizedVector(N->getOperand(0));
+      ReplaceValueWith(SDValue(N, 0), Res);
+      return SDValue();
+    }
+    case TargetLowering::TypeWidenVector: {
+      Vec = GetWidenedVector(Vec);
+      SDValue Res = DAG.getNode(N->getOpcode(), dl, EltVT, Vec, Idx);
+      ReplaceValueWith(SDValue(N, 0), Res);
+      return SDValue();
+    }
+    case TargetLowering::TypeSplitVector: {
+      SDValue Lo, Hi;
+      GetSplitVector(Vec, Lo, Hi);
+
+      uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+      SDValue Res;
+      if (IdxVal < LoElts)
+        Res = DAG.getNode(N->getOpcode(), dl, EltVT, Lo, Idx);
+      else
+        Res = DAG.getNode(N->getOpcode(), dl, EltVT, Hi,
+                          DAG.getConstant(IdxVal - LoElts,
+                                          Idx.getValueType()));
+      ReplaceValueWith(SDValue(N, 0), Res);
+      return SDValue();
+    }
+
+    }
+  }
+
+  // Bit-convert the input vector to the equivalent integer vector
+  SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
+  EVT IVT = NewOp.getValueType().getVectorElementType();
+
+  // Extract the element as an (bit-cast) integer value
+  SDValue NewVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, IVT,
+                               NewOp, N->getOperand(1));
+
+  // Convert the element to the desired FP type
+  EVT VT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  return GetPromotedValue(DAG, TLI, NewVal, VT, NVT, SDLoc(N));
+}
+
+// FCOPYSIGN(X, Y) returns the value of X with the sign of Y.  If the result
+// needs promotion, so does the argument X.  NOTE(review): PromoteFloatOperand
+// has no FCOPYSIGN case, so Y cannot be left to operand promotion.
+SDValue DAGTypeLegalizer::PromoteFloatRes_FCOPYSIGN(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+  SDValue Op1 = N->getOperand(1);
+  if (getTypeAction(Op1.getValueType()) == TargetLowering::TypePromoteFloat)
+    Op1 = GetPromotedFloat(Op1); // Y needs promotion as well
+  return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1);
+}
+
+// Unary operation where the result and the operand have PromoteFloat type
+// action.  Construct a new SDNode with the promoted float value of the old
+// operand.
+SDValue DAGTypeLegalizer::PromoteFloatRes_UnaryOp(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  SDValue Op = GetPromotedFloat(N->getOperand(0));
+
+  return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op);
+}
+
+// FP operations whose result and operands all have PromoteFloat type action:
+// the two-operand ones and FMA.  Rebuild the node from the promoted operands.
+SDValue DAGTypeLegalizer::PromoteFloatRes_BinOp(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+  SDValue Op1 = GetPromotedFloat(N->getOperand(1));
+  if (N->getNumOperands() == 2)
+    return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1);
+  SDValue Op2 = GetPromotedFloat(N->getOperand(2)); // third operand (FMA)
+  return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, Op2);
+}
+
+// Promote the Float (first) operand and retain the Integer (second) operand
+SDValue DAGTypeLegalizer::PromoteFloatRes_FPOWI(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  SDValue Op0 = GetPromotedFloat(N->getOperand(0));
+  SDValue Op1 = N->getOperand(1);
+
+  return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1);
+}
+
+// Rounding to an integral value.  The operation is applied to the promoted
+// operand and yields the promoted type; no narrowing conversion is emitted.
+SDValue DAGTypeLegalizer::PromoteFloatRes_FTRUNC(SDNode *N) {
+  SDLoc dl(N);
+
+  EVT VT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+
+  // The operand has the same type as the result, so it was promoted too; use
+  // its promoted value rather than the original operand.
+  SDValue Op = GetPromotedFloat(N->getOperand(0));
+  // Re-emit N's own opcode on the promoted operand.
+  return DAG.getNode(N->getOpcode(), dl, NVT, Op);
+}
+
+// Explicit operation to reduce precision.  Convert the input down to half
+// precision, then promote that result back to the legal type.
+// TODO Verify if we need to handle the TRUNC parameter (operand 1) to
+// FP_ROUND.
+SDValue DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) {
+  SDLoc dl(N);
+
+  EVT VT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+  SDValue Op = N->getOperand(0);
+  // Round the input to the desired precision
+  SDValue Round = GetPromotedValue(DAG, TLI, Op, NVT, VT, dl);
+  // Promote it back to the legal output type
+  return GetPromotedValue(DAG, TLI, Round, VT, NVT, dl);
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) {
+  LoadSDNode *L = cast<LoadSDNode>(N);
+  EVT VT = N->getValueType(0);
+
+  // Load the value as an integer value with the same number of bits
+  EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+  SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),
+                   IVT, SDLoc(N), L->getChain(), L->getBasePtr(),
+                   L->getOffset(), L->getPointerInfo(), IVT, L->isVolatile(),
+                   L->isNonTemporal(), false, L->getAlignment(),
+                   L->getAAInfo());
+  // Legalize the chain result by replacing uses of the old value chain with the
+  // new one
+  ReplaceValueWith(SDValue(N, 1), newL.getValue(1));
+
+  // Convert the integer value to the desired FP type
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  return GetPromotedValue(DAG, TLI, newL, VT, NVT, SDLoc(N));
+}
+
+// Construct a new SELECT node with the promoted true- and false- values.
+SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT(SDNode *N) {
+  SDValue TrueVal = GetPromotedFloat(N->getOperand(1));
+  SDValue FalseVal = GetPromotedFloat(N->getOperand(2));
+
+  return DAG.getNode(ISD::SELECT, SDLoc(N), TrueVal->getValueType(0),
+                     N->getOperand(0), TrueVal, FalseVal);
+}
+
+// Construct a new SELECT_CC node with the promoted true- and false- values.
+// The result takes their promoted type, as SetPromotedFloat requires.
+SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT_CC(SDNode *N) {
+  SDValue TrueVal = GetPromotedFloat(N->getOperand(2));
+  SDValue FalseVal = GetPromotedFloat(N->getOperand(3));
+
+  return DAG.getNode(ISD::SELECT_CC, SDLoc(N), TrueVal->getValueType(0),
+                     N->getOperand(0), N->getOperand(1), TrueVal, FalseVal,
+                     N->getOperand(4));
+}
+
+// Construct a SDNode that transforms the SINT or UINT operand to the promoted
+// float type.
+SDValue DAGTypeLegalizer::PromoteFloatRes_XINT_TO_FP(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) {
+  return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+                                               N->getValueType(0)));
+}
+
Index: lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -251,6 +251,16 @@
   case TargetLowering::TypeSoftenFloat:
     // Promote the integer operand by hand.
     return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
+  case TargetLowering::TypePromoteFloat: {
+    // Convert the promoted float by hand.
+    if (NOutVT.bitsEq(NInVT)) {
+      SDValue PromotedOp = GetPromotedFloat(InOp);
+      SDValue Trunc = DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, PromotedOp);
+      return DAG.getNode(ISD::AssertZext, dl,
+                         NOutVT, Trunc, DAG.getValueType(OutVT));
+    }
+    break;
+  }
   case TargetLowering::TypeExpandInteger:
   case TargetLowering::TypeExpandFloat:
     break;
Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -93,6 +93,11 @@
   /// the same size, this map indicates the converted value to use.
   SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats;
 
+  /// PromotedFloats - For floating point nodes that have a smaller precision
+  /// than the smallest supported precision, this map indicates what promoted
+  /// value to use.
+  SmallDenseMap<SDValue, SDValue, 8> PromotedFloats;
+
   /// ExpandedFloats - For float nodes that need to be expanded this map
   /// indicates which operands are the expanded version of the input.
   SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats;
@@ -499,6 +504,43 @@
 
   void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
                                 ISD::CondCode &CCCode, SDLoc dl);
+
+  //===--------------------------------------------------------------------===//
+  // Float promotion support: LegalizeFloatTypes.cpp
+  //===--------------------------------------------------------------------===//
+
+  SDValue GetPromotedFloat(SDValue Op) {
+    SDValue &PromotedOp = PromotedFloats[Op];
+    RemapValue(PromotedOp);
+    assert(PromotedOp.getNode() && "Operand wasn't promoted?");
+    return PromotedOp;
+  }
+  void SetPromotedFloat(SDValue Op, SDValue Result);
+
+  void PromoteFloatResult(SDNode *N, unsigned ResNo);
+  SDValue PromoteFloatRes_BITCAST(SDNode *N);
+  SDValue PromoteFloatRes_BinOp(SDNode *N);
+  SDValue PromoteFloatRes_ConstantFP(SDNode *N);
+  SDValue PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
+  SDValue PromoteFloatRes_FCOPYSIGN(SDNode *N);
+  SDValue PromoteFloatRes_FPOWI(SDNode *N);
+  SDValue PromoteFloatRes_FP_ROUND(SDNode *N);
+  SDValue PromoteFloatRes_FTRUNC(SDNode *N);
+  SDValue PromoteFloatRes_LOAD(SDNode *N);
+  SDValue PromoteFloatRes_SELECT(SDNode *N);
+  SDValue PromoteFloatRes_SELECT_CC(SDNode *N);
+  SDValue PromoteFloatRes_UnaryOp(SDNode *N);
+  SDValue PromoteFloatRes_UNDEF(SDNode *N);
+  SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N);
+
+  bool PromoteFloatOperand(SDNode *N, unsigned OpNo);
+  SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo);
+  SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo);
+  SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo);
+  SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo);
+  SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo);
+  SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo);
+
   //===--------------------------------------------------------------------===//
   // Scalarization Support: LegalizeVectorTypes.cpp
   //===--------------------------------------------------------------------===//
Index: lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -259,6 +259,10 @@
         WidenVectorResult(N, i);
         Changed = true;
         goto NodeDone;
+      case TargetLowering::TypePromoteFloat:
+        PromoteFloatResult(N, i);
+        Changed = true;
+        goto NodeDone;
       }
     }
 
@@ -308,6 +312,10 @@
         NeedsReanalyzing = WidenVectorOperand(N, i);
         Changed = true;
         break;
+      case TargetLowering::TypePromoteFloat:
+        NeedsReanalyzing = PromoteFloatOperand(N, i);
+        Changed = true;
+        break;
       }
       break;
     }
@@ -753,6 +761,17 @@
   OpEntry = Result;
 }
 
+void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) {
+  assert(Result.getValueType() ==
+         TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+         "Invalid type for promoted float");
+  AnalyzeNewValue(Result);
+
+  SDValue &OpEntry = PromotedFloats[Op];
+  assert(!OpEntry.getNode() && "Node is already promoted!");
+  OpEntry = Result;
+}
+
 void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
   // Note that in some cases vector operation operands may be greater than
   // the vector element type. For example BUILD_VECTOR of type <1 x i1> with
Index: lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- lib/CodeGen/TargetLoweringBase.cpp
+++ lib/CodeGen/TargetLoweringBase.cpp
@@ -1256,10 +1256,18 @@
   }
 
   if (!isTypeLegal(MVT::f16)) {
-    NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16];
-    RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16];
-    TransformToType[MVT::f16] = MVT::i16;
-    ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat);
+    if (!TM.getPromoteFP16Flag()) {
+      NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16];
+      RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16];
+      TransformToType[MVT::f16] = MVT::i16;
+      ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat);
+    }
+    else {
+      NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32];
+      RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32];
+      TransformToType[MVT::f16] = MVT::f32;
+      ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat);
+    }
   }
 
   // Loop over all of the vector value types to see which need transformations.
Index: test/CodeGen/Generic/fp16-promote.ll
===================================================================
--- /dev/null
+++ test/CodeGen/Generic/fp16-promote.ll
@@ -0,0 +1,137 @@
+; RUN: llc -promote-fp16 < %s
+
+@gV1 = common global <4 x half> zeroinitializer, align 8
+@gV2 = common global <4 x half> zeroinitializer, align 8
+
+; test load
+define half @test_load(half* %p) {
+  %1 = load half* %p, align 2
+  ret half %1
+}
+
+; test store
+define void @test_store(half* %p, half %v) {
+  store half %v, half* %p, align 2
+  ret void
+}
+
+; test a unary operation
+declare half @llvm.fabs.f16(half %Val)
+define half @test_fabs(half %v) {
+  %1 = tail call half @llvm.fabs.f16 (half %v)
+  ret half %1
+}
+
+; test a binary operation
+define half @test_fadd(half %a, half %b) {
+  %1 = fadd half %a, %b
+  ret half %1
+}
+
+; test extract vector element
+define half @test_extractelement(<4 x half> %a) {
+  %1 = extractelement <4 x half> %a, i32 2
+  ret half %1
+}
+
+; test insert vector element
+define <4 x half> @test_insertelement(<4 x half> %a, half %v) {
+  %1 = insertelement <4 x half> %a, half %v, i32 2
+  ret <4 x half> %1
+}
+
+; test fpext to float
+define float @test_fpext32(half %in) {
+  %1 = fpext half %in to float
+  ret float %1
+}
+
+; test fpext to double
+define double @test_fpext64(half %in) {
+  %1 = fpext half %in to double
+  ret double %1
+}
+
+; test truncate from float
+define half @test_fptrunc32(float %in) {
+  %1 = fptrunc float %in to half
+  ret half %1
+}
+
+; test truncate from double
+define half @test_fptrunc64(double %in) {
+  %1 = fptrunc double %in to half
+  ret half %1
+}
+
+; test convert to signed i32
+define i32 @test_convert_to_i32(half %in) {
+  %1 = fptosi half %in to i32
+  ret i32 %1
+}
+
+; test convert to unsigned i32
+define i32 @test_convert_to_u32(half %in) {
+  %1 = fptoui half %in to i32
+  ret i32 %1
+}
+
+; test convert from (unsigned) i32
+define half @test_convert_from_i32(i32 %in) {
+  %1 = uitofp i32 %in to half
+  ret half %1
+}
+
+; test select
+define half @test_select(i1 %sel, half %v1, half %v2) {
+  %1 = select i1 %sel, half %v1, half %v2
+  ret half %1
+}
+
+; test bitcast from i16
+define half @test_bitcast_from_i16(i16 %in) {
+  %1 = bitcast i16 %in to half
+  ret half %1
+}
+
+; test bitcast to i16
+define i16 @test_bitcast_to_i16(half %in) {
+  %1 = bitcast half %in to i16
+  ret i16 %1
+}
+
+; test fcopysign
+declare half @llvm.copysign.f16(half %a, half %b)
+define half @test_fcopysign(half %v1, half %v2) {
+  %1 = call half @llvm.copysign.f16(half %v1, half %v2)
+  ret half %1
+}
+
+; test fpowi
+declare half @llvm.powi.f16(half %in, i32 %power)
+define half @test_fpowi(half %in, i32 %power) {
+  %1 = call half @llvm.powi.f16(half %in, i32 %power)
+  ret half %1
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; tests for vec4 operations
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; test load
+define <4 x half> @test_load_vec(<4 x half>* %p) {
+  %1 = load <4 x half>* %p, align 8
+  ret <4 x half> %1
+}
+
+; test store vector
+define void @test_store_vec(<4 x half>* %p, <4 x half> %v) {
+  store <4 x half> %v, <4 x half>* %p, align 8
+  ret void
+}
+
+; test vector add
+define <4 x half> @test_vec4_fadd(<4 x half> %a, <4 x half> %b) {
+  %1 = fadd <4 x half> %a, %b
+  ret <4 x half> %1
+}
+