Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -12113,58 +12113,35 @@
                                  In, DAG.getUNDEF(SVT)));
 }
 
-static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) {
-  LLVMContext *Context = DAG.getContext();
+// The only differences between FABS and FNEG are the mask used and
+// the logic op performed.
+static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
+  assert((Op.getOpcode() == ISD::FABS || Op.getOpcode() == ISD::FNEG) &&
+         "Wrong opcode for lowering FABS or FNEG.");
+  bool IsFABS = (Op.getOpcode() == ISD::FABS);
   SDLoc dl(Op);
   MVT VT = Op.getSimpleValueType();
-  MVT EltVT = VT;
-  unsigned NumElts = VT == MVT::f64 ? 2 : 4;
-  if (VT.isVector()) {
-    EltVT = VT.getVectorElementType();
-    NumElts = VT.getVectorNumElements();
-  }
-  Constant *C;
-  if (EltVT == MVT::f64)
-    C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
-                                          APInt(64, ~(1ULL << 63))));
-  else
-    C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
-                                          APInt(32, ~(1U << 31))));
-  C = ConstantVector::getSplat(NumElts, C);
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy());
-  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
-  SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
-                             MachinePointerInfo::getConstantPool(),
-                             false, false, false, Alignment);
-  if (VT.isVector()) {
-    MVT ANDVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
-    return DAG.getNode(ISD::BITCAST, dl, VT,
-                       DAG.getNode(ISD::AND, dl, ANDVT,
-                                   DAG.getNode(ISD::BITCAST, dl, ANDVT,
-                                               Op.getOperand(0)),
-                                   DAG.getNode(ISD::BITCAST, dl, ANDVT, Mask)));
-  }
-  return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
-}
+  // Assume scalar op for initialization; update for vector if needed.
+  // Note that there are no scalar AND or XOR SSE/AVX instructions, so we
+  // generate a 16-byte vector constant and logic op even for the scalar case.
+  // Using a 16-byte mask allows folding the load of the mask with
+  // the logic op, so it can save (~4 bytes) on code size.
 
-static SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) {
-  LLVMContext *Context = DAG.getContext();
-  SDLoc dl(Op);
-  MVT VT = Op.getSimpleValueType();
+  // FIXME: If -Os meant anything to the backend, we would probably not generate
+  // a 16-byte mask when we only need 4 or 8 bytes for the scalar case.
   MVT EltVT = VT;
   unsigned NumElts = VT == MVT::f64 ? 2 : 4;
   if (VT.isVector()) {
     EltVT = VT.getVectorElementType();
     NumElts = VT.getVectorNumElements();
   }
-  Constant *C;
-  if (EltVT == MVT::f64)
-    C = ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
-                                          APInt(64, 1ULL << 63)));
-  else
-    C = ConstantFP::get(*Context, APFloat(APFloat::IEEEsingle,
-                                          APInt(32, 1U << 31)));
+
+  unsigned EltBits = EltVT.getSizeInBits();
+  LLVMContext *Context = DAG.getContext();
+  // For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
+  APInt MaskElt =
+    IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignBit(EltBits);
+  Constant *C = ConstantInt::get(*Context, MaskElt);
   C = ConstantVector::getSplat(NumElts, C);
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy());
@@ -12172,16 +12149,20 @@
   SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
                              MachinePointerInfo::getConstantPool(),
                              false, false, false, Alignment);
+
   if (VT.isVector()) {
-    MVT XORVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits()/64);
+    // A vector operation requires casting operands to a vector type, performing
+    // the logic op, and casting the result back to the original value type.
+    MVT VecVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
+    SDValue Op0Casted = DAG.getNode(ISD::BITCAST, dl, VecVT, Op.getOperand(0));
+    SDValue MaskCasted = DAG.getNode(ISD::BITCAST, dl, VecVT, Mask);
+    unsigned LogicOp = IsFABS ? ISD::AND : ISD::XOR;
     return DAG.getNode(ISD::BITCAST, dl, VT,
-                       DAG.getNode(ISD::XOR, dl, XORVT,
-                                   DAG.getNode(ISD::BITCAST, dl, XORVT,
-                                               Op.getOperand(0)),
-                                   DAG.getNode(ISD::BITCAST, dl, XORVT, Mask)));
+                       DAG.getNode(LogicOp, dl, VecVT, Op0Casted, MaskCasted));
   }
-
-  return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
+  // If not vector, then scalar.
+  unsigned LogicOp = IsFABS ? X86ISD::FAND : X86ISD::FXOR;
+  return DAG.getNode(LogicOp, dl, VT, Op.getOperand(0), Mask);
 }
 
 static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
@@ -16821,8 +16802,8 @@
   case ISD::FP_TO_UINT:         return LowerFP_TO_UINT(Op, DAG);
   case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
   case ISD::LOAD:               return LowerExtendedLoad(Op, Subtarget, DAG);
-  case ISD::FABS:               return LowerFABS(Op, DAG);
-  case ISD::FNEG:               return LowerFNEG(Op, DAG);
+  case ISD::FABS:               return LowerFABSorFNEG(Op, DAG);
+  case ISD::FNEG:               return LowerFABSorFNEG(Op, DAG);
   case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
   case ISD::FGETSIGN:           return LowerFGETSIGN(Op, DAG);
   case ISD::SETCC:              return LowerSETCC(Op, DAG);