Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -97,7 +97,8 @@ TypeLegal, // The target natively supports this type. TypePromoteInteger, // Replace this integer with a larger one. TypeExpandInteger, // Split this integer into two of half the size. - TypeSoftenFloat, // Convert this float to a same size integer type. + TypeSoftenFloat, // Convert this float to a same size integer type, + // if an operation is not supported in target HW. TypeExpandFloat, // Split this float into two of half the size. TypeScalarizeVector, // Replace this one-element vector with its element. TypeSplitVector, // Split this vector into two of half the size. @@ -1911,6 +1912,7 @@ /// up the MVT::LAST_VALUETYPE value to the next multiple of 8. uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8]; +protected: ValueTypeActionImpl ValueTypeActions; private: Index: include/llvm/Target/TargetRegisterInfo.h =================================================================== --- include/llvm/Target/TargetRegisterInfo.h +++ include/llvm/Target/TargetRegisterInfo.h @@ -614,9 +614,13 @@ /// Find the largest common subclass of A and B. /// Return NULL if there is no common subclass. + /// The common subclass should contain + /// simple value type SVT if it is not the Any type. const TargetRegisterClass * getCommonSubClass(const TargetRegisterClass *A, - const TargetRegisterClass *B) const; + const TargetRegisterClass *B, + const MVT::SimpleValueType SVT = + MVT::SimpleValueType::Any) const; /// Returns a TargetRegisterClass used for pointer values. /// If a target supports multiple different pointer register classes, Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -8722,6 +8722,22 @@ ZeroCmp, Zero, RV); } +static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { + // copysign(x, fp_extend(y)) -> copysign(x, y) + // copysign(x, fp_round(y)) -> copysign(x, y) + // Do not optimize out type conversion of f128 type yet. + // For some target like x86_64, configuration is changed + // to keep one f128 value in one SSE register, but + // instruction selection cannot handle FCOPYSIGN on + // SSE registers yet. + SDValue N1 = N->getOperand(1); + EVT N1VT = N1->getValueType(0); + EVT N1Op0VT = N1->getOperand(0)->getValueType(0); + return (N1.getOpcode() == ISD::FP_EXTEND || + N1.getOpcode() == ISD::FP_ROUND) && + (N1VT == N1Op0VT || N1Op0VT != MVT::f128); +} + SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -8765,7 +8781,7 @@ // copysign(x, fp_extend(y)) -> copysign(x, y) // copysign(x, fp_round(y)) -> copysign(x, y) - if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) + if (CanCombineFCOPYSIGN_EXTEND_ROUND(N)) return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0)); Index: lib/CodeGen/SelectionDAG/InstrEmitter.cpp =================================================================== --- lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -139,7 +139,7 @@ UseRC = RC; else if (RC) { const TargetRegisterClass *ComRC = - TRI->getCommonSubClass(UseRC, RC); + TRI->getCommonSubClass(UseRC, RC, VT.SimpleTy); // If multiple uses expect disjoint register classes, we emit // copies in AddRegisterOperand. if (ComRC) Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -154,6 +154,7 @@ SDValue ExpandVectorBuildThroughStack(SDNode* Node); SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); + SDValue ExpandConstant(ConstantSDNode *CP); // if ExpandNode returns false, LegalizeOp falls back to ConvertNodeToLibcall bool ExpandNode(SDNode *Node); @@ -294,6 +295,20 @@ return Result; } +/// Expands the Constant node to a load from the constant pool. +SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) { + SDLoc dl(CP); + EVT VT = CP->getValueType(0); + SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(), + TLI.getPointerTy(DAG.getDataLayout())); + unsigned Alignment = cast(CPIdx)->getAlignment(); + SDValue Result = + DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); + return Result; +} + /// Expands an unaligned store to 2 half-size stores. static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, const TargetLowering &TLI, @@ -1192,15 +1207,17 @@ #ifndef NDEBUG for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == - TargetLowering::TypeLegal && + assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == + TargetLowering::TypeLegal || + TLI.isTypeLegal(Node->getValueType(i))) && "Unexpected illegal type!"); for (const SDValue &Op : Node->op_values()) - assert((TLI.getTypeAction(*DAG.getContext(), - Op.getValueType()) == TargetLowering::TypeLegal || - Op.getOpcode() == ISD::TargetConstant) && - "Unexpected illegal type!"); + assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == + TargetLowering::TypeLegal || + TLI.isTypeLegal(Op.getValueType()) || + Op.getOpcode() == ISD::TargetConstant) && + "Unexpected illegal type!"); #endif // Figure out the correct action; the way to query this varies by opcode @@ -3390,6 +3407,11 @@ Results.push_back(ExpandConstantFP(CFP, true)); break; } + case ISD::Constant: { + ConstantSDNode *CP = cast(Node); + Results.push_back(ExpandConstant(CP)); + break; + } case ISD::FSUB: { EVT VT = Node->getValueType(0); if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) && Index: lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -43,10 +43,10 @@ } //===----------------------------------------------------------------------===// -// Result Float to Integer Conversion. +// Convert Float Results to Integer for Non-HW-supported Operations. //===----------------------------------------------------------------------===// -void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { +bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); dbgs() << "\n"); SDValue R = SDValue(); @@ -59,20 +59,26 @@ #endif llvm_unreachable("Do not know how to soften the result of this operator!"); + case ISD::Register: + case ISD::CopyFromReg: + case ISD::CopyToReg: + assert(isLegalInHWReg(N->getValueType(ResNo)) && + "Unsupported SoftenFloatRes opcode!"); + // Only when isLegalInHWReg, we can skip check of the operands. + R = SDValue(N, ResNo); + break; case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; - case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; + case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N, ResNo); break; case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; - case ISD::ConstantFP: - R = SoftenFloatRes_ConstantFP(cast(N)); - break; + case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break; case ISD::EXTRACT_VECTOR_ELT: R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break; - case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break; case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; case ISD::FADD: R = SoftenFloatRes_FADD(N); break; case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; - case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; + case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N, ResNo); break; case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; @@ -84,7 +90,7 @@ case ISD::FMA: R = SoftenFloatRes_FMA(N); break; case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; - case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; + case ISD::FNEG: R = SoftenFloatRes_FNEG(N, ResNo); break; case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; @@ -97,9 +103,9 @@ case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; - case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; - case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; - case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; + case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break; + case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break; + case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; @@ -107,11 +113,19 @@ } // If R is null, the sub-method took care of registering the result. - if (R.getNode()) + if (R.getNode()) { SetSoftenedFloat(SDValue(N, ResNo), R); + ReplaceSoftenFloatResult(N, ResNo, R); + } + // Return true only if the node is changed, + // assuming that the operands are also converted when necessary. + // Otherwise, return false to tell caller to scan operands. + return R.getNode() && R.getNode() != N; } -SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); return BitConvertToInteger(N->getOperand(0)); } @@ -130,10 +144,14 @@ BitConvertToInteger(N->getOperand(1))); } -SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { - return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), SDLoc(N), +SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, we can load better from the constant pool. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); + ConstantFPSDNode *CN = cast(N); + return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN), TLI.getTypeToTransformTo(*DAG.getContext(), - N->getValueType(0))); + CN->getValueType(0))); } SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -143,7 +161,10 @@ NewOp, N->getOperand(1)); } -SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FABS can be implemented as native bitwise operations. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned Size = NVT.getSizeInBits(); @@ -206,7 +227,10 @@ NVT, Op, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FCOPYSIGN can be implemented as native bitwise operations. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue LHS = GetSoftenedFloat(N->getOperand(0)); SDValue RHS = BitConvertToInteger(N->getOperand(1)); SDLoc dl(N); @@ -390,7 +414,10 @@ NVT, Op, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FNEG can be implemented as native bitwise operations. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); // Expand Y = FNEG(X) -> Y = SUB -0.0, X @@ -580,7 +607,8 @@ NVT, Op, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { + bool LegalInHWReg = isLegalInHWReg(N->getValueType(ResNo)); LoadSDNode *L = cast(N); EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); @@ -595,7 +623,8 @@ L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. - ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + if (N != NewL.getValue(1).getNode()) + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); return NewL; } @@ -609,17 +638,24 @@ // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); - return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL)); + auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL); + if (LegalInHWReg) + return ExtendNode; + return BitConvertToInteger(ExtendNode); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue LHS = GetSoftenedFloat(N->getOperand(1)); SDValue RHS = GetSoftenedFloat(N->getOperand(2)); return DAG.getSelect(SDLoc(N), LHS.getValueType(), N->getOperand(0), LHS, RHS); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue LHS = GetSoftenedFloat(N->getOperand(2)); SDValue RHS = GetSoftenedFloat(N->getOperand(3)); return DAG.getNode(ISD::SELECT_CC, SDLoc(N), @@ -645,7 +681,8 @@ // Legalized the chain result - switch anything that used the old chain to // use the new one. - ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); + if (N != NewVAARG.getValue(1).getNode()) + ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); return NewVAARG; } @@ -679,7 +716,7 @@ //===----------------------------------------------------------------------===// -// Operand Float to Integer Conversion.. +// Convert Float Operand to Integer for Non-HW-supported Operations. //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { @@ -689,6 +726,8 @@ switch (N->getOpcode()) { default: + if (CanSkipSoftenFloatOperand(N, OpNo)) + return false; #ifndef NDEBUG dbgs() << "SoftenFloatOperand Op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; @@ -704,14 +743,23 @@ case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; - case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; + case ISD::STORE: + Res = SoftenFloatOp_STORE(N, OpNo); + // Do not try to analyze or soften this node again if the value is + // or can be held in a register. In that case, Res.getNode() should + // be equal to N. + if (Res.getNode() == N && + isLegalInHWReg(N->getOperand(OpNo).getValueType())) + return false; + // Otherwise, we need to reanalyze and lower the new Res nodes. + break; } // If the result is null, the sub-method took care of registering results etc. if (!Res.getNode()) return false; // If the result is N, the sub-method updated N in place. Tell the legalizer - // core about this. + // core about this to re-analyze. if (Res.getNode() == N) return true; @@ -722,6 +770,41 @@ return false; } +bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { + if (!isLegalInHWReg(N->getOperand(OpNo).getValueType())) + return false; + // When the operand type can be kept in registers, SoftenFloatResult + // will call ReplaceValueWith to replace all references and we can + // skip softening this operand. + switch (N->getOperand(OpNo).getOpcode()) { + case ISD::BITCAST: + case ISD::ConstantFP: + case ISD::CopyFromReg: + case ISD::CopyToReg: + case ISD::FABS: + case ISD::FCOPYSIGN: + case ISD::FNEG: + case ISD::Register: + case ISD::SELECT: + case ISD::SELECT_CC: + return true; + } + // For some opcodes, SoftenFloatResult handles all conversion of softening + // and replacing operands, so that there is no need to soften operands + // again, although such opcode could be scanned for other illegal operands. + switch (N->getOpcode()) { + case ISD::ConstantFP: + case ISD::CopyFromReg: + case ISD::CopyToReg: + case ISD::FABS: + case ISD::FCOPYSIGN: + case ISD::FNEG: + case ISD::Register: + return true; + } + return false; +} + SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), GetSoftenedFloat(N->getOperand(0))); Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -72,6 +72,20 @@ return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal; } + /// isSimpleLegalType - Return true if this is a simple legal type. + bool isSimpleLegalType(EVT VT) const { + return VT.isSimple() && TLI.isTypeLegal(VT); + } + + /// isLegalInHWReg - Return true if this type can be passed in registers. + /// For example, x86_64's f128, should to be legally in registers + /// and only some operations converted to library calls or integer + /// bitwise operations. + bool isLegalInHWReg(EVT VT) const { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return VT == NVT && isSimpleLegalType(VT); + } + EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } @@ -372,32 +386,48 @@ // Float to Integer Conversion Support: LegalizeFloatTypes.cpp //===--------------------------------------------------------------------===// - /// GetSoftenedFloat - Given a processed operand Op which was converted to an - /// integer of the same size, this returns the integer. The integer contains - /// exactly the same bits as Op - only the type changed. For example, if Op - /// is an f32 which was softened to an i32, then this method returns an i32, - /// the bits of which coincide with those of Op. + /// GetSoftenedFloat - Given an operand Op of Float type, returns the integer + /// if the Op is not supported in target HW and converted to the integer. + /// The integer contains exactly the same bits as Op - only the type changed. + /// For example, if Op is an f32 which was softened to an i32, then this method + /// returns an i32, the bits of which coincide with those of Op. + /// If the Op can be efficiently supported in target HW or the operand must + /// stay in a register, the Op is not converted to an integer. + /// In that case, the given op is returned. SDValue GetSoftenedFloat(SDValue Op) { SDValue &SoftenedOp = SoftenedFloats[Op]; + if (!SoftenedOp.getNode() && + isSimpleLegalType(Op.getValueType())) + return Op; RemapValue(SoftenedOp); assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?"); return SoftenedOp; } void SetSoftenedFloat(SDValue Op, SDValue Result); - // Result Float to Integer Conversion. - void SoftenFloatResult(SDNode *N, unsigned OpNo); + // Call ReplaceValueWith(SDValue(N, ResNo), Res) if necessary. + void ReplaceSoftenFloatResult(SDNode *N, unsigned ResNo, SDValue &NewRes) { + // When the result type can be kept in HW registers, the converted + // NewRes node could have the same type. We can save the effort in + // cloning every user of N in SoftenFloatOperand or other legalization functions, + // by calling ReplaceValueWith here to update all users. + if (NewRes.getNode() != N && isLegalInHWReg(N->getValueType(ResNo))) + ReplaceValueWith(SDValue(N, ResNo), NewRes); + } + + // Convert Float Results to Integer for Non-HW-supported Operations. + bool SoftenFloatResult(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_BITCAST(SDNode *N); + SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); - SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N); + SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); - SDValue SoftenFloatRes_FABS(SDNode *N); + SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FADD(SDNode *N); SDValue SoftenFloatRes_FCEIL(SDNode *N); - SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); + SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FCOS(SDNode *N); SDValue SoftenFloatRes_FDIV(SDNode *N); SDValue SoftenFloatRes_FEXP(SDNode *N); @@ -409,7 +439,7 @@ SDValue SoftenFloatRes_FMA(SDNode *N); SDValue SoftenFloatRes_FMUL(SDNode *N); SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); - SDValue SoftenFloatRes_FNEG(SDNode *N); + SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N); SDValue SoftenFloatRes_FP_ROUND(SDNode *N); @@ -422,14 +452,19 @@ SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); SDValue SoftenFloatRes_FTRUNC(SDNode *N); - SDValue SoftenFloatRes_LOAD(SDNode *N); - SDValue SoftenFloatRes_SELECT(SDNode *N); - SDValue SoftenFloatRes_SELECT_CC(SDNode *N); + SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_UNDEF(SDNode *N); SDValue SoftenFloatRes_VAARG(SDNode *N); SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); - // Operand Float to Integer Conversion. + // Return true if we can skip softening the given operand or SDNode because + // it was soften before by SoftenFloatResult and references to the operand + // were replaced by ReplaceValueWith. + bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo); + + // Convert Float Operand to Integer for Non-HW-supported Operations. bool SoftenFloatOperand(SDNode *N, unsigned OpNo); SDValue SoftenFloatOp_BITCAST(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); Index: lib/CodeGen/SelectionDAG/LegalizeTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -238,9 +238,13 @@ Changed = true; goto NodeDone; case TargetLowering::TypeSoftenFloat: - SoftenFloatResult(N, i); - Changed = true; - goto NodeDone; + Changed = SoftenFloatResult(N, i); + if (Changed) + goto NodeDone; + // If not changed, the result type should be legally in register. + assert(isLegalInHWReg(ResultVT) && + "Unchanged SoftenFloatResult should be legal in register!"); + goto ScanOperands; case TargetLowering::TypeExpandFloat: ExpandFloatResult(N, i); Changed = true; @@ -411,18 +415,27 @@ bool Failed = false; // Check that all result types are legal. + // A value type is illegal if its TypeAction is not TypeLegal, + // and TLI.RegClassForVT does not have a register class for this type. + // For example, the x86_64 target has f128 that is not TypeLegal, + // to have softened operators, but it also has FR128 register class to + // pass and return f128 values. Hence a legalized node can have f128 type. if (!IgnoreNodeResults(&Node)) for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i) - if (!isTypeLegal(Node.getValueType(i))) { - dbgs() << "Result type " << i << " illegal!\n"; + if (!isTypeLegal(Node.getValueType(i)) && + !TLI.isTypeLegal(Node.getValueType(i))) { + dbgs() << "Result type " << i << " illegal: "; + Node.dump(); Failed = true; } // Check that all operand types are legal. for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i) if (!IgnoreNodeResults(Node.getOperand(i).getNode()) && - !isTypeLegal(Node.getOperand(i).getValueType())) { - dbgs() << "Operand type " << i << " illegal!\n"; + !isTypeLegal(Node.getOperand(i).getValueType()) && + !TLI.isTypeLegal(Node.getOperand(i).getValueType())) { + dbgs() << "Operand type " << i << " illegal: "; + Node.getOperand(i).dump(); Failed = true; } @@ -748,13 +761,23 @@ } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { - assert(Result.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + // f128 of x86_64 could be kept in SSE registers, + // but sometimes softened to i128. + assert((Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) || + Op.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && "Invalid type for softened float"); AnalyzeNewValue(Result); SDValue &OpEntry = SoftenedFloats[Op]; - assert(!OpEntry.getNode() && "Node is already converted to integer!"); + // Allow repeated calls to save f128 type nodes + // or any node with type that transforms to itself. + // Many operations on these types are not softened. + assert((!OpEntry.getNode()|| + Op.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && + "Node is already converted to integer!"); OpEntry = Result; } Index: lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -53,12 +53,17 @@ case TargetLowering::TypePromoteFloat: llvm_unreachable("Bitcast of a promotion-needing float should never need" "expansion"); - case TargetLowering::TypeSoftenFloat: - // Convert the integer operand instead. - SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); + case TargetLowering::TypeSoftenFloat: { + // Expand the floating point operand only if it was converted to integers. + // Otherwise, it is a legal type like f128 that can be saved in a register. + auto SoftenedOp = GetSoftenedFloat(InOp); + if (SoftenedOp == InOp) + break; + SplitInteger(SoftenedOp, Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; + } case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: { auto &DL = DAG.getDataLayout(); Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2893,8 +2893,10 @@ return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT); if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT); - else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) + if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT); + if (VT == MVT::f128 && C->getValueType(0) == MVT::i128) + return getConstantFP(APFloat(APFloat::IEEEquad, Val), DL, VT); break; case ISD::BSWAP: return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -626,7 +626,10 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G, const SDValue Value) { - if (shouldPrintInline(*Value.getNode())) { + if (!Value.getNode()) { + OS << ""; + return false; + } else if (shouldPrintInline(*Value.getNode())) { OS << Value->getOperationName(G) << ':'; Value->print_types(OS, G); Value->print_details(OS, G); Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1072,7 +1072,9 @@ Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); - if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) { + if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple() && + Op.getOperand(0).getValueType() != MVT::f128) { + // Cannot eliminate/lower SHL for f128 yet. EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32; // Make a FGETSIGN + SHL to move the sign bit into the appropriate // place. We expect the SHL to be eliminated by other optimizations. Index: lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- lib/CodeGen/TargetLoweringBase.cpp +++ lib/CodeGen/TargetLoweringBase.cpp @@ -1654,6 +1654,10 @@ if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger) Cost *= 2; + // Do not loop with f128 type. + if (MTy == LK.second) + return std::make_pair(Cost, MTy.getSimpleVT()); + // Keep legalizing the type. MTy = LK.second; } Index: lib/CodeGen/TargetRegisterInfo.cpp =================================================================== --- lib/CodeGen/TargetRegisterInfo.cpp +++ lib/CodeGen/TargetRegisterInfo.cpp @@ -171,16 +171,24 @@ static inline const TargetRegisterClass *firstCommonClass(const uint32_t *A, const uint32_t *B, - const TargetRegisterInfo *TRI) { + const TargetRegisterInfo *TRI, + const MVT::SimpleValueType SVT = + MVT::SimpleValueType::Any) { + const MVT VT(SVT); for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32) - if (unsigned Common = *A++ & *B++) - return TRI->getRegClass(I + countTrailingZeros(Common)); + if (unsigned Common = *A++ & *B++) { + const TargetRegisterClass *RC = + TRI->getRegClass(I + countTrailingZeros(Common)); + if (SVT == MVT::SimpleValueType::Any || RC->hasType(VT)) + return RC; + } return nullptr; } const TargetRegisterClass * TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, - const TargetRegisterClass *B) const { + const TargetRegisterClass *B, + const MVT::SimpleValueType SVT) const { // First take care of the trivial cases. if (A == B) return A; @@ -189,7 +197,7 @@ // Register classes are ordered topologically, so the largest common // sub-class it the common sub-class with the smallest ID. - return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this); + return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this, SVT); } const TargetRegisterClass * Index: lib/Target/X86/X86CallingConv.td =================================================================== --- lib/Target/X86/X86CallingConv.td +++ lib/Target/X86/X86CallingConv.td @@ -158,6 +158,7 @@ // The X86-64 calling convention always returns FP values in XMM0. CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>, CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>, + CCIfType<[f128], CCAssignToReg<[XMM0, XMM1]>>, // MMX vector types are always returned in XMM0. CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>, @@ -293,7 +294,7 @@ CCIfType<[v64i1], CCPromoteToType>, // The first 8 FP/Vector arguments are passed in XMM registers. - CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCIfType<[f32, f64, f128, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCIfSubtarget<"hasSSE1()", CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, @@ -318,7 +319,7 @@ // Long doubles get stack slots whose size and alignment depends on the // subtarget. - CCIfType<[f80], CCAssignToStack<0, 0>>, + CCIfType<[f80, f128], CCAssignToStack<0, 0>>, // Vectors get 16-byte stack slots that are 16-byte aligned. CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -296,6 +296,7 @@ setOperationAction(ISD::BR_CC , MVT::f32, Expand); setOperationAction(ISD::BR_CC , MVT::f64, Expand); setOperationAction(ISD::BR_CC , MVT::f80, Expand); + setOperationAction(ISD::BR_CC , MVT::f128, Expand); setOperationAction(ISD::BR_CC , MVT::i8, Expand); setOperationAction(ISD::BR_CC , MVT::i16, Expand); setOperationAction(ISD::BR_CC , MVT::i32, Expand); @@ -303,6 +304,7 @@ setOperationAction(ISD::SELECT_CC , MVT::f32, Expand); setOperationAction(ISD::SELECT_CC , MVT::f64, Expand); setOperationAction(ISD::SELECT_CC , MVT::f80, Expand); + setOperationAction(ISD::SELECT_CC , MVT::f128, Expand); setOperationAction(ISD::SELECT_CC , MVT::i8, Expand); setOperationAction(ISD::SELECT_CC , MVT::i16, Expand); setOperationAction(ISD::SELECT_CC , MVT::i32, Expand); @@ -415,12 +417,14 @@ setOperationAction(ISD::SELECT , MVT::f32 , Custom); setOperationAction(ISD::SELECT , MVT::f64 , Custom); setOperationAction(ISD::SELECT , MVT::f80 , Custom); + setOperationAction(ISD::SELECT , MVT::f128 , Custom); setOperationAction(ISD::SETCC , MVT::i8 , Custom); setOperationAction(ISD::SETCC , MVT::i16 , Custom); setOperationAction(ISD::SETCC , MVT::i32 , Custom); setOperationAction(ISD::SETCC , MVT::f32 , Custom); setOperationAction(ISD::SETCC , MVT::f64 , Custom); setOperationAction(ISD::SETCC , MVT::f80 , Custom); + setOperationAction(ISD::SETCC , MVT::f128 , Custom); setOperationAction(ISD::SETCCE , MVT::i8 , Custom); setOperationAction(ISD::SETCCE , MVT::i16 , Custom); setOperationAction(ISD::SETCCE , MVT::i32 , Custom); @@ -619,8 +623,16 @@ setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::FMA, MVT::f32, Expand); - // Long double always uses X87. + // Long double always uses X87, except f128 in MMX. if (!Subtarget->useSoftFloat()) { + if (Subtarget->is64Bit() && Subtarget->hasMMX()) { + addRegisterClass(MVT::f128, &X86::FR128RegClass); + ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat); + setOperationAction(ISD::FABS , MVT::f128, Custom); + setOperationAction(ISD::FNEG , MVT::f128, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom); + } + addRegisterClass(MVT::f80, &X86::RFP80RegClass); setOperationAction(ISD::UNDEF, MVT::f80, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); @@ -2353,7 +2365,7 @@ EVT CopyVT = VA.getLocVT(); // If this is x86-64, and we disabled SSE, we can't return FP values - if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && + if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) && ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) { report_fatal_error("SSE register return with SSE disabled"); } @@ -2637,6 +2649,8 @@ RC = &X86::FR32RegClass; else if (RegVT == MVT::f64) RC = &X86::FR64RegClass; + else if (RegVT == MVT::f128) + RC = &X86::FR128RegClass; else if (RegVT.is512BitVector()) RC = &X86::VR512RegClass; else if (RegVT.is256BitVector()) @@ -13385,6 +13399,8 @@ SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); + bool IsF128 = (VT == MVT::f128); + // FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to // decide if we should generate a 16-byte constant mask when we only need 4 or // 8 bytes for the scalar case. @@ -13397,6 +13413,11 @@ LogicVT = VT; EltVT = VT.getVectorElementType(); NumElts = VT.getVectorNumElements(); + } else if (IsF128) { + // SSE instructions are used for optimized f128 logical operations. + LogicVT = MVT::f128; + EltVT = VT; + NumElts = 1; } else { // There are no scalar bitwise logical SSE/AVX instructions, so we // generate a 16-byte vector constant and logic op even for the scalar case. @@ -13428,7 +13449,7 @@ IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR; SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0; - if (VT.isVector()) + if (VT.isVector() || IsF128) return DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask); // For the scalar case extend to a 128-bit vector, perform the logic op, @@ -13447,6 +13468,7 @@ SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); MVT SrcVT = Op1.getSimpleValueType(); + bool IsF128 = (VT == MVT::f128); // If second operand is smaller, extend it first. if (SrcVT.bitsLT(VT)) { @@ -13461,13 +13483,16 @@ // At this point the operands and the result should have the same // type, and that won't be f80 since that is not custom lowered. + assert((VT == MVT::f64 || VT == MVT::f32 || IsF128) && + "Unexpected type in LowerFCOPYSIGN"); const fltSemantics &Sem = - VT == MVT::f64 ? APFloat::IEEEdouble : APFloat::IEEEsingle; + VT == MVT::f64 ? APFloat::IEEEdouble : + (IsF128 ? APFloat::IEEEquad : APFloat::IEEEsingle); const unsigned SizeInBits = VT.getSizeInBits(); SmallVector CV( - VT == MVT::f64 ? 2 : 4, + VT == MVT::f64 ? 2 : (IsF128 ? 1 : 4), ConstantFP::get(*Context, APFloat(Sem, APInt(SizeInBits, 0)))); // First, clear all bits but the sign bit from the second operand (sign). @@ -13480,12 +13505,13 @@ // Perform all logic operations as 16-byte vectors because there are no // scalar FP logic instructions in SSE. This allows load folding of the // constants into the logic instructions. - MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32; + MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : (IsF128 ? MVT::f128 : MVT::v4f32); SDValue Mask1 = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, false, false, 16); - Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1); + if (!IsF128) + Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1); SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op1, Mask1); // Next, clear the sign bit from the first operand (magnitude). @@ -13494,8 +13520,9 @@ APFloat APF = Op0CN->getValueAPF(); // If the magnitude is a positive zero, the sign bit alone is enough. if (APF.isPosZero()) - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit, - DAG.getIntPtrConstant(0, dl)); + return IsF128 ? SignBit : + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit, + DAG.getIntPtrConstant(0, dl)); APF.clearSign(); CV[0] = ConstantFP::get(*Context, APF); } else { @@ -13511,13 +13538,15 @@ false, false, false, 16); // If the magnitude operand wasn't a constant, we need to AND out the sign. if (!isa(Op0)) { - Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0); + if (!IsF128) + Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0); Val = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op0, Val); } // OR the magnitude value with the sign bit. Val = DAG.getNode(X86ISD::FOR, dl, LogicVT, Val, SignBit); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val, - DAG.getIntPtrConstant(0, dl)); + return IsF128 ? Val : + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val, + DAG.getIntPtrConstant(0, dl)); } static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { @@ -14593,6 +14622,7 @@ // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of // these. if ((isOneConstant(Op1) || isNullConstant(Op1)) && + Op1.getValueType() != MVT::i128 && // getZExtValue() works up to i64 only. (CC == ISD::SETEQ || CC == ISD::SETNE)) { // If the input is a setcc, then reuse the input setcc or use a new one with @@ -21998,6 +22028,7 @@ return EmitLoweredTLSCall(MI, BB); case X86::CMOV_FR32: case X86::CMOV_FR64: + case X86::CMOV_FR128: case X86::CMOV_GR8: case X86::CMOV_GR16: case X86::CMOV_GR32: @@ -23661,7 +23692,8 @@ // ignored in unsafe-math mode). // We also try to create v2f32 min/max nodes, which we later widen to v4f32. if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() && - VT != MVT::f80 && (TLI.isTypeLegal(VT) || VT == MVT::v2f32) && + VT != MVT::f80 && VT != MVT::f128 && + (TLI.isTypeLegal(VT) || VT == MVT::v2f32) && (Subtarget->hasSSE2() || (Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) { ISD::CondCode CC = cast(Cond.getOperand(2))->get(); @@ -27749,6 +27781,7 @@ case MVT::f64: case MVT::i64: return std::make_pair(0U, &X86::FR64RegClass); + // TODO: handle f128 and i128 in FR128RegClass. // Vector types. case MVT::v16i8: case MVT::v8i16: @@ -27861,6 +27894,7 @@ // target independent register mapper will just pick the first match it can // find, ignoring the required type. + // TODO: handle f128 and i128 in FR128RegClass. if (VT == MVT::f32 || VT == MVT::i32) Res.second = &X86::FR32RegClass; else if (VT == MVT::f64 || VT == MVT::i64) Index: lib/Target/X86/X86InstrCompiler.td =================================================================== --- lib/Target/X86/X86InstrCompiler.td +++ lib/Target/X86/X86InstrCompiler.td @@ -512,6 +512,7 @@ defm _FR32 : CMOVrr_PSEUDO; defm _FR64 : CMOVrr_PSEUDO; + defm _FR128 : CMOVrr_PSEUDO; defm _V4F32 : CMOVrr_PSEUDO; defm _V2F64 : CMOVrr_PSEUDO; defm _V2I64 : CMOVrr_PSEUDO; Index: lib/Target/X86/X86InstrInfo.td =================================================================== --- lib/Target/X86/X86InstrInfo.td +++ lib/Target/X86/X86InstrInfo.td @@ -953,11 +953,12 @@ return false; }]>; -def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>; -def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>; -def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>; -def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>; -def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>; +def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>; +def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>; +def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>; +def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>; +def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>; +def loadf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr))>; def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>; def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>; Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -413,6 +413,8 @@ def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>; + def : Pat<(f128 (bitconvert (i128 FR128:$src))), (f128 FR128:$src)>; + def : Pat<(i128 (bitconvert (f128 FR128:$src))), (i128 FR128:$src)>; } // Bitcasts between 256-bit vector types. Return the original type since @@ -8851,3 +8853,48 @@ defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", VR128, vx32mem, vy32mem>; } } + +//===----------------------------------------------------------------------===// +// Extra selection patterns for FR128, f128, f128mem + +def : Pat<(store (f128 FR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, (COPY_TO_REGCLASS (f128 FR128:$src), VR128))>; +// When the data is used as floating point, "movaps" should be faster and shorter +// than "movdqa". "movaps" is in SSE and movdqa is in SSE2. + +def : Pat<(loadf128 addr:$src), + (COPY_TO_REGCLASS (MOVAPSrm addr:$src), FR128)>; + +// andps is faster and shorter than andpd, andps is SSE and andpd is SSE2 +def : Pat<(X86fand FR128:$src1, (loadf128 addr:$src2)), + (COPY_TO_REGCLASS (ANDPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2), FR128)>; + +def : Pat<(X86fand FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS (ANDPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +def : Pat<(and FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS (ANDPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +def : Pat<(X86for FR128:$src1, (loadf128 addr:$src2)), + (COPY_TO_REGCLASS (ORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2), FR128)>; + +def : Pat<(X86for FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS (ORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +def : Pat<(or FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS (ORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +def : Pat<(X86fxor FR128:$src1, (loadf128 addr:$src2)), + (COPY_TO_REGCLASS (XORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2), FR128)>; + +def : Pat<(X86fxor FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS (XORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +def : Pat<(xor FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS (XORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; Index: lib/Target/X86/X86MCInstLower.cpp =================================================================== --- lib/Target/X86/X86MCInstLower.cpp +++ lib/Target/X86/X86MCInstLower.cpp @@ -1373,7 +1373,19 @@ if (isa(COp)) { CS << "u"; } else if (auto *CI = dyn_cast(COp)) { - CS << CI->getZExtValue(); + if (CI->getBitWidth() <= 64) { + CS << CI->getZExtValue(); + } else { + // print multi-word constant as (w0,w1) + auto Val = CI->getValue(); + CS << "("; + for (int i = 0, N = Val.getNumWords(); i < N; ++i) { + if (i > 0) + CS << ","; + CS << Val.getRawData()[i]; + } + CS << ")"; + } } else if (auto *CF = dyn_cast(COp)) { SmallString<32> Str; CF->getValueAPF().toString(Str); Index: lib/Target/X86/X86RegisterInfo.td =================================================================== --- lib/Target/X86/X86RegisterInfo.td +++ lib/Target/X86/X86RegisterInfo.td @@ -423,6 +423,8 @@ def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>; +def FR128 : RegisterClass<"X86", [i128, f128], 128, (add FR32)>; + // FIXME: This sets up the floating point register files as though they are f64 // values, though they really are f80 values. This will cause us to spill Index: test/CodeGen/X86/fp128-calling-conv.ll =================================================================== --- test/CodeGen/X86/fp128-calling-conv.ll +++ test/CodeGen/X86/fp128-calling-conv.ll @@ -0,0 +1,197 @@ +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s + +; double myD = 1.0; +@myD = global double 1.000000e+00, align 8 + +; long double myFP80 = 1.0L; // x86_64-linux-gnu +@myFP80 = global x86_fp80 0xK3FFF8000000000000000, align 16 + +; long double myFP128 = 1.0L; // x86_64-linux-android +@myFP128 = global fp128 0xL00000000000000003FFF000000000000, align 16 + +; The first few parameters are passed in registers and the other are on stack. + +define i64 @TestParam_L_0(i64 %d0, i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, i64 %d10, i64 %d11, i64 %d12, i64 %d13, i64 %d14, i64 %d15, i64 %d16, i64 %d17, i64 %d18, i64 %d19) { +entry: + ret i64 %d0 +; CHECK-LABEL: TestParam_L_0: +; CHECK: movq %rdi, %rax +; CHECK-NEXT: retq +} + +define i64 @TestParam_L_1(i64 %d0, i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, i64 %d10, i64 %d11, i64 %d12, i64 %d13, i64 %d14, i64 %d15, i64 %d16, i64 %d17, i64 %d18, i64 %d19) { +entry: + ret i64 %d1 +; CHECK-LABEL: TestParam_L_1: +; CHECK: movq %rsi, %rax +; CHECK-NEXT: retq +} + +define i64 @TestParam_L_2(i64 %d0, i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, i64 %d10, i64 %d11, i64 %d12, i64 %d13, i64 %d14, i64 %d15, i64 %d16, i64 %d17, i64 %d18, i64 %d19) { +entry: + ret i64 %d2 +; CHECK-LABEL: TestParam_L_2: +; CHECK: movq %rdx, %rax +; CHECK-NEXT: retq +} + +define i64 @TestParam_L_3(i64 %d0, i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, i64 %d10, i64 %d11, i64 %d12, i64 %d13, i64 %d14, i64 %d15, i64 %d16, i64 %d17, i64 %d18, i64 %d19) { +entry: + ret i64 %d3 +; CHECK-LABEL: TestParam_L_3: +; CHECK: movq %rcx, %rax +; CHECK-NEXT: retq +} + +define i64 @TestParam_L_4(i64 %d0, i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, i64 %d10, i64 %d11, i64 %d12, i64 %d13, i64 %d14, i64 %d15, i64 %d16, i64 %d17, i64 %d18, i64 %d19) { +entry: + ret i64 %d4 +; CHECK-LABEL: TestParam_L_4: +; CHECK: movq %r8, %rax +; CHECK-NEXT: retq +} + +define i64 @TestParam_L_5(i64 %d0, i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, i64 %d10, i64 %d11, i64 %d12, i64 %d13, i64 %d14, i64 %d15, i64 %d16, i64 %d17, i64 %d18, i64 %d19) { +entry: + ret i64 %d5 +; CHECK-LABEL: TestParam_L_5: +; CHECK: movq %r9, %rax +; CHECK-NEXT: retq +} + +define i64 @TestParam_L_6(i64 %d0, i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, i64 %d10, i64 %d11, i64 %d12, i64 %d13, i64 %d14, i64 %d15, i64 %d16, i64 %d17, i64 %d18, i64 %d19) { +entry: + ret i64 %d6 +; CHECK-LABEL: TestParam_L_6: +; CHECK: movq 8(%rsp), %rax +; CHECK-NEXT: retq +} + +define i64 @TestParam_L_7(i64 %d0, i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, i64 %d10, i64 %d11, i64 %d12, i64 %d13, i64 %d14, i64 %d15, i64 %d16, i64 %d17, i64 %d18, i64 %d19) { +entry: + ret i64 %d7 +; CHECK-LABEL: TestParam_L_7: +; CHECK: movq 16(%rsp), %rax +; CHECK-NEXT: retq +} + +define float @TestParam_F_0(float %d0, float %d1, float %d2, float %d3, float %d4, float %d5, float %d6, float %d7, float %d8, float %d9, float %d10, float %d11, float %d12, float %d13, float %d14, float %d15, float %d16, float %d17, float %d18, float %d19) { +entry: + ret float %d0 +; CHECK-LABEL: TestParam_F_0: +; CHECK-NOT: mov +; CHECK: retq +} + +define float @TestParam_F_1(float %d0, float %d1, float %d2, float %d3, float %d4, float %d5, float %d6, float %d7, float %d8, float %d9, float %d10, float %d11, float %d12, float %d13, float %d14, float %d15, float %d16, float %d17, float %d18, float %d19) { +entry: + ret float %d1 +; CHECK-LABEL: TestParam_F_1: +; CHECK: movaps %xmm1, %xmm0 +; CHECK-NEXT: retq +} + +define float @TestParam_F_7(float %d0, float %d1, float %d2, float %d3, float %d4, float %d5, float %d6, float %d7, float %d8, float %d9, float %d10, float %d11, float %d12, float %d13, float %d14, float %d15, float %d16, float %d17, float %d18, float %d19) { +entry: + ret float %d7 +; CHECK-LABEL: TestParam_F_7: +; CHECK: movaps %xmm7, %xmm0 +; CHECK-NEXT: retq +} + +define float @TestParam_F_8(float %d0, float %d1, float %d2, float %d3, float %d4, float %d5, float %d6, float %d7, float %d8, float %d9, float %d10, float %d11, float %d12, float %d13, float %d14, float %d15, float %d16, float %d17, float %d18, float %d19) { +entry: + ret float %d8 +; CHECK-LABEL: TestParam_F_8: +; CHECK: movss 8(%rsp), %xmm0 +; CHECK-NEXT: retq +} + +define float @TestParam_F_9(float %d0, float %d1, float %d2, float %d3, float %d4, float %d5, float %d6, float %d7, float %d8, float %d9, float %d10, float %d11, float %d12, float %d13, float %d14, float %d15, float %d16, float %d17, float %d18, float %d19) { +entry: + ret float %d9 +; CHECK-LABEL: TestParam_F_9: +; CHECK: movss 16(%rsp), %xmm0 +; CHECK-NEXT: retq +} + +define double @TestParam_D_0(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10, double %d11, double %d12, double %d13, double %d14, double %d15, double %d16, double %d17, double %d18, double %d19) { +entry: + ret double %d0 +; CHECK-LABEL: TestParam_D_0: +; CHECK-NOT: mov +; CHECK: retq +} + +define double @TestParam_D_1(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10, double %d11, double %d12, double %d13, double %d14, double %d15, double %d16, double %d17, double %d18, double %d19) { +entry: + ret double %d1 +; CHECK-LABEL: TestParam_D_1: +; CHECK: movaps %xmm1, %xmm0 +; CHECK-NEXT: retq +} + +define double @TestParam_D_7(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10, double %d11, double %d12, double %d13, double %d14, double %d15, double %d16, double %d17, double %d18, double %d19) { +entry: + ret double %d7 +; CHECK-LABEL: TestParam_D_7: +; CHECK: movaps %xmm7, %xmm0 +; CHECK-NEXT: retq +} + +define double @TestParam_D_8(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10, double %d11, double %d12, double %d13, double %d14, double %d15, double %d16, double %d17, double %d18, double %d19) { +entry: + ret double %d8 +; CHECK-LABEL: TestParam_D_8: +; CHECK: movsd 8(%rsp), %xmm0 +; CHECK-NEXT: retq +} + +define double @TestParam_D_9(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10, double %d11, double %d12, double %d13, double %d14, double %d15, double %d16, double %d17, double %d18, double %d19) { +entry: + ret double %d9 +; CHECK-LABEL: TestParam_D_9: +; CHECK: movsd 16(%rsp), %xmm0 +; CHECK-NEXT: retq +} + +define fp128 @TestParam_FP128_0(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) { +entry: + ret fp128 %d0 +; CHECK-LABEL: TestParam_FP128_0: +; CHECK-NOT: mov +; CHECK: retq +} + +define fp128 @TestParam_FP128_1(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) { +entry: + ret fp128 %d1 +; CHECK-LABEL: TestParam_FP128_1: +; CHECK: movaps %xmm1, %xmm0 +; CHECK-NEXT: retq +} + +define fp128 @TestParam_FP128_7(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) { +entry: + ret fp128 %d7 +; CHECK-LABEL: TestParam_FP128_7: +; CHECK: movaps %xmm7, %xmm0 +; CHECK-NEXT: retq +} + +define fp128 @TestParam_FP128_8(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) { +entry: + ret fp128 %d8 +; CHECK-LABEL: TestParam_FP128_8: +; CHECK: movaps 8(%rsp), %xmm0 +; CHECK-NEXT: retq +} + +define fp128 @TestParam_FP128_9(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) { +entry: + ret fp128 %d9 +; CHECK-LABEL: TestParam_FP128_9: +; CHECK: movaps 24(%rsp), %xmm0 +; CHECK-NEXT: retq +} Index: test/CodeGen/X86/fp128-cast.ll =================================================================== --- test/CodeGen/X86/fp128-cast.ll +++ test/CodeGen/X86/fp128-cast.ll @@ -0,0 +1,372 @@ +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s + +; Check soft floating point conversion function calls. + +@vi32 = common global i32 0, align 4 +@vi64 = common global i64 0, align 8 +@vf32 = common global float 0.000000e+00, align 4 +@vf64 = common global double 0.000000e+00, align 8 +@vf128 = common global fp128 0xL00000000000000000000000000000000, align 16 + +define void @TestCastF32_I32() { +entry: + %0 = load float, float* @vf32, align 4 + %conv = fptosi float %0 to i32 + store i32 %conv, i32* @vi32, align 4 + ret void +; CHECK-LABEL: TestCastF32_I32: +; CHECK: cvttss2si vf32(%rip), %eax +; CHECK-NEXT: movl %eax, vi32(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastF32_I64() { +entry: + %0 = load float, float* @vf32, align 4 + %conv = fptosi float %0 to i32 + %conv1 = sext i32 %conv to i64 + store i64 %conv1, i64* @vi64, align 8 + ret void +; CHECK-LABEL: TestCastF32_I64: +; CHECK: cvttss2si vf32(%rip), %eax +; CHECK-NEXT: cltq +; CHECK-NEXT: movq %rax, vi64(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastF32_F64() { +entry: + %0 = load float, float* @vf32, align 4 + %conv = fpext float %0 to double + store double %conv, double* @vf64, align 8 + ret void +; CHECK-LABEL: TestCastF32_F64: +; CHECK: movss vf32(%rip), %xmm0 +; CHECK-NEXT: cvtss2sd %xmm0, %xmm0 +; CHECK-NEXT: movsd %xmm0, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastF32_F128() { +entry: + %0 = load float, float* @vf32, align 4 + %conv = fpext float %0 to fp128 + store fp128 %conv, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: TestCastF32_F128: +; CHECK: movss vf32(%rip), %xmm0 +; CHECK-NEXT: callq __extendsftf2 +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} + +define void @TestCastF64_I32() { +entry: + %0 = load double, double* @vf64, align 8 + %conv = fptosi double %0 to i32 + store i32 %conv, i32* @vi32, align 4 + ret void +; CHECK-LABEL: TestCastF64_I32: +; CHECK: cvttsd2si vf64(%rip), %eax +; CHECK-NEXT: movl %eax, vi32(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastF64_I64() { +entry: + %0 = load double, double* @vf64, align 8 + %conv = fptosi double %0 to i32 + %conv1 = sext i32 %conv to i64 + store i64 %conv1, i64* @vi64, align 8 + ret void +; CHECK-LABEL: TestCastF64_I64: +; CHECK: cvttsd2si vf64(%rip), %eax +; CHECK-NEXT: cltq +; CHECK-NEXT: movq %rax, vi64(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastF64_F32() { +entry: + %0 = load double, double* @vf64, align 8 + %conv = fptrunc double %0 to float + store float %conv, float* @vf32, align 4 + ret void +; CHECK-LABEL: TestCastF64_F32: +; CHECK: movsd vf64(%rip), %xmm0 +; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0 +; CHECK-NEXT: movss %xmm0, vf32(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastF64_F128() { +entry: + %0 = load double, double* @vf64, align 8 + %conv = fpext double %0 to fp128 + store fp128 %conv, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: TestCastF64_F128: +; CHECK: movsd vf64(%rip), %xmm0 +; CHECK-NEXT: callq __extenddftf2 +; CHECK-NEXT: movapd %xmm0, vf128(%rip) +; CHECK: ret +} + +define void @TestCastF128_I32() { +entry: + %0 = load fp128, fp128* @vf128, align 16 + %conv = fptosi fp128 %0 to i32 + store i32 %conv, i32* @vi32, align 4 + ret void +; CHECK-LABEL: TestCastF128_I32: +; CHECK: movaps vf128(%rip), %xmm0 +; CHECK-NEXT: callq __fixtfsi +; CHECK-NEXT: movl %eax, vi32(%rip) +; CHECK: retq +} + +define void @TestCastF128_I64() { +entry: + %0 = load fp128, fp128* @vf128, align 16 + %conv = fptosi fp128 %0 to i32 + %conv1 = sext i32 %conv to i64 + store i64 %conv1, i64* @vi64, align 8 + ret void +; CHECK-LABEL: TestCastF128_I64: +; CHECK: movaps vf128(%rip), %xmm0 +; CHECK-NEXT: callq __fixtfsi +; CHECK-NEXT: cltq +; CHECK-NEXT: movq %rax, vi64(%rip) +; CHECK: retq +} + +define void @TestCastF128_F32() { +entry: + %0 = load fp128, fp128* @vf128, align 16 + %conv = fptrunc fp128 %0 to float + store float %conv, float* @vf32, align 4 + ret void +; CHECK-LABEL: TestCastF128_F32: +; CHECK: movaps vf128(%rip), %xmm0 +; CHECK-NEXT: callq __trunctfsf2 +; CHECK-NEXT: movss %xmm0, vf32(%rip) +; CHECK: retq +} + +define void @TestCastF128_F64() { +entry: + %0 = load fp128, fp128* @vf128, align 16 + %conv = fptrunc fp128 %0 to double + store double %conv, double* @vf64, align 8 + ret void +; CHECK-LABEL: TestCastF128_F64: +; CHECK: movapd vf128(%rip), %xmm0 +; CHECK-NEXT: callq __trunctfdf2 +; CHECK-NEXT: movsd %xmm0, vf64(%rip) +; CHECK: retq +} + +define void @TestCastI32_I64() { +entry: + %0 = load i32, i32* @vi32, align 4 + %conv = sitofp i32 %0 to float + %conv1 = fptosi float %conv to i64 + store i64 %conv1, i64* @vi64, align 8 + ret void +; CHECK-LABEL: TestCastI32_I64: +; CHECK: cvtsi2ssl vi32(%rip), %xmm0 +; CHECK-NEXT: cvttss2si %xmm0, %rax +; CHECK-NEXT: movq %rax, vi64(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastI32_F32() { +entry: + %0 = load i32, i32* @vi32, align 4 + %conv = sitofp i32 %0 to float + store float %conv, float* @vf32, align 4 + ret void +; CHECK-LABEL: TestCastI32_F32: +; CHECK: cvtsi2ssl vi32(%rip), %xmm0 +; CHECK-NEXT: movss %xmm0, vf32(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastI32_F64() { +entry: + %0 = load i32, i32* @vi32, align 4 + %conv = sitofp i32 %0 to double + store double %conv, double* @vf64, align 8 + ret void +; CHECK-LABEL: TestCastI32_F64: +; CHECK: cvtsi2sdl vi32(%rip), %xmm0 +; CHECK-NEXT: movsd %xmm0, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastI32_F128() { +entry: + %0 = load i32, i32* @vi32, align 4 + %conv = sitofp i32 %0 to fp128 + store fp128 %conv, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: TestCastI32_F128: +; CHECK: movl vi32(%rip), %edi +; CHECK-NEXT: callq __floatsitf +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} + +define void @TestCastI64_I32(){ +entry: + %0 = load i64, i64* @vi64, align 8 + %conv = sitofp i64 %0 to float + %conv1 = fptosi float %conv to i32 + store i32 %conv1, i32* @vi32, align 4 + ret void +; CHECK-LABEL: TestCastI64_I32: +; CHECK: cvtsi2ssq vi64(%rip), %xmm0 +; CHECK: cvttss2si %xmm0, %eax +; CHECK: movl %eax, vi32(%rip) +; CHECK: retq +} + +define void @TestCastI64_F32(){ +entry: + %0 = load i64, i64* @vi64, align 8 + %conv = sitofp i64 %0 to float + store float %conv, float* @vf32, align 4 + ret void +; CHECK-LABEL: TestCastI64_F32: +; CHECK: cvtsi2ssq vi64(%rip), %xmm0 +; CHECK-NEXT: movss %xmm0, vf32(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastI64_F64(){ +entry: + %0 = load i64, i64* @vi64, align 8 + %conv = sitofp i64 %0 to double + store double %conv, double* @vf64, align 8 + ret void +; CHECK-LABEL: TestCastI64_F64: +; CHECK: cvtsi2sdq vi64(%rip), %xmm0 +; CHECK-NEXT: movsd %xmm0, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastI64_F128(){ +entry: + %0 = load i64, i64* @vi64, align 8 + %conv = sitofp i64 %0 to fp128 + store fp128 %conv, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: TestCastI64_F128: +; CHECK: movq vi64(%rip), %rdi +; CHECK-NEXT: callq __floatditf +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} + +define i32 @TestConst32(float %v) { +entry: + %cmp = fcmp ogt float %v, 1.000000e+00 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestConst32: +; CHECK: ucomiss {{.*}}, %xmm0 +; CHECK-NEXT: seta %al +; CHECK: retq +} + +define i32 @TestConst64(double %v) { +entry: + %cmp = fcmp ogt double %v, 1.000000e+00 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestConst64: +; CHECK: ucomisd {{.*}}, %xmm0 +; CHECK-NEXT: seta %al +; CHECK: retq +} + +define i32 @TestConst128(fp128 %v) { +entry: + %cmp = fcmp ogt fp128 %v, 0xL00000000000000003FFF000000000000 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestConst128: +; CHECK: movaps {{.*}}, %xmm1 +; CHECK-NEXT: callq __gttf2 +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: setg %al +; CHECK: retq +} + +define i32 @TestBits128(fp128 %ld) { +entry: + %mul = fmul fp128 %ld, %ld + %0 = bitcast fp128 %mul to i128 + %u.sroa.0.4.extract.shift = lshr i128 %0, 32 + %or5 = or i128 %u.sroa.0.4.extract.shift, %0 + %or = trunc i128 %or5 to i32 + %cmp = icmp eq i32 %or, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestBits128: +; CHECK: movaps %xmm0, %xmm1 +; CHECK-NEXT: callq __multf3 +; CHECK-NEXT: movaps %xmm0, (%rsp) +; CHECK-NEXT: movq (%rsp), +; CHECK-NEXT: movq % +; CHECK-NEXT: shrq $32, +; CHECK: orl +; CHECK-NEXT: sete %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK: retq +} + +define fp128 @TestPair128(i64 %a, i64 %b) { +entry: + %conv = zext i64 %a to i128 + %shl = shl nuw i128 %conv, 64 + %conv1 = zext i64 %b to i128 + %or = or i128 %shl, %conv1 + %add = add i128 %or, 3 + %0 = bitcast i128 %add to fp128 + ret fp128 %0 +; CHECK-LABEL: TestPair128: +; CHECK: addq $3, %rsi +; CHECK-NEXT: movq %rsi, -24(%rsp) +; CHECK-NEXT: adcq $0, %rdi +; CHECK-NEXT: movq %rdi, -16(%rsp) +; CHECK-NEXT: movaps -24(%rsp), %xmm0 +; CHECK-NEXT: retq +} + +define fp128 @TestTruncCopysign(fp128 %x, i32 %n) { +entry: + %cmp = icmp sgt i32 %n, 50000 + br i1 %cmp, label %if.then, label %cleanup + +if.then: ; preds = %entry + %conv = fptrunc fp128 %x to double + %call = tail call double @copysign(double 0x7FF0000000000000, double %conv) #2 + %conv1 = fpext double %call to fp128 + br label %cleanup + +cleanup: ; preds = %entry, %if.then + %retval.0 = phi fp128 [ %conv1, %if.then ], [ %x, %entry ] + ret fp128 %retval.0 +; CHECK-LABEL: TestTruncCopysign: +; CHECK: callq __trunctfdf2 +; CHECK-NEXT: andpd {{.*}}, %xmm0 +; CHECK-NEXT: orpd {{.*}}, %xmm0 +; CHECK-NEXT: callq __extenddftf2 +; CHECK: retq +} + +declare double @copysign(double, double) #1 + +attributes #2 = { nounwind readnone } Index: test/CodeGen/X86/fp128-compare.ll =================================================================== --- test/CodeGen/X86/fp128-compare.ll +++ test/CodeGen/X86/fp128-compare.ll @@ -0,0 +1,211 @@ +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s + +define i32 @TestComp32GT(float %d1, float %d2) { +entry: + %cmp = fcmp ogt float %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp32GT: +; CHECK: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: seta %al +} + +define i32 @TestComp64GT(double %d1, double %d2) { +entry: + %cmp = fcmp ogt double %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp64GT: +; CHECK: ucomisd %xmm1, %xmm0 +; CHECK-NEXT: seta %al +} + +define i32 @TestComp128GT(fp128 %d1, fp128 %d2) { +entry: + %cmp = fcmp ogt fp128 %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp128GT: +; CHECK: callq __gttf2 +; CHECK: setg %al +; CHECK: retq +} + +define i32 @TestComp32GE(float %d1, float %d2) { +entry: + %cmp = fcmp oge float %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp32GE: +; CHECK: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: setae %al +} + +define i32 @TestComp64GE(double %d1, double %d2) { +entry: + %cmp = fcmp oge double %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp64GE: +; CHECK: ucomisd %xmm1, %xmm0 +; CHECK-NEXT: setae %al +} + +define i32 @TestComp128GE(fp128 %d1, fp128 %d2) { +entry: + %cmp = fcmp oge fp128 %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp128GE: +; CHECK: callq __getf2 +; CHECK: testl %eax, %eax +; CHECK: retq +} + +define i32 @TestComp32LT(float %d1, float %d2) { +entry: + %cmp = fcmp olt float %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp32LT: +; CHECK: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: seta %al +} + +define i32 @TestComp64LT(double %d1, double %d2) { +entry: + %cmp = fcmp olt double %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp64LT: +; CHECK: ucomisd %xmm0, %xmm1 +; CHECK-NEXT: seta %al +} + +define i32 @TestComp128LT(fp128 %d1, fp128 %d2) { +entry: + %cmp = fcmp olt fp128 %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp128LT: +; CHECK: callq __lttf2 +; CHECK-NEXT: shrl $31, %eax +; CHECK: retq +} + +define i32 @TestComp32LE(float %d1, float %d2) { +entry: + %cmp = fcmp ole float %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp32LE: +; CHECK: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: setae %al +} + +define i32 @TestComp64LE(double %d1, double %d2) { +entry: + %cmp = fcmp ole double %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp64LE: +; CHECK: ucomisd %xmm0, %xmm1 +; CHECK-NEXT: setae %al +} + +define i32 @TestComp128LE(fp128 %d1, fp128 %d2) { +entry: + %cmp = fcmp ole fp128 %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp128LE: +; CHECK: callq __letf2 +; CHECK-NEXT: testl %eax, %eax +; CHECK: retq +} + +define i32 @TestComp32EQ(float %d1, float %d2) { +entry: + %cmp = fcmp oeq float %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp32EQ: +; CHECK: cmpeqss %xmm1, %xmm0 +; CHECK-NEXT: movd %xmm0, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: retq +} + +define i32 @TestComp64EQ(double %d1, double %d2) { +entry: + %cmp = fcmp oeq double %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp64EQ: +; CHECK: cmpeqsd %xmm1, %xmm0 +; CHECK-NEXT: movd %xmm0, %rax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: retq +} + +define i32 @TestComp128EQ(fp128 %d1, fp128 %d2) { +entry: + %cmp = fcmp oeq fp128 %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp128EQ: +; CHECK: callq __eqtf2 +; CHECK-NEXT: testl %eax, %eax +; CHECK: retq +} + +define i32 @TestComp32NE(float %d1, float %d2) { +entry: + %cmp = fcmp une float %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp32NE: +; CHECK: cmpneqss %xmm1, %xmm0 +; CHECK-NEXT: movd %xmm0, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: retq +} + +define i32 @TestComp64NE(double %d1, double %d2) { +entry: + %cmp = fcmp une double %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp64NE: +; CHECK: cmpneqsd %xmm1, %xmm0 +; CHECK-NEXT: movd %xmm0, %rax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: retq +} + +define i32 @TestComp128NE(fp128 %d1, fp128 %d2) { +entry: + %cmp = fcmp une fp128 %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp128NE: +; CHECK: callq __netf2 +; CHECK-NEXT: testl %eax, %eax +; CHECK: retq +} + +define fp128 @TestMax(fp128 %x, fp128 %y) { +entry: + %cmp = fcmp ogt fp128 %x, %y + %cond = select i1 %cmp, fp128 %x, fp128 %y + ret fp128 %cond +; CHECK-LABEL: TestMax: +; CHECK: movaps %xmm1 +; CHECK: movaps %xmm0 +; CHECK: callq __gttf2 +; CHECK: movaps {{.*}}, %xmm0 +; CHECK: testl %eax, %eax +; CHECK: movaps {{.*}}, %xmm0 +; CHECK: retq +} Index: test/CodeGen/X86/fp128-i128.ll =================================================================== --- test/CodeGen/X86/fp128-i128.ll +++ test/CodeGen/X86/fp128-i128.ll @@ -0,0 +1,241 @@ +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s + +; Check some i128 instruction patterns triggered by fp128. + +define void @TestUnionLD1(fp128 %s, i64 %n) #0 { +entry: + %0 = bitcast fp128 %s to i128 + %1 = zext i64 %n to i128 + %bf.value = shl nuw i128 %1, 64 + %bf.shl = and i128 %bf.value, 5192296858534809181786422619668480 + %bf.clear = and i128 %0, -5192296858534809181786422619668481 + %bf.set = or i128 %bf.shl, %bf.clear + %2 = bitcast i128 %bf.set to fp128 + tail call void @foo(fp128 %2) #2 + ret void +; CHECK-LABEL: TestUnionLD1: +; CHECK: movaps %xmm0, -24(%rsp) +; CHECK-NEXT: movq -24(%rsp), %rax +; CHECK-NEXT: movabsq $281474976710655, %rcx +; CHECK-NEXT: andq %rdi, %rcx +; CHECK-NEXT: movabsq $-281474976710656, %rdx +; CHECK-NEXT: andq -16(%rsp), %rdx +; CHECK-NEXT: movq %rax, -40(%rsp) +; CHECK-NEXT: orq %rcx, %rdx +; CHECK-NEXT: movq %rdx, -32(%rsp) +; CHECK-NEXT: movaps -40(%rsp), %xmm0 +; CHECK-NEXT: jmp foo +} + +define fp128 @TestUnionLD2(fp128 %s) #0 { +entry: + %0 = bitcast fp128 %s to i128 + %bf.clear = and i128 %0, -18446744073709551616 + %1 = bitcast i128 %bf.clear to fp128 + ret fp128 %1 +; CHECK-LABEL: TestUnionLD2: +; CHECK: movaps %xmm0, -24(%rsp) +; CHECK-NEXT: movq -16(%rsp), %rax +; CHECK-NEXT: movq %rax, -32(%rsp) +; CHECK-NEXT: movq $0, -40(%rsp) +; CHECK-NEXT: movaps -40(%rsp), %xmm0 +; CHECK-NEXT: retq +} + +define fp128 @TestI128_1(fp128 %x) #0 { +entry: + %0 = bitcast fp128 %x to i128 + %bf.clear = and i128 %0, 170141183460469231731687303715884105727 + %1 = bitcast i128 %bf.clear to fp128 + %cmp = fcmp olt fp128 %1, 0xL999999999999999A3FFB999999999999 + %cond = select i1 %cmp, fp128 0xL00000000000000003FFF000000000000, fp128 0xL00000000000000004000000000000000 + ret fp128 %cond +; CHECK-LABEL: TestI128_1: +; CHECK: movaps %xmm0, +; CHECK: movabsq $9223372036854775807, +; CHECK: callq __lttf2 +; CHECK: testl %eax, %eax +; CHECK: movaps {{.*}}, %xmm0 +; CHECK: retq +} + +define fp128 @TestI128_2(fp128 %x, fp128 %y) #0 { +entry: + %0 = bitcast fp128 %x to i128 + %cmp = icmp sgt i128 %0, -1 + %cond = select i1 %cmp, fp128 %x, fp128 %y + ret fp128 %cond +; CHECK-LABEL: TestI128_2: +; CHECK: movaps %xmm0, -24(%rsp) +; CHECK-NEXT: cmpq $0, -16(%rsp) +; CHECK-NEXT: jns +; CHECK: movaps %xmm1, %xmm0 +; CHECK: retq +} + +define fp128 @TestI128_3(fp128 %x, i32* nocapture readnone %ex) #0 { +entry: + %0 = bitcast fp128 %x to i128 + %bf.cast = and i128 %0, 170135991163610696904058773219554885632 + %cmp = icmp eq i128 %bf.cast, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %mul = fmul fp128 %x, 0xL00000000000000004201000000000000 + %1 = bitcast fp128 %mul to i128 + %bf.clear4 = and i128 %1, -170135991163610696904058773219554885633 + %bf.set = or i128 %bf.clear4, 85060207136517546210586590865283612672 + br label %if.end + +if.end: ; preds = %if.then, %entry + %u.sroa.0.0 = phi i128 [ %bf.set, %if.then ], [ %0, %entry ] + %2 = bitcast i128 %u.sroa.0.0 to fp128 + ret fp128 %2 +; CHECK-LABEL: TestI128_3: +; CHECK: movaps %xmm0, +; CHECK: movabsq $9223090561878065152, +; CHECK: testq +; CHECK: callq __multf3 +; CHECK-NEXT: movaps %xmm0 +; CHECK: movabsq $-9223090561878065153, +; CHECK: movabsq $4611123068473966592, +; CHECK: retq +} + +define fp128 @TestI128_4(fp128 %x) #0 { +entry: + %0 = bitcast fp128 %x to i128 + %bf.clear = and i128 %0, -18446744073709551616 + %1 = bitcast i128 %bf.clear to fp128 + %add = fadd fp128 %1, %x + ret fp128 %add +; CHECK-LABEL: TestI128_4: +; CHECK: movaps %xmm0, %xmm1 +; CHECK-NEXT: movaps %xmm1, 16(%rsp) +; CHECK-NEXT: movq 24(%rsp), %rax +; CHECK-NEXT: movq %rax, 8(%rsp) +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: movaps (%rsp), %xmm0 +; CHECK-NEXT: callq __addtf3 +; CHECK: retq +} + +define { i64, i64 } @TestShift128(i64 %x.coerce0, i64 %x.coerce1) #0 { +entry: + %.fca.1.insert = insertvalue { i64, i64 } { i64 0, i64 undef }, i64 %x.coerce0, 1 + ret { i64, i64 } %.fca.1.insert +; CHECK-LABEL: TestShift128: +; CHECK: xorl %eax, %eax +; CHECK-NEXT: movq %rdi, %rdx +; CHECK-NEXT: retq +} + +@v128 = common global i128 0, align 16 +@v128_2 = common global i128 0, align 16 + +define void @TestShift128_2() #2 { +entry: + %0 = load i128, i128* @v128, align 16 + %shl = shl i128 %0, 96 + %1 = load i128, i128* @v128_2, align 16 + %or = or i128 %shl, %1 + store i128 %or, i128* @v128, align 16 + ret void +; CHECK-LABEL: TestShift128_2: +; CHECK: movq v128(%rip), %rax +; CHECK-NEXT: shlq $32, %rax +; CHECK-NEXT: movq v128_2(%rip), %rcx +; CHECK-NEXT: orq v128_2+8(%rip), %rax +; CHECK-NEXT: movq %rcx, v128(%rip) +; CHECK-NEXT: movq %rax, v128+8(%rip) +; CHECK-NEXT: retq +} + +define fp128 @acosl(fp128 %x) #0 { +entry: + %0 = bitcast fp128 %x to i128 + %bf.clear = and i128 %0, -18446744073709551616 + %1 = bitcast i128 %bf.clear to fp128 + %add = fadd fp128 %1, %x + ret fp128 %add +; CHECK-LABEL: acosl: +; CHECK: movaps %xmm0, %xmm1 +; CHECK-NEXT: movaps %xmm1, 16(%rsp) +; CHECK-NEXT: movq 24(%rsp), %rax +; CHECK-NEXT: movq %rax, 8(%rsp) +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: movaps (%rsp), %xmm0 +; CHECK-NEXT: callq __addtf3 +; CHECK: retq +} + +; Compare i128 values and check i128 constants. +define fp128 @TestComp(fp128 %x, fp128 %y) #0 { +entry: + %0 = bitcast fp128 %x to i128 + %cmp = icmp sgt i128 %0, -1 + %cond = select i1 %cmp, fp128 %x, fp128 %y + ret fp128 %cond +; CHECK-LABEL: TestComp: +; CHECK: movaps %xmm0, -24(%rsp) +; CHECK-NEXT: cmpq $0, -16(%rsp) +; CHECK-NEXT: jns +; CHECK: movaps %xmm1, %xmm0 +; CHECK: retq +} + +declare void @foo(fp128) #1 + +; Test logical operations on fp128 values. +define fp128 @TestFABS_LD(fp128 %x) #0 { +entry: + %call = tail call fp128 @fabsl(fp128 %x) #2 + ret fp128 %call +; CHECK-LABEL: TestFABS_LD +; CHECK: andps {{.*}}, %xmm0 +; CHECK-NEXT: retq +} + +declare fp128 @fabsl(fp128) #1 + +declare fp128 @copysignl(fp128, fp128) #1 + +; Test more complicated logical operations generated from copysignl. +define void @TestCopySign({ fp128, fp128 }* noalias nocapture sret %agg.result, { fp128, fp128 }* byval nocapture readonly align 16 %z) #0 { +entry: + %z.realp = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %z, i64 0, i32 0 + %z.real = load fp128, fp128* %z.realp, align 16 + %z.imagp = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %z, i64 0, i32 1 + %z.imag4 = load fp128, fp128* %z.imagp, align 16 + %cmp = fcmp ogt fp128 %z.real, %z.imag4 + %sub = fsub fp128 %z.imag4, %z.imag4 + br i1 %cmp, label %if.then, label %cleanup + +if.then: ; preds = %entry + %call = tail call fp128 @fabsl(fp128 %sub) #2 + br label %cleanup + +cleanup: ; preds = %entry, %if.then + %z.real.sink = phi fp128 [ %z.real, %if.then ], [ %sub, %entry ] + %call.sink = phi fp128 [ %call, %if.then ], [ %z.real, %entry ] + %call5 = tail call fp128 @copysignl(fp128 %z.real.sink, fp128 %z.imag4) #2 + %0 = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %agg.result, i64 0, i32 0 + %1 = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %agg.result, i64 0, i32 1 + store fp128 %call.sink, fp128* %0, align 16 + store fp128 %call5, fp128* %1, align 16 + ret void +; CHECK-LABEL: TestCopySign +; CHECK-NOT: call +; CHECK: callq __subtf3 +; CHECK-NOT: call +; CHECK: callq __gttf2 +; CHECK-NOT: call +; CHECK: andps {{.*}}, %xmm0 +; CHECK: retq +} + + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+ssse3,+sse3,+popcnt,+sse,+sse2,+sse4.1,+sse4.2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+ssse3,+sse3,+popcnt,+sse,+sse2,+sse4.1,+sse4.2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone } Index: test/CodeGen/X86/fp128-libcalls.ll =================================================================== --- test/CodeGen/X86/fp128-libcalls.ll +++ test/CodeGen/X86/fp128-libcalls.ll @@ -0,0 +1,201 @@ +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s + +; Check all soft floating point library function calls. + +@vf64 = common global double 0.000000e+00, align 8 +@vf128 = common global fp128 0xL00000000000000000000000000000000, align 16 + +define void @Test64Add(double %d1, double %d2) { +entry: + %add = fadd double %d1, %d2 + store double %add, double* @vf64, align 8 + ret void +; CHECK-LABEL: Test64Add: +; CHECK: addsd %xmm1, %xmm0 +; CHECK-NEXT: movsd %xmm0, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @Test64_1Add(double %d1) { +entry: + %0 = load double, double* @vf64, align 8 + %add = fadd double %0, %d1 + store double %add, double* @vf64, align 8 + ret void +; CHECK-LABEL: Test64_1Add: +; CHECK: addsd vf64(%rip), %xmm0 +; CHECK-NEXT: movsd %xmm0, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @Test128Add(fp128 %d1, fp128 %d2) { +entry: + %add = fadd fp128 %d1, %d2 + store fp128 %add, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: Test128Add: +; CHECK: callq __addtf3 +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} + +define void @Test128_1Add(fp128 %d1){ +entry: + %0 = load fp128, fp128* @vf128, align 16 + %add = fadd fp128 %0, %d1 + store fp128 %add, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: Test128_1Add: +; CHECK: movaps %xmm0, %xmm1 +; CHECK-NEXT: movaps vf128(%rip), %xmm0 +; CHECK-NEXT: callq __addtf3 +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} + +define void @Test64Sub(double %d1, double %d2){ +entry: + %sub = fsub double %d1, %d2 + store double %sub, double* @vf64, align 8 + ret void +; CHECK-LABEL: Test64Sub: +; CHECK: subsd %xmm1, %xmm0 +; CHECK-NEXT: movsd %xmm0, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @Test64_1Sub(double %d1){ +entry: + %0 = load double, double* @vf64, align 8 + %sub = fsub double %0, %d1 + store double %sub, double* @vf64, align 8 + ret void +; CHECK-LABEL: Test64_1Sub: +; CHECK: movsd vf64(%rip), %xmm1 +; CHECK-NEXT: subsd %xmm0, %xmm1 +; CHECK-NEXT: movsd %xmm1, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @Test128Sub(fp128 %d1, fp128 %d2){ +entry: + %sub = fsub fp128 %d1, %d2 + store fp128 %sub, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: Test128Sub: +; CHECK: callq __subtf3 +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} + +define void @Test128_1Sub(fp128 %d1){ +entry: + %0 = load fp128, fp128* @vf128, align 16 + %sub = fsub fp128 %0, %d1 + store fp128 %sub, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: Test128_1Sub: +; CHECK: movaps %xmm0, %xmm1 +; CHECK-NEXT: movaps vf128(%rip), %xmm0 +; CHECK-NEXT: callq __subtf3 +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} + +define void @Test64Mul(double %d1, double %d2){ +entry: + %mul = fmul double %d1, %d2 + store double %mul, double* @vf64, align 8 + ret void +; CHECK-LABEL: Test64Mul: +; CHECK: mulsd %xmm1, %xmm0 +; CHECK-NEXT: movsd %xmm0, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @Test64_1Mul(double %d1){ +entry: + %0 = load double, double* @vf64, align 8 + %mul = fmul double %0, %d1 + store double %mul, double* @vf64, align 8 + ret void +; CHECK-LABEL: Test64_1Mul: +; CHECK: mulsd vf64(%rip), %xmm0 +; CHECK-NEXT: movsd %xmm0, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @Test128Mul(fp128 %d1, fp128 %d2){ +entry: + %mul = fmul fp128 %d1, %d2 + store fp128 %mul, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: Test128Mul: +; CHECK: callq __multf3 +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} + +define void @Test128_1Mul(fp128 %d1){ +entry: + %0 = load fp128, fp128* @vf128, align 16 + %mul = fmul fp128 %0, %d1 + store fp128 %mul, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: Test128_1Mul: +; CHECK: movaps %xmm0, %xmm1 +; CHECK-NEXT: movaps vf128(%rip), %xmm0 +; CHECK-NEXT: callq __multf3 +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} + +define void @Test64Div(double %d1, double %d2){ +entry: + %div = fdiv double %d1, %d2 + store double %div, double* @vf64, align 8 + ret void +; CHECK-LABEL: Test64Div: +; CHECK: divsd %xmm1, %xmm0 +; CHECK-NEXT: movsd %xmm0, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @Test64_1Div(double %d1){ +entry: + %0 = load double, double* @vf64, align 8 + %div = fdiv double %0, %d1 + store double %div, double* @vf64, align 8 + ret void +; CHECK-LABEL: Test64_1Div: +; CHECK: movsd vf64(%rip), %xmm1 +; CHECK-NEXT: divsd %xmm0, %xmm1 +; CHECK-NEXT: movsd %xmm1, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @Test128Div(fp128 %d1, fp128 %d2){ +entry: + %div = fdiv fp128 %d1, %d2 + store fp128 %div, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: Test128Div: +; CHECK: callq __divtf3 +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} + +define void @Test128_1Div(fp128 %d1){ +entry: + %0 = load fp128, fp128* @vf128, align 16 + %div = fdiv fp128 %0, %d1 + store fp128 %div, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: Test128_1Div: +; CHECK: movaps %xmm0, %xmm1 +; CHECK-NEXT: movaps vf128(%rip), %xmm0 +; CHECK-NEXT: callq __divtf3 +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} Index: test/CodeGen/X86/fp128-load.ll =================================================================== --- test/CodeGen/X86/fp128-load.ll +++ test/CodeGen/X86/fp128-load.ll @@ -0,0 +1,69 @@ +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s + +; double myD = 1.0; +@my_double = global double 1.000000e+00, align 8 + +; long double myFP80 = 1.0L; // x86_64-linux-gnu +@my_fp80 = global x86_fp80 0xK3FFF8000000000000000, align 16 + +; long double myFP128 = 1.0L; // x86_64-linux-android +@my_fp128 = global fp128 0xL00000000000000003FFF000000000000, align 16 + +define double @get_double() { +entry: + %0 = load double, double* @my_double, align 8 + ret double %0 +; CHECK-LABEL: get_double: +; CHECK: movsd my_double(%rip), %xmm0 +; CHECK-NEXT: retq +} + +define x86_fp80 @get_fp80() { +entry: + %0 = load x86_fp80, x86_fp80* @my_fp80, align 16 + ret x86_fp80 %0 +; CHECK-LABEL: get_fp80: +; CHECK: fldt my_fp80(%rip) +; CHECK-NEXT: retq +} + +define fp128 @get_fp128() { +entry: + %0 = load fp128, fp128* @my_fp128, align 16 + ret fp128 %0 +; CHECK-LABEL: get_fp128: +; CHECK: movaps my_fp128(%rip), %xmm0 +; CHECK-NEXT: retq +} + +@TestLoadExtend.data = internal unnamed_addr constant [2 x float] [float 0x3FB99999A0000000, float 0x3FC99999A0000000], align 4 + +define fp128 @TestLoadExtend(fp128 %x, i32 %n) { +entry: + %idxprom = sext i32 %n to i64 + %arrayidx = getelementptr inbounds [2 x float], [2 x float]* @TestLoadExtend.data, i64 0, i64 %idxprom + %0 = load float, float* %arrayidx, align 4 + %conv = fpext float %0 to fp128 + ret fp128 %conv +; CHECK-LABEL: TestLoadExtend: +; CHECK: movslq %edi, %rax +; CHECK-NEXT: movss TestLoadExtend.data(,%rax,4), %xmm0 +; CHECK-NEXT: callq __extendsftf2 +; CHECK: retq +} + +; CHECK-LABEL: my_double: +; CHECK-NEXT: .quad 4607182418800017408 +; CHECK-NEXT: .size my_double, 8 + +; CHECK-LABEL: my_fp80: +; CHECK-NEXT: .quad -9223372036854775808 +; CHECK-NEXT: .short 16383 +; CHECK-NEXT: .zero 6 +; CHECK-NEXT: .size my_fp80, 16 + +; CHECK-LABEL: my_fp128: +; CHECK-NEXT: .quad 0 +; CHECK-NEXT: .quad 4611404543450677248 +; CHECK-NEXT: .size my_fp128, 16 Index: test/CodeGen/X86/fp128-store.ll =================================================================== --- test/CodeGen/X86/fp128-store.ll +++ test/CodeGen/X86/fp128-store.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s + +; double myD = 1.0; +@myD = global double 1.000000e+00, align 8 + +; long double myFP80 = 1.0L; // x86_64-linux-gnu +@myFP80 = global x86_fp80 0xK3FFF8000000000000000, align 16 + +; long double myFP128 = 1.0L; // x86_64-linux-android +@myFP128 = global fp128 0xL00000000000000003FFF000000000000, align 16 + +define void @set_D(double %x) { +entry: + store double %x, double* @myD, align 8 + ret void +; CHECK-LABEL: set_D: +; CHECK: movsd %xmm0, myD(%rip) +; CHECK-NEXT: retq +} + +define void @set_FP80(x86_fp80 %x) { +entry: + store x86_fp80 %x, x86_fp80* @myFP80, align 16 + ret void +; CHECK-LABEL: set_FP80: +; CHECK: fldt 8(%rsp) +; CHECK-NEXT: fstpt myFP80(%rip) +; CHECK-NEXT: retq +} + +define void @set_FP128(fp128 %x) { +entry: + store fp128 %x, fp128* @myFP128, align 16 + ret void +; CHECK-LABEL: set_FP128: +; CHECK: movaps %xmm0, myFP128(%rip) +; CHECK-NEXT: retq +} Index: test/CodeGen/X86/soft-fp.ll =================================================================== --- test/CodeGen/X86/soft-fp.ll +++ test/CodeGen/X86/soft-fp.ll @@ -1,8 +1,14 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2,+soft-float | FileCheck %s -; RUN: llc < %s -march=x86-64 -mattr=+sse2,+soft-float | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-gnux32 -mattr=+sse2,+soft-float | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+mmx,+sse,+soft-float \ +; RUN: | FileCheck %s --check-prefix=SOFT1 --check-prefix=CHECK +; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2,+soft-float \ +; RUN: | FileCheck %s --check-prefix=SOFT2 --check-prefix=CHECK +; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse \ +; RUN: | FileCheck %s --check-prefix=SSE1 --check-prefix=CHECK +; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 \ +; RUN: | FileCheck %s --check-prefix=SSE2 --check-prefix=CHECK +; RUN: llc < %s -mtriple=x86_64-gnux32 -mattr=+mmx,+sse2,+soft-float | FileCheck %s -; CHECK-NOT: xmm{[0-9]+} +; CHECK-NOT: xmm{{[0-9]+}} %struct.__va_list_tag = type { i32, i32, i8*, i8* } @@ -15,6 +21,8 @@ call void @bar(%struct.__va_list_tag* %va3) nounwind call void @llvm.va_end(i8* %va12) ret i32 undef +; CHECK-LABEL: t1: +; CHECK: ret{{[lq]}} } declare void @llvm.va_start(i8*) nounwind @@ -27,4 +35,23 @@ entry: %0 = fadd float %a, %b ; [#uses=1] ret float %0 +; CHECK-LABEL: t2: +; SOFT1-NOT: xmm{{[0-9]+}} +; SOFT2-NOT: xmm{{[0-9]+}} +; SSE1: xmm{{[0-9]+}} +; SSE2: xmm{{[0-9]+}} +; CHECK: ret{{[lq]}} +} + +; soft-float means no SSE instruction and passing fp128 as pair of i64. +define fp128 @t3(fp128 %a, fp128 %b) nounwind readnone { +entry: + %0 = fadd fp128 %b, %a + ret fp128 %0 +; CHECK-LABEL: t3: +; SOFT1-NOT: xmm{{[0-9]+}} +; SOFT2-NOT: xmm{{[0-9]+}} +; SSE1: xmm{{[0-9]+}} +; SSE2: xmm{{[0-9]+}} +; CHECK: ret{{[lq]}} } Index: utils/TableGen/X86RecognizableInstr.cpp =================================================================== --- utils/TableGen/X86RecognizableInstr.cpp +++ utils/TableGen/X86RecognizableInstr.cpp @@ -951,6 +951,7 @@ TYPE("f128mem", TYPE_M128) TYPE("f256mem", TYPE_M256) TYPE("f512mem", TYPE_M512) + TYPE("FR128", TYPE_XMM128) TYPE("FR64", TYPE_XMM64) TYPE("FR64X", TYPE_XMM64) TYPE("f64mem", TYPE_M64FP) @@ -1069,6 +1070,7 @@ // register IDs in 8-bit immediates nowadays. ENCODING("FR32", ENCODING_IB) ENCODING("FR64", ENCODING_IB) + ENCODING("FR128", ENCODING_IB) ENCODING("VR128", ENCODING_IB) ENCODING("VR256", ENCODING_IB) ENCODING("FR32X", ENCODING_IB) @@ -1091,6 +1093,7 @@ ENCODING("GR8", ENCODING_RM) ENCODING("VR128", ENCODING_RM) ENCODING("VR128X", ENCODING_RM) + ENCODING("FR128", ENCODING_RM) ENCODING("FR64", ENCODING_RM) ENCODING("FR32", ENCODING_RM) ENCODING("FR64X", ENCODING_RM) @@ -1120,6 +1123,7 @@ ENCODING("GR64", ENCODING_REG) ENCODING("GR8", ENCODING_REG) ENCODING("VR128", ENCODING_REG) + ENCODING("FR128", ENCODING_REG) ENCODING("FR64", ENCODING_REG) ENCODING("FR32", ENCODING_REG) ENCODING("VR64", ENCODING_REG) @@ -1157,6 +1161,7 @@ ENCODING("GR32", ENCODING_VVVV) ENCODING("GR64", ENCODING_VVVV) ENCODING("FR32", ENCODING_VVVV) + ENCODING("FR128", ENCODING_VVVV) ENCODING("FR64", ENCODING_VVVV) ENCODING("VR128", ENCODING_VVVV) ENCODING("VR256", ENCODING_VVVV)