Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -1903,6 +1903,7 @@ /// up the MVT::LAST_VALUETYPE value to the next multiple of 16. uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 15) / 16]; +protected: ValueTypeActionImpl ValueTypeActions; private: Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -8585,7 +8585,12 @@ // copysign(x, fp_extend(y)) -> copysign(x, y) // copysign(x, fp_round(y)) -> copysign(x, y) - if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) + // Do not optimize out type conversion of f128 type yet. + // Store and truncate operators for f128 are very limited. + EVT N1VT = N1->getValueType(0); + EVT N1Op0VT = N1->getOperand(0)->getValueType(0); + if ((N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) && + (N1VT == N1Op0VT || (N1VT != MVT::f128 && N1Op0VT != MVT::f128))) return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0)); Index: lib/CodeGen/SelectionDAG/InstrEmitter.cpp =================================================================== --- lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -140,6 +140,11 @@ else if (RC) { const TargetRegisterClass *ComRC = TRI->getCommonSubClass(UseRC, RC); + // Current TRI->getCommonSubClass might return a register class + // ComRC such that ComRC->hasType(Node->getSimpleValueType(ResNo)) + // is not true. This could happen when RC or UseRC is FR128. + // Until that behavior of FR128 is changed, + // we need to handle such case later before using UseRC. // If multiple uses expect disjoint register classes, we emit // copies in AddRegisterOperand. 
if (ComRC) @@ -159,8 +164,9 @@ // Figure out the register class to create for the destreg. if (VRBase) { DstRC = MRI->getRegClass(VRBase); - } else if (UseRC) { - assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!"); + } else if (UseRC && UseRC->hasType(VT)) { + // If TRI->getCommonSubClass(UseRC, RC) returns some register class, + // e.g. FR128, that does not contain VT, it should not be used for DstRC. DstRC = UseRC; } else { DstRC = TLI->getRegClassFor(VT); Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -146,6 +146,7 @@ SDValue ExpandVectorBuildThroughStack(SDNode* Node); SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); + SDValue ExpandConstant(ConstantSDNode *CP); std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); @@ -286,6 +287,20 @@ return Result; } +/// Expands the Constant node to a load of its value from the constant pool. +SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) { + SDLoc dl(CP); + EVT VT = CP->getValueType(0); + SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(), + TLI.getPointerTy(DAG.getDataLayout())); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + SDValue Result = + DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); + return Result; +} + /// Expands an unaligned store to 2 half-size stores. 
static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, const TargetLowering &TLI, @@ -1185,15 +1200,17 @@ #ifndef NDEBUG for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == - TargetLowering::TypeLegal && + assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == + TargetLowering::TypeLegal || + TLI.isTypeLegal(Node->getValueType(i))) && "Unexpected illegal type!"); for (const SDValue &Op : Node->op_values()) - assert((TLI.getTypeAction(*DAG.getContext(), - Op.getValueType()) == TargetLowering::TypeLegal || - Op.getOpcode() == ISD::TargetConstant) && - "Unexpected illegal type!"); + assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == + TargetLowering::TypeLegal || + TLI.isTypeLegal(Op.getValueType()) || + Op.getOpcode() == ISD::TargetConstant) && + "Unexpected illegal type!"); #endif // Figure out the correct action; the way to query this varies by opcode @@ -3513,6 +3530,11 @@ Results.push_back(ExpandConstantFP(CFP, true)); break; } + case ISD::Constant: { + ConstantSDNode *CP = cast(Node); + Results.push_back(ExpandConstant(CP)); + break; + } case ISD::FSUB: { EVT VT = Node->getValueType(0); if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) && Index: lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -46,13 +46,44 @@ // Result Float to Integer Conversion. //===----------------------------------------------------------------------===// -void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { +bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); dbgs() << "\n"); - SDValue R = SDValue(); + SDValue R(N, ResNo); + // Some types, e.g. 
x86_64's f128, want to be legally in registers + // but need some operations converted to library calls or integer + // bitwise operations + EVT VT = N->getValueType(ResNo); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + bool KeepFloat = VT.isSimple() && VT == NVT && TLI.isTypeLegal(VT); + + // When the value can be kept in register, like f128 in FR128 for x86_64, + // we do not soften some op code but will go on to legalize the operands. + if (KeepFloat) + switch (N->getOpcode()) { + case ISD::BITCAST: + case ISD::SELECT: + case ISD::SELECT_CC: + return false; + // On x86_64 FABS, FNEG, and FCOPYSIGN can be simplified to + // native bitwise operations. + case ISD::FABS: + case ISD::FCOPYSIGN: + case ISD::FNEG: + // We can load better from the constant pool. + // Change it and not to scan the operand again. + case ISD::ConstantFP: + // Mark this node as softened. + SetSoftenedFloat(R, R); + return true; + } + switch (N->getOpcode()) { default: + // For CopyToReg, CopyFromReg, etc. + if (KeepFloat) + return false; #ifndef NDEBUG dbgs() << "SoftenFloatResult #" << ResNo << ": "; N->dump(&DAG); dbgs() << "\n"; @@ -97,7 +128,7 @@ case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; - case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; + case ISD::LOAD: R = SoftenFloatRes_LOAD(N, KeepFloat); break; case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; case ISD::SINT_TO_FP: @@ -107,8 +138,16 @@ } // If R is null, the sub-method took care of registering the result. - if (R.getNode()) + if (R.getNode()) { SetSoftenedFloat(SDValue(N, ResNo), R); + // Some soften sub-methods do not call ReplaceValueWith and depend + // on the ScanOperands in LegalizeTypes.cpp to replace every operand. + // But SoftenFloatOperand is not smart enough to replace every operand. 
+ // See CodeGen/X86/fp128-abs.ll, fp128-load.ll. + if (R.getNode() != N && KeepFloat) + ReplaceValueWith(SDValue(N, ResNo), R); + } + return true; } SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { @@ -571,7 +610,7 @@ NVT, &Op, 1, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, bool KeepFloat) { LoadSDNode *L = cast(N); EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); @@ -586,7 +625,8 @@ L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. - ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + if (N != NewL.getValue(1).getNode()) + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); return NewL; } @@ -600,7 +640,10 @@ // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); - return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL)); + auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL); + if (KeepFloat) + return ExtendNode; + return BitConvertToInteger(ExtendNode); } SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { @@ -636,7 +679,8 @@ // Legalized the chain result - switch anything that used the old chain to // use the new one. - ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); + if (N != NewVAARG.getValue(1).getNode()) + ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); return NewVAARG; } @@ -677,9 +721,20 @@ DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); + EVT OpVT = N->getOperand(OpNo).getValueType(); + bool KeepFloat = OpVT.isSimple() && TLI.isTypeLegal(OpVT); + // When an operand is float but can be kept in register, + // many of the SoftenFloatOp_* functions will call GetSoftenedFloat + // and get the unsoftened float operands, which should be fine. 
+ switch (N->getOpcode()) { default: + // When result type is legal, N could have op code like CopyToReg, + // CopyFromReg, Register, etc. They do not need to be softened. + // Assume all child nodes were softened if changed in place. + if (KeepFloat) + return false; #ifndef NDEBUG dbgs() << "SoftenFloatOperand Op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; @@ -695,7 +750,15 @@ case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; - case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; + case ISD::STORE: + Res = SoftenFloatOp_STORE(N, OpNo); + // Do not try to analyze or soften this node again if the value is + // or can be held in a register. In that case, Res.getNode() should + // be equal to N. + if (Res.getNode() == N && KeepFloat) + return false; + // Otherwise, we need to reanalyze and lower the new Res nodes. + break; } // If the result is null, the sub-method took care of registering results etc. @@ -709,7 +772,8 @@ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && "Invalid operand expansion"); - ReplaceValueWith(SDValue(N, 0), Res); + if (N != Res.getNode()) + ReplaceValueWith(SDValue(N, 0), Res); return false; } Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -381,8 +381,14 @@ /// exactly the same bits as Op - only the type changed. For example, if Op /// is an f32 which was softened to an i32, then this method returns an i32, /// the bits of which coincide with those of Op. + /// If Op can be legally stored in a register, it might not have been + /// converted to an integer. In that case, the given op is returned. 
SDValue GetSoftenedFloat(SDValue Op) { SDValue &SoftenedOp = SoftenedFloats[Op]; + if (!SoftenedOp.getNode() && + Op.getValueType().isSimple() && + TLI.isTypeLegal(Op.getValueType())) + return Op; RemapValue(SoftenedOp); assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?"); return SoftenedOp; @@ -390,7 +396,7 @@ void SetSoftenedFloat(SDValue Op, SDValue Result); // Result Float to Integer Conversion. - void SoftenFloatResult(SDNode *N, unsigned OpNo); + bool SoftenFloatResult(SDNode *N, unsigned OpNo); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_BITCAST(SDNode *N); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); @@ -426,7 +432,7 @@ SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); SDValue SoftenFloatRes_FTRUNC(SDNode *N); - SDValue SoftenFloatRes_LOAD(SDNode *N); + SDValue SoftenFloatRes_LOAD(SDNode *N, bool KeepFloat); SDValue SoftenFloatRes_SELECT(SDNode *N); SDValue SoftenFloatRes_SELECT_CC(SDNode *N); SDValue SoftenFloatRes_UNDEF(SDNode *N); Index: lib/CodeGen/SelectionDAG/LegalizeTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -238,9 +238,13 @@ Changed = true; goto NodeDone; case TargetLowering::TypeSoftenFloat: - SoftenFloatResult(N, i); - Changed = true; - goto NodeDone; + // Some fp128 type nodes might not be softened, to keep in + // registers for other operations. If not changed, go to + // check the operands. + Changed = SoftenFloatResult(N, i); + if (Changed) + goto NodeDone; + goto ScanOperands; case TargetLowering::TypeExpandFloat: ExpandFloatResult(N, i); Changed = true; @@ -411,10 +415,17 @@ bool Failed = false; // Check that all result types are legal. + // A value type is illegal if its TypeAction is not TypeLegal, + // and TLI.RegClassForVT does not have a register class for this type. 
+ // For example, the x86_64 target has f128 that is not TypeLegal, + // to have softened operators, but it also has FR128 register class to + // pass and return f128 values. Hence a legalized node can have f128 type. if (!IgnoreNodeResults(&Node)) for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i) - if (!isTypeLegal(Node.getValueType(i))) { - dbgs() << "Result type " << i << " illegal!\n"; + if (!isTypeLegal(Node.getValueType(i)) && + !TLI.isTypeLegal(Node.getValueType(i))) { + dbgs() << "Result type " << i << " illegal: "; + Node.dump(); Failed = true; } @@ -421,8 +432,10 @@ // Check that all operand types are legal. for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i) if (!IgnoreNodeResults(Node.getOperand(i).getNode()) && - !isTypeLegal(Node.getOperand(i).getValueType())) { - dbgs() << "Operand type " << i << " illegal!\n"; + !isTypeLegal(Node.getOperand(i).getValueType()) && + !TLI.isTypeLegal(Node.getOperand(i).getValueType())) { + dbgs() << "Operand type " << i << " illegal: "; + Node.getOperand(i).dump(); Failed = true; } @@ -748,13 +761,23 @@ } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { - assert(Result.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + // f128 of x86_64 could be kept in SSE registers, + // but sometimes softened to i128. + assert((Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) || + Op.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && "Invalid type for softened float"); AnalyzeNewValue(Result); SDValue &OpEntry = SoftenedFloats[Op]; - assert(!OpEntry.getNode() && "Node is already converted to integer!"); + // Allow repeated calls to save f128 type nodes + // or any node with type that transforms to itself. + // Many operations on these types are not softened. 
+ assert((!OpEntry.getNode()|| + Op.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && + "Node is already converted to integer!"); OpEntry = Result; } Index: lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -53,12 +53,17 @@ case TargetLowering::TypePromoteFloat: llvm_unreachable("Bitcast of a promotion-needing float should never need" "expansion"); - case TargetLowering::TypeSoftenFloat: - // Convert the integer operand instead. - SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); + case TargetLowering::TypeSoftenFloat: { + // Expand the floating point operand only if it was softened. + // Otherwise, it could be f128 that can be copied to i128 in register. + auto SoftenedOp = GetSoftenedFloat(InOp); + if (SoftenedOp == InOp) + break; + SplitInteger(SoftenedOp, Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; + } case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: { auto &DL = DAG.getDataLayout(); Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2899,8 +2899,10 @@ return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT); if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT); - else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) + if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT); + if (VT == MVT::f128 && C->getValueType(0) == MVT::i128) + return getConstantFP(APFloat(APFloat::IEEEquad, Val), DL, VT); break; case ISD::BSWAP: return getConstant(Val.byteSwap(), DL, VT, 
C->isTargetOpcode(), Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1069,7 +1069,9 @@ Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); - if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) { + if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple() && + Op.getOperand(0).getValueType() != MVT::f128) { + // Cannot eliminate/lower SHL for f128 yet. EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32; // Make a FGETSIGN + SHL to move the sign bit into the appropriate // place. We expect the SHL to be eliminated by other optimizations. Index: lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- lib/CodeGen/TargetLoweringBase.cpp +++ lib/CodeGen/TargetLoweringBase.cpp @@ -1657,6 +1657,10 @@ if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger) Cost *= 2; + // Do not loop with f128 type. + if (MTy == LK.second) + return std::make_pair(Cost, MTy.getSimpleVT()); + // Keep legalizing the type. MTy = LK.second; } Index: lib/Target/X86/X86CallingConv.td =================================================================== --- lib/Target/X86/X86CallingConv.td +++ lib/Target/X86/X86CallingConv.td @@ -158,6 +158,7 @@ // The X86-64 calling convention always returns FP values in XMM0. CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>, CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>, + CCIfType<[f128], CCAssignToReg<[XMM0, XMM1]>>, // MMX vector types are always returned in XMM0. CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>, @@ -280,7 +281,7 @@ CCIfType<[v64i1], CCPromoteToType>, // The first 8 FP/Vector arguments are passed in XMM registers. 
- CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCIfType<[f32, f64, f128, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCIfSubtarget<"hasSSE1()", CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, @@ -305,7 +306,7 @@ // Long doubles get stack slots whose size and alignment depends on the // subtarget. - CCIfType<[f80], CCAssignToStack<0, 0>>, + CCIfType<[f80, f128], CCAssignToStack<0, 0>>, // Vectors get 16-byte stack slots that are 16-byte aligned. CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -287,6 +287,7 @@ setOperationAction(ISD::BR_CC , MVT::f32, Expand); setOperationAction(ISD::BR_CC , MVT::f64, Expand); setOperationAction(ISD::BR_CC , MVT::f80, Expand); + setOperationAction(ISD::BR_CC , MVT::f128, Expand); setOperationAction(ISD::BR_CC , MVT::i8, Expand); setOperationAction(ISD::BR_CC , MVT::i16, Expand); setOperationAction(ISD::BR_CC , MVT::i32, Expand); @@ -294,6 +295,7 @@ setOperationAction(ISD::SELECT_CC , MVT::f32, Expand); setOperationAction(ISD::SELECT_CC , MVT::f64, Expand); setOperationAction(ISD::SELECT_CC , MVT::f80, Expand); + setOperationAction(ISD::SELECT_CC , MVT::f128, Expand); setOperationAction(ISD::SELECT_CC , MVT::i8, Expand); setOperationAction(ISD::SELECT_CC , MVT::i16, Expand); setOperationAction(ISD::SELECT_CC , MVT::i32, Expand); @@ -406,6 +408,7 @@ setOperationAction(ISD::SELECT , MVT::f32 , Custom); setOperationAction(ISD::SELECT , MVT::f64 , Custom); setOperationAction(ISD::SELECT , MVT::f80 , Custom); + setOperationAction(ISD::SELECT , MVT::f128 , Custom); setOperationAction(ISD::SETCC , MVT::i8 , Custom); setOperationAction(ISD::SETCC , MVT::i16 , Custom); setOperationAction(ISD::SETCC , MVT::i32 , Custom); @@ -412,6 +415,7 @@ setOperationAction(ISD::SETCC , MVT::f32 , 
Custom); setOperationAction(ISD::SETCC , MVT::f64 , Custom); setOperationAction(ISD::SETCC , MVT::f80 , Custom); + setOperationAction(ISD::SETCC , MVT::f128 , Custom); if (Subtarget->is64Bit()) { setOperationAction(ISD::SELECT , MVT::i64 , Custom); setOperationAction(ISD::SETCC , MVT::i64 , Custom); @@ -616,8 +620,16 @@ setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::FMA, MVT::f32, Expand); - // Long double always uses X87. + // Long double always uses X87, except f128 in MMX. if (!Subtarget->useSoftFloat()) { + if (Subtarget->is64Bit() && Subtarget->hasMMX()) { + addRegisterClass(MVT::f128, &X86::FR128RegClass); + ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat); + setOperationAction(ISD::FABS , MVT::f128, Custom); + setOperationAction(ISD::FNEG , MVT::f128, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom); + } + addRegisterClass(MVT::f80, &X86::RFP80RegClass); setOperationAction(ISD::UNDEF, MVT::f80, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); @@ -2289,7 +2301,7 @@ EVT CopyVT = VA.getLocVT(); // If this is x86-64, and we disabled SSE, we can't return FP values - if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && + if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) && ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) { report_fatal_error("SSE register return with SSE disabled"); } @@ -2568,6 +2580,8 @@ RC = &X86::FR32RegClass; else if (RegVT == MVT::f64) RC = &X86::FR64RegClass; + else if (RegVT == MVT::f128) + RC = &X86::FR128RegClass; else if (RegVT.is512BitVector()) RC = &X86::VR512RegClass; else if (RegVT.is256BitVector()) @@ -13080,6 +13094,8 @@ SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); + bool IsF128 = (VT == MVT::f128); + // FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to // decide if we should generate a 16-byte constant mask when we only need 4 or // 8 bytes for the scalar case. 
@@ -13092,6 +13108,11 @@ LogicVT = VT; EltVT = VT.getVectorElementType(); NumElts = VT.getVectorNumElements(); + } else if (IsF128) { + // SSE instructions are used for optimized f128 logical operations. + LogicVT = MVT::f128; + EltVT = VT; + NumElts = 1; } else { // There are no scalar bitwise logical SSE/AVX instructions, so we // generate a 16-byte vector constant and logic op even for the scalar case. @@ -13123,7 +13144,7 @@ IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR; SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0; - if (VT.isVector()) + if (VT.isVector() || IsF128) return DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask); // For the scalar case extend to a 128-bit vector, perform the logic op, @@ -13142,6 +13163,7 @@ SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); MVT SrcVT = Op1.getSimpleValueType(); + bool IsF128 = (VT == MVT::f128); // If second operand is smaller, extend it first. if (SrcVT.bitsLT(VT)) { @@ -13156,13 +13178,16 @@ // At this point the operands and the result should have the same // type, and that won't be f80 since that is not custom lowered. + assert((VT == MVT::f64 || VT == MVT::f32 || IsF128) && + "Unexpected type in LowerFCOPYSIGN"); const fltSemantics &Sem = - VT == MVT::f64 ? APFloat::IEEEdouble : APFloat::IEEEsingle; + VT == MVT::f64 ? APFloat::IEEEdouble : + (IsF128 ? APFloat::IEEEquad : APFloat::IEEEsingle); const unsigned SizeInBits = VT.getSizeInBits(); SmallVector CV( - VT == MVT::f64 ? 2 : 4, + VT == MVT::f64 ? 2 : (IsF128 ? 1 : 4), ConstantFP::get(*Context, APFloat(Sem, APInt(SizeInBits, 0)))); // First, clear all bits but the sign bit from the second operand (sign). @@ -13175,12 +13200,13 @@ // Perform all logic operations as 16-byte vectors because there are no // scalar FP logic instructions in SSE. This allows load folding of the // constants into the logic instructions. - MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32; + MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : (IsF128 ? 
MVT::f128 : MVT::v4f32); SDValue Mask1 = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, false, false, 16); - Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1); + if (!IsF128) + Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1); SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op1, Mask1); // Next, clear the sign bit from the first operand (magnitude). @@ -13189,8 +13215,9 @@ APFloat APF = Op0CN->getValueAPF(); // If the magnitude is a positive zero, the sign bit alone is enough. if (APF.isPosZero()) - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit, - DAG.getIntPtrConstant(0, dl)); + return IsF128 ? SignBit : + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit, + DAG.getIntPtrConstant(0, dl)); APF.clearSign(); CV[0] = ConstantFP::get(*Context, APF); } else { @@ -13206,13 +13233,15 @@ false, false, false, 16); // If the magnitude operand wasn't a constant, we need to AND out the sign. if (!isa(Op0)) { - Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0); + if (!IsF128) + Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0); Val = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op0, Val); } // OR the magnitude value with the sign bit. Val = DAG.getNode(X86ISD::FOR, dl, LogicVT, Val, SignBit); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val, - DAG.getIntPtrConstant(0, dl)); + return IsF128 ? Val : + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val, + DAG.getIntPtrConstant(0, dl)); } static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { @@ -14242,6 +14271,7 @@ // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of // these. if (Op1.getOpcode() == ISD::Constant && + Op1.getValueType() != MVT::i128 && // getZExtValue() works up to i64 only. 
(cast(Op1)->getZExtValue() == 1 || cast(Op1)->isNullValue()) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { @@ -21302,6 +21332,7 @@ return EmitLoweredTLSCall(MI, BB); case X86::CMOV_FR32: case X86::CMOV_FR64: + case X86::CMOV_FR128: case X86::CMOV_GR8: case X86::CMOV_GR16: case X86::CMOV_GR32: @@ -22886,7 +22917,8 @@ // ignored in unsafe-math mode). // We also try to create v2f32 min/max nodes, which we later widen to v4f32. if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() && - VT != MVT::f80 && (TLI.isTypeLegal(VT) || VT == MVT::v2f32) && + VT != MVT::f80 && VT != MVT::f128 && + (TLI.isTypeLegal(VT) || VT == MVT::v2f32) && (Subtarget->hasSSE2() || (Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) { ISD::CondCode CC = cast(Cond.getOperand(2))->get(); @@ -26759,6 +26791,7 @@ case MVT::f64: case MVT::i64: return std::make_pair(0U, &X86::FR64RegClass); + // TODO: handle f128 and i128 in FR128RegClass. // Vector types. case MVT::v16i8: case MVT::v8i16: @@ -26871,6 +26904,7 @@ // target independent register mapper will just pick the first match it can // find, ignoring the required type. + // TODO: handle f128 and i128 in FR128RegClass. 
if (VT == MVT::f32 || VT == MVT::i32) Res.second = &X86::FR32RegClass; else if (VT == MVT::f64 || VT == MVT::i64) Index: lib/Target/X86/X86InstrCompiler.td =================================================================== --- lib/Target/X86/X86InstrCompiler.td +++ lib/Target/X86/X86InstrCompiler.td @@ -506,6 +506,7 @@ defm _FR32 : CMOVrr_PSEUDO; defm _FR64 : CMOVrr_PSEUDO; + defm _FR128 : CMOVrr_PSEUDO; defm _V4F32 : CMOVrr_PSEUDO; defm _V2F64 : CMOVrr_PSEUDO; defm _V2I64 : CMOVrr_PSEUDO; Index: lib/Target/X86/X86InstrInfo.td =================================================================== --- lib/Target/X86/X86InstrInfo.td +++ lib/Target/X86/X86InstrInfo.td @@ -952,11 +952,12 @@ return false; }]>; -def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>; -def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>; -def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>; -def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>; -def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>; +def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>; +def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>; +def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>; +def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>; +def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>; +def loadf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr))>; def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>; def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>; Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -413,6 +413,8 @@ def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>; + def : 
Pat<(f128 (bitconvert (i128 FR128:$src))), (f128 FR128:$src)>; + def : Pat<(i128 (bitconvert (f128 FR128:$src))), (i128 FR128:$src)>; } // Bitcasts between 256-bit vector types. Return the original type since @@ -8867,3 +8869,48 @@ defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", VR128, vx32mem, vy32mem>; } } + +//===----------------------------------------------------------------------===// +// Extra selection patterns for FR128, f128, f128mem + +def : Pat<(store (f128 FR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, (COPY_TO_REGCLASS (f128 FR128:$src), VR128))>; +// When the data is used as floating point, "movaps" should be faster and shorter +// than "movdqa". "movaps" is in SSE and movdqa is in SSE2. + +def : Pat<(loadf128 addr:$src), + (COPY_TO_REGCLASS (MOVAPSrm addr:$src), FR128)>; + +// andps is faster and shorter than andpd, andps is SSE and andpd is SSE2 +def : Pat<(X86fand FR128:$src1, (loadf128 addr:$src2)), + (COPY_TO_REGCLASS (ANDPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2), FR128)>; + +def : Pat<(X86fand FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS (ANDPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +def : Pat<(and FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS (ANDPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +def : Pat<(X86for FR128:$src1, (loadf128 addr:$src2)), + (COPY_TO_REGCLASS (ORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2), FR128)>; + +def : Pat<(X86for FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS (ORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +def : Pat<(or FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS (ORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +def : Pat<(X86fxor FR128:$src1, (loadf128 addr:$src2)), + (COPY_TO_REGCLASS (XORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2), FR128)>; + +def : 
Pat<(X86fxor FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS (XORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +def : Pat<(xor FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS (XORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; Index: lib/Target/X86/X86MCInstLower.cpp =================================================================== --- lib/Target/X86/X86MCInstLower.cpp +++ lib/Target/X86/X86MCInstLower.cpp @@ -1334,7 +1334,19 @@ if (isa(COp)) { CS << "u"; } else if (auto *CI = dyn_cast(COp)) { - CS << CI->getZExtValue(); + if (CI->getBitWidth() <= 64) { + CS << CI->getZExtValue(); + } else { + // print multi-word constant as (w0,w1) + auto Val = CI->getValue(); + CS << "("; + for (int i = 0, N = Val.getNumWords(); i < N; ++i) { + if (i > 0) + CS << ","; + CS << Val.getRawData()[i]; + } + CS << ")"; + } } else if (auto *CF = dyn_cast(COp)) { SmallString<32> Str; CF->getValueAPF().toString(Str); Index: lib/Target/X86/X86RegisterInfo.td =================================================================== --- lib/Target/X86/X86RegisterInfo.td +++ lib/Target/X86/X86RegisterInfo.td @@ -423,7 +423,9 @@ def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>; +def FR128 : RegisterClass<"X86", [i128, f128], 128, (add FR32)>; + // FIXME: This sets up the floating point register files as though they are f64 // values, though they really are f80 values. 
This will cause us to spill // values as 64-bit quantities instead of 80-bit quantities, which is much much @@ -491,4 +493,4 @@ def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;} // Bound registers -def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>; \ No newline at end of file +def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>; Index: test/CodeGen/X86/fp128-calling-conv.ll =================================================================== --- test/CodeGen/X86/fp128-calling-conv.ll +++ test/CodeGen/X86/fp128-calling-conv.ll @@ -0,0 +1,197 @@ +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android | FileCheck %s +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu | FileCheck %s + +; double myD = 1.0; +@myD = global double 1.000000e+00, align 8 + +; long double myFP80 = 1.0L; // x86_64-linux-gnu +@myFP80 = global x86_fp80 0xK3FFF8000000000000000, align 16 + +; long double myFP128 = 1.0L; // x86_64-linux-android +@myFP128 = global fp128 0xL00000000000000003FFF000000000000, align 16 + +; The first few parameters are passed in registers and the others are on the stack.
+ +define i64 @TestParam_L_0(i64 %d0, i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, i64 %d10, i64 %d11, i64 %d12, i64 %d13, i64 %d14, i64 %d15, i64 %d16, i64 %d17, i64 %d18, i64 %d19) { +entry: + ret i64 %d0 +; CHECK-LABEL: TestParam_L_0: +; CHECK: movq %rdi, %rax +; CHECK-NEXT: retq +} + +define i64 @TestParam_L_1(i64 %d0, i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, i64 %d10, i64 %d11, i64 %d12, i64 %d13, i64 %d14, i64 %d15, i64 %d16, i64 %d17, i64 %d18, i64 %d19) { +entry: + ret i64 %d1 +; CHECK-LABEL: TestParam_L_1: +; CHECK: movq %rsi, %rax +; CHECK-NEXT: retq +} + +define i64 @TestParam_L_2(i64 %d0, i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, i64 %d10, i64 %d11, i64 %d12, i64 %d13, i64 %d14, i64 %d15, i64 %d16, i64 %d17, i64 %d18, i64 %d19) { +entry: + ret i64 %d2 +; CHECK-LABEL: TestParam_L_2: +; CHECK: movq %rdx, %rax +; CHECK-NEXT: retq +} + +define i64 @TestParam_L_3(i64 %d0, i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, i64 %d10, i64 %d11, i64 %d12, i64 %d13, i64 %d14, i64 %d15, i64 %d16, i64 %d17, i64 %d18, i64 %d19) { +entry: + ret i64 %d3 +; CHECK-LABEL: TestParam_L_3: +; CHECK: movq %rcx, %rax +; CHECK-NEXT: retq +} + +define i64 @TestParam_L_4(i64 %d0, i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, i64 %d10, i64 %d11, i64 %d12, i64 %d13, i64 %d14, i64 %d15, i64 %d16, i64 %d17, i64 %d18, i64 %d19) { +entry: + ret i64 %d4 +; CHECK-LABEL: TestParam_L_4: +; CHECK: movq %r8, %rax +; CHECK-NEXT: retq +} + +define i64 @TestParam_L_5(i64 %d0, i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, i64 %d10, i64 %d11, i64 %d12, i64 %d13, i64 %d14, i64 %d15, i64 %d16, i64 %d17, i64 %d18, i64 %d19) { +entry: + ret i64 %d5 +; CHECK-LABEL: TestParam_L_5: +; CHECK: movq %r9, %rax +; CHECK-NEXT: retq +} + +define i64 @TestParam_L_6(i64 %d0, i64 %d1, i64 %d2, i64 
%d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, i64 %d10, i64 %d11, i64 %d12, i64 %d13, i64 %d14, i64 %d15, i64 %d16, i64 %d17, i64 %d18, i64 %d19) { +entry: + ret i64 %d6 +; CHECK-LABEL: TestParam_L_6: +; CHECK: movq 8(%rsp), %rax +; CHECK-NEXT: retq +} + +define i64 @TestParam_L_7(i64 %d0, i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, i64 %d10, i64 %d11, i64 %d12, i64 %d13, i64 %d14, i64 %d15, i64 %d16, i64 %d17, i64 %d18, i64 %d19) { +entry: + ret i64 %d7 +; CHECK-LABEL: TestParam_L_7: +; CHECK: movq 16(%rsp), %rax +; CHECK-NEXT: retq +} + +define float @TestParam_F_0(float %d0, float %d1, float %d2, float %d3, float %d4, float %d5, float %d6, float %d7, float %d8, float %d9, float %d10, float %d11, float %d12, float %d13, float %d14, float %d15, float %d16, float %d17, float %d18, float %d19) { +entry: + ret float %d0 +; CHECK-LABEL: TestParam_F_0: +; CHECK-NOT: mov +; CHECK: retq +} + +define float @TestParam_F_1(float %d0, float %d1, float %d2, float %d3, float %d4, float %d5, float %d6, float %d7, float %d8, float %d9, float %d10, float %d11, float %d12, float %d13, float %d14, float %d15, float %d16, float %d17, float %d18, float %d19) { +entry: + ret float %d1 +; CHECK-LABEL: TestParam_F_1: +; CHECK: movaps %xmm1, %xmm0 +; CHECK-NEXT: retq +} + +define float @TestParam_F_7(float %d0, float %d1, float %d2, float %d3, float %d4, float %d5, float %d6, float %d7, float %d8, float %d9, float %d10, float %d11, float %d12, float %d13, float %d14, float %d15, float %d16, float %d17, float %d18, float %d19) { +entry: + ret float %d7 +; CHECK-LABEL: TestParam_F_7: +; CHECK: movaps %xmm7, %xmm0 +; CHECK-NEXT: retq +} + +define float @TestParam_F_8(float %d0, float %d1, float %d2, float %d3, float %d4, float %d5, float %d6, float %d7, float %d8, float %d9, float %d10, float %d11, float %d12, float %d13, float %d14, float %d15, float %d16, float %d17, float %d18, float %d19) { +entry: + ret float %d8 +; CHECK-LABEL: 
TestParam_F_8: +; CHECK: movss 8(%rsp), %xmm0 +; CHECK-NEXT: retq +} + +define float @TestParam_F_9(float %d0, float %d1, float %d2, float %d3, float %d4, float %d5, float %d6, float %d7, float %d8, float %d9, float %d10, float %d11, float %d12, float %d13, float %d14, float %d15, float %d16, float %d17, float %d18, float %d19) { +entry: + ret float %d9 +; CHECK-LABEL: TestParam_F_9: +; CHECK: movss 16(%rsp), %xmm0 +; CHECK-NEXT: retq +} + +define double @TestParam_D_0(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10, double %d11, double %d12, double %d13, double %d14, double %d15, double %d16, double %d17, double %d18, double %d19) { +entry: + ret double %d0 +; CHECK-LABEL: TestParam_D_0: +; CHECK-NOT: mov +; CHECK: retq +} + +define double @TestParam_D_1(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10, double %d11, double %d12, double %d13, double %d14, double %d15, double %d16, double %d17, double %d18, double %d19) { +entry: + ret double %d1 +; CHECK-LABEL: TestParam_D_1: +; CHECK: movaps %xmm1, %xmm0 +; CHECK-NEXT: retq +} + +define double @TestParam_D_7(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10, double %d11, double %d12, double %d13, double %d14, double %d15, double %d16, double %d17, double %d18, double %d19) { +entry: + ret double %d7 +; CHECK-LABEL: TestParam_D_7: +; CHECK: movaps %xmm7, %xmm0 +; CHECK-NEXT: retq +} + +define double @TestParam_D_8(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10, double %d11, double %d12, double %d13, double %d14, double %d15, double %d16, double %d17, double %d18, double %d19) { +entry: + ret double %d8 +; CHECK-LABEL: TestParam_D_8: +; CHECK: movsd 8(%rsp), %xmm0 +; CHECK-NEXT: retq +} + 
+define double @TestParam_D_9(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10, double %d11, double %d12, double %d13, double %d14, double %d15, double %d16, double %d17, double %d18, double %d19) { +entry: + ret double %d9 +; CHECK-LABEL: TestParam_D_9: +; CHECK: movsd 16(%rsp), %xmm0 +; CHECK-NEXT: retq +} + +define fp128 @TestParam_FP128_0(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) { +entry: + ret fp128 %d0 +; CHECK-LABEL: TestParam_FP128_0: +; CHECK-NOT: mov +; CHECK: retq +} + +define fp128 @TestParam_FP128_1(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) { +entry: + ret fp128 %d1 +; CHECK-LABEL: TestParam_FP128_1: +; CHECK: movaps %xmm1, %xmm0 +; CHECK-NEXT: retq +} + +define fp128 @TestParam_FP128_7(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) { +entry: + ret fp128 %d7 +; CHECK-LABEL: TestParam_FP128_7: +; CHECK: movaps %xmm7, %xmm0 +; CHECK-NEXT: retq +} + +define fp128 @TestParam_FP128_8(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) { +entry: + ret fp128 %d8 +; CHECK-LABEL: TestParam_FP128_8: +; CHECK: movaps 8(%rsp), %xmm0 +; CHECK-NEXT: retq +} + +define fp128 @TestParam_FP128_9(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, 
fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) { +entry: + ret fp128 %d9 +; CHECK-LABEL: TestParam_FP128_9: +; CHECK: movaps 24(%rsp), %xmm0 +; CHECK-NEXT: retq +} Index: test/CodeGen/X86/fp128-cast.ll =================================================================== --- test/CodeGen/X86/fp128-cast.ll +++ test/CodeGen/X86/fp128-cast.ll @@ -0,0 +1,372 @@ +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android | FileCheck %s +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu | FileCheck %s + +; Check soft floating point conversion function calls. + +@vi32 = common global i32 0, align 4 +@vi64 = common global i64 0, align 8 +@vf32 = common global float 0.000000e+00, align 4 +@vf64 = common global double 0.000000e+00, align 8 +@vf128 = common global fp128 0xL00000000000000000000000000000000, align 16 + +define void @TestCastF32_I32() { +entry: + %0 = load float, float* @vf32, align 4 + %conv = fptosi float %0 to i32 + store i32 %conv, i32* @vi32, align 4 + ret void +; CHECK-LABEL: TestCastF32_I32: +; CHECK: cvttss2si vf32(%rip), %eax +; CHECK-NEXT: movl %eax, vi32(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastF32_I64() { +entry: + %0 = load float, float* @vf32, align 4 + %conv = fptosi float %0 to i32 + %conv1 = sext i32 %conv to i64 + store i64 %conv1, i64* @vi64, align 8 + ret void +; CHECK-LABEL: TestCastF32_I64: +; CHECK: cvttss2si vf32(%rip), %eax +; CHECK-NEXT: cltq +; CHECK-NEXT: movq %rax, vi64(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastF32_F64() { +entry: + %0 = load float, float* @vf32, align 4 + %conv = fpext float %0 to double + store double %conv, double* @vf64, align 8 + ret void +; CHECK-LABEL: TestCastF32_F64: +; CHECK: movss vf32(%rip), %xmm0 +; CHECK-NEXT: cvtss2sd %xmm0, %xmm0 +; CHECK-NEXT: movsd %xmm0, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastF32_F128() { +entry: + %0 = load float, float* @vf32, align 4 + 
%conv = fpext float %0 to fp128 + store fp128 %conv, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: TestCastF32_F128: +; CHECK: movss vf32(%rip), %xmm0 +; CHECK-NEXT: callq __extendsftf2 +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} + +define void @TestCastF64_I32() { +entry: + %0 = load double, double* @vf64, align 8 + %conv = fptosi double %0 to i32 + store i32 %conv, i32* @vi32, align 4 + ret void +; CHECK-LABEL: TestCastF64_I32: +; CHECK: cvttsd2si vf64(%rip), %eax +; CHECK-NEXT: movl %eax, vi32(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastF64_I64() { +entry: + %0 = load double, double* @vf64, align 8 + %conv = fptosi double %0 to i32 + %conv1 = sext i32 %conv to i64 + store i64 %conv1, i64* @vi64, align 8 + ret void +; CHECK-LABEL: TestCastF64_I64: +; CHECK: cvttsd2si vf64(%rip), %eax +; CHECK-NEXT: cltq +; CHECK-NEXT: movq %rax, vi64(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastF64_F32() { +entry: + %0 = load double, double* @vf64, align 8 + %conv = fptrunc double %0 to float + store float %conv, float* @vf32, align 4 + ret void +; CHECK-LABEL: TestCastF64_F32: +; CHECK: movsd vf64(%rip), %xmm0 +; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0 +; CHECK-NEXT: movss %xmm0, vf32(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastF64_F128() { +entry: + %0 = load double, double* @vf64, align 8 + %conv = fpext double %0 to fp128 + store fp128 %conv, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: TestCastF64_F128: +; CHECK: movsd vf64(%rip), %xmm0 +; CHECK-NEXT: callq __extenddftf2 +; CHECK-NEXT: movapd %xmm0, vf128(%rip) +; CHECK: ret +} + +define void @TestCastF128_I32() { +entry: + %0 = load fp128, fp128* @vf128, align 16 + %conv = fptosi fp128 %0 to i32 + store i32 %conv, i32* @vi32, align 4 + ret void +; CHECK-LABEL: TestCastF128_I32: +; CHECK: movaps vf128(%rip), %xmm0 +; CHECK-NEXT: callq __fixtfsi +; CHECK-NEXT: movl %eax, vi32(%rip) +; CHECK: retq +} + +define void @TestCastF128_I64() { +entry: + %0 = load fp128, fp128* @vf128, 
align 16 + %conv = fptosi fp128 %0 to i32 + %conv1 = sext i32 %conv to i64 + store i64 %conv1, i64* @vi64, align 8 + ret void +; CHECK-LABEL: TestCastF128_I64: +; CHECK: movaps vf128(%rip), %xmm0 +; CHECK-NEXT: callq __fixtfsi +; CHECK-NEXT: cltq +; CHECK-NEXT: movq %rax, vi64(%rip) +; CHECK: retq +} + +define void @TestCastF128_F32() { +entry: + %0 = load fp128, fp128* @vf128, align 16 + %conv = fptrunc fp128 %0 to float + store float %conv, float* @vf32, align 4 + ret void +; CHECK-LABEL: TestCastF128_F32: +; CHECK: movaps vf128(%rip), %xmm0 +; CHECK-NEXT: callq __trunctfsf2 +; CHECK-NEXT: movss %xmm0, vf32(%rip) +; CHECK: retq +} + +define void @TestCastF128_F64() { +entry: + %0 = load fp128, fp128* @vf128, align 16 + %conv = fptrunc fp128 %0 to double + store double %conv, double* @vf64, align 8 + ret void +; CHECK-LABEL: TestCastF128_F64: +; CHECK: movapd vf128(%rip), %xmm0 +; CHECK-NEXT: callq __trunctfdf2 +; CHECK-NEXT: movsd %xmm0, vf64(%rip) +; CHECK: retq +} + +define void @TestCastI32_I64() { +entry: + %0 = load i32, i32* @vi32, align 4 + %conv = sitofp i32 %0 to float + %conv1 = fptosi float %conv to i64 + store i64 %conv1, i64* @vi64, align 8 + ret void +; CHECK-LABEL: TestCastI32_I64: +; CHECK: cvtsi2ssl vi32(%rip), %xmm0 +; CHECK-NEXT: cvttss2si %xmm0, %rax +; CHECK-NEXT: movq %rax, vi64(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastI32_F32() { +entry: + %0 = load i32, i32* @vi32, align 4 + %conv = sitofp i32 %0 to float + store float %conv, float* @vf32, align 4 + ret void +; CHECK-LABEL: TestCastI32_F32: +; CHECK: cvtsi2ssl vi32(%rip), %xmm0 +; CHECK-NEXT: movss %xmm0, vf32(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastI32_F64() { +entry: + %0 = load i32, i32* @vi32, align 4 + %conv = sitofp i32 %0 to double + store double %conv, double* @vf64, align 8 + ret void +; CHECK-LABEL: TestCastI32_F64: +; CHECK: cvtsi2sdl vi32(%rip), %xmm0 +; CHECK-NEXT: movsd %xmm0, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastI32_F128() { 
+entry: + %0 = load i32, i32* @vi32, align 4 + %conv = sitofp i32 %0 to fp128 + store fp128 %conv, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: TestCastI32_F128: +; CHECK: movl vi32(%rip), %edi +; CHECK-NEXT: callq __floatsitf +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} + +define void @TestCastI64_I32(){ +entry: + %0 = load i64, i64* @vi64, align 8 + %conv = sitofp i64 %0 to float + %conv1 = fptosi float %conv to i32 + store i32 %conv1, i32* @vi32, align 4 + ret void +; CHECK-LABEL: TestCastI64_I32: +; CHECK: cvtsi2ssq vi64(%rip), %xmm0 +; CHECK: cvttss2si %xmm0, %eax +; CHECK: movl %eax, vi32(%rip) +; CHECK: retq +} + +define void @TestCastI64_F32(){ +entry: + %0 = load i64, i64* @vi64, align 8 + %conv = sitofp i64 %0 to float + store float %conv, float* @vf32, align 4 + ret void +; CHECK-LABEL: TestCastI64_F32: +; CHECK: cvtsi2ssq vi64(%rip), %xmm0 +; CHECK-NEXT: movss %xmm0, vf32(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastI64_F64(){ +entry: + %0 = load i64, i64* @vi64, align 8 + %conv = sitofp i64 %0 to double + store double %conv, double* @vf64, align 8 + ret void +; CHECK-LABEL: TestCastI64_F64: +; CHECK: cvtsi2sdq vi64(%rip), %xmm0 +; CHECK-NEXT: movsd %xmm0, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @TestCastI64_F128(){ +entry: + %0 = load i64, i64* @vi64, align 8 + %conv = sitofp i64 %0 to fp128 + store fp128 %conv, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: TestCastI64_F128: +; CHECK: movq vi64(%rip), %rdi +; CHECK-NEXT: callq __floatditf +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} + +define i32 @TestConst32(float %v) { +entry: + %cmp = fcmp ogt float %v, 1.000000e+00 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestConst32: +; CHECK: ucomiss {{.*}}, %xmm0 +; CHECK-NEXT: seta %al +; CHECK: retq +} + +define i32 @TestConst64(double %v) { +entry: + %cmp = fcmp ogt double %v, 1.000000e+00 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestConst64: +; CHECK: ucomisd 
{{.*}}, %xmm0 +; CHECK-NEXT: seta %al +; CHECK: retq +} + +define i32 @TestConst128(fp128 %v) { +entry: + %cmp = fcmp ogt fp128 %v, 0xL00000000000000003FFF000000000000 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestConst128: +; CHECK: movaps {{.*}}, %xmm1 +; CHECK-NEXT: callq __gttf2 +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: setg %al +; CHECK: retq +} + +define i32 @TestBits128(fp128 %ld) { +entry: + %mul = fmul fp128 %ld, %ld + %0 = bitcast fp128 %mul to i128 + %u.sroa.0.4.extract.shift = lshr i128 %0, 32 + %or5 = or i128 %u.sroa.0.4.extract.shift, %0 + %or = trunc i128 %or5 to i32 + %cmp = icmp eq i32 %or, 0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestBits128: +; CHECK: movaps %xmm0, %xmm1 +; CHECK-NEXT: callq __multf3 +; CHECK-NEXT: movaps %xmm0, (%rsp) +; CHECK-NEXT: movq (%rsp), +; CHECK-NEXT: movq % +; CHECK-NEXT: shrq $32, +; CHECK: orl +; CHECK-NEXT: sete %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK: retq +} + +define fp128 @TestPair128(i64 %a, i64 %b) { +entry: + %conv = zext i64 %a to i128 + %shl = shl nuw i128 %conv, 64 + %conv1 = zext i64 %b to i128 + %or = or i128 %shl, %conv1 + %add = add i128 %or, 3 + %0 = bitcast i128 %add to fp128 + ret fp128 %0 +; CHECK-LABEL: TestPair128: +; CHECK: addq $3, %rsi +; CHECK-NEXT: movq %rsi, -24(%rsp) +; CHECK-NEXT: adcq $0, %rdi +; CHECK-NEXT: movq %rdi, -16(%rsp) +; CHECK-NEXT: movaps -24(%rsp), %xmm0 +; CHECK-NEXT: retq +} + +define fp128 @TestTruncCopysign(fp128 %x, i32 %n) { +entry: + %cmp = icmp sgt i32 %n, 50000 + br i1 %cmp, label %if.then, label %cleanup + +if.then: ; preds = %entry + %conv = fptrunc fp128 %x to double + %call = tail call double @copysign(double 0x7FF0000000000000, double %conv) #2 + %conv1 = fpext double %call to fp128 + br label %cleanup + +cleanup: ; preds = %entry, %if.then + %retval.0 = phi fp128 [ %conv1, %if.then ], [ %x, %entry ] + ret fp128 %retval.0 +; CHECK-LABEL: TestTruncCopysign: +; CHECK: callq __trunctfdf2 +; CHECK-NEXT: andpd 
{{.*}}, %xmm0 +; CHECK-NEXT: orpd {{.*}}, %xmm0 +; CHECK-NEXT: callq __extenddftf2 +; CHECK: retq +} + +declare double @copysign(double, double) #2 + +attributes #2 = { nounwind readnone } Index: test/CodeGen/X86/fp128-compare.ll =================================================================== --- test/CodeGen/X86/fp128-compare.ll +++ test/CodeGen/X86/fp128-compare.ll @@ -0,0 +1,211 @@ +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android | FileCheck %s +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu | FileCheck %s + +define i32 @TestComp32GT(float %d1, float %d2) { +entry: + %cmp = fcmp ogt float %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp32GT: +; CHECK: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: seta %al +} + +define i32 @TestComp64GT(double %d1, double %d2) { +entry: + %cmp = fcmp ogt double %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp64GT: +; CHECK: ucomisd %xmm1, %xmm0 +; CHECK-NEXT: seta %al +} + +define i32 @TestComp128GT(fp128 %d1, fp128 %d2) { +entry: + %cmp = fcmp ogt fp128 %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp128GT: +; CHECK: callq __gttf2 +; CHECK: setg %al +; CHECK: retq +} + +define i32 @TestComp32GE(float %d1, float %d2) { +entry: + %cmp = fcmp oge float %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp32GE: +; CHECK: ucomiss %xmm1, %xmm0 +; CHECK-NEXT: setae %al +} + +define i32 @TestComp64GE(double %d1, double %d2) { +entry: + %cmp = fcmp oge double %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp64GE: +; CHECK: ucomisd %xmm1, %xmm0 +; CHECK-NEXT: setae %al +} + +define i32 @TestComp128GE(fp128 %d1, fp128 %d2) { +entry: + %cmp = fcmp oge fp128 %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp128GE: +; CHECK: callq __getf2 +; CHECK: testl %eax, %eax +; CHECK: retq +} + +define i32 @TestComp32LT(float %d1, float %d2) { +entry: + %cmp = fcmp olt float %d1,
%d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp32LT: +; CHECK: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: seta %al +} + +define i32 @TestComp64LT(double %d1, double %d2) { +entry: + %cmp = fcmp olt double %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp64LT: +; CHECK: ucomisd %xmm0, %xmm1 +; CHECK-NEXT: seta %al +} + +define i32 @TestComp128LT(fp128 %d1, fp128 %d2) { +entry: + %cmp = fcmp olt fp128 %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp128LT: +; CHECK: callq __lttf2 +; CHECK-NEXT: shrl $31, %eax +; CHECK: retq +} + +define i32 @TestComp32LE(float %d1, float %d2) { +entry: + %cmp = fcmp ole float %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp32LE: +; CHECK: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: setae %al +} + +define i32 @TestComp64LE(double %d1, double %d2) { +entry: + %cmp = fcmp ole double %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp64LE: +; CHECK: ucomisd %xmm0, %xmm1 +; CHECK-NEXT: setae %al +} + +define i32 @TestComp128LE(fp128 %d1, fp128 %d2) { +entry: + %cmp = fcmp ole fp128 %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp128LE: +; CHECK: callq __letf2 +; CHECK-NEXT: testl %eax, %eax +; CHECK: retq +} + +define i32 @TestComp32EQ(float %d1, float %d2) { +entry: + %cmp = fcmp oeq float %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp32EQ: +; CHECK: cmpeqss %xmm1, %xmm0 +; CHECK-NEXT: movd %xmm0, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: retq +} + +define i32 @TestComp64EQ(double %d1, double %d2) { +entry: + %cmp = fcmp oeq double %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp64EQ: +; CHECK: cmpeqsd %xmm1, %xmm0 +; CHECK-NEXT: movd %xmm0, %rax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: retq +} + +define i32 @TestComp128EQ(fp128 %d1, fp128 %d2) { +entry: + %cmp = fcmp oeq fp128 %d1, %d2 + %conv = zext i1 
%cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp128EQ: +; CHECK: callq __eqtf2 +; CHECK-NEXT: testl %eax, %eax +; CHECK: retq +} + +define i32 @TestComp32NE(float %d1, float %d2) { +entry: + %cmp = fcmp une float %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp32NE: +; CHECK: cmpneqss %xmm1, %xmm0 +; CHECK-NEXT: movd %xmm0, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: retq +} + +define i32 @TestComp64NE(double %d1, double %d2) { +entry: + %cmp = fcmp une double %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp64NE: +; CHECK: cmpneqsd %xmm1, %xmm0 +; CHECK-NEXT: movd %xmm0, %rax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: retq +} + +define i32 @TestComp128NE(fp128 %d1, fp128 %d2) { +entry: + %cmp = fcmp une fp128 %d1, %d2 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK-LABEL: TestComp128NE: +; CHECK: callq __netf2 +; CHECK-NEXT: testl %eax, %eax +; CHECK: retq +} + +define fp128 @TestMax(fp128 %x, fp128 %y) { +entry: + %cmp = fcmp ogt fp128 %x, %y + %cond = select i1 %cmp, fp128 %x, fp128 %y + ret fp128 %cond +; CHECK-LABEL: TestMax: +; CHECK: movaps %xmm1 +; CHECK: movaps %xmm0 +; CHECK: callq __gttf2 +; CHECK: movaps {{.*}}, %xmm0 +; CHECK: testl %eax, %eax +; CHECK: movaps {{.*}}, %xmm0 +; CHECK: retq +} Index: test/CodeGen/X86/fp128-i128.ll =================================================================== --- test/CodeGen/X86/fp128-i128.ll +++ test/CodeGen/X86/fp128-i128.ll @@ -0,0 +1,241 @@ +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android | FileCheck %s +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu | FileCheck %s + +; Check some i128 instruction patterns triggered by fp128. 
+ +define void @TestUnionLD1(fp128 %s, i64 %n) #0 { +entry: + %0 = bitcast fp128 %s to i128 + %1 = zext i64 %n to i128 + %bf.value = shl nuw i128 %1, 64 + %bf.shl = and i128 %bf.value, 5192296858534809181786422619668480 + %bf.clear = and i128 %0, -5192296858534809181786422619668481 + %bf.set = or i128 %bf.shl, %bf.clear + %2 = bitcast i128 %bf.set to fp128 + tail call void @foo(fp128 %2) #2 + ret void +; CHECK-LABEL: TestUnionLD1: +; CHECK: movaps %xmm0, -24(%rsp) +; CHECK-NEXT: movq -24(%rsp), %rax +; CHECK-NEXT: movabsq $281474976710655, %rcx +; CHECK-NEXT: andq %rdi, %rcx +; CHECK-NEXT: movabsq $-281474976710656, %rdx +; CHECK-NEXT: andq -16(%rsp), %rdx +; CHECK-NEXT: movq %rax, -40(%rsp) +; CHECK-NEXT: orq %rcx, %rdx +; CHECK-NEXT: movq %rdx, -32(%rsp) +; CHECK-NEXT: movaps -40(%rsp), %xmm0 +; CHECK-NEXT: jmp foo +} + +define fp128 @TestUnionLD2(fp128 %s) #0 { +entry: + %0 = bitcast fp128 %s to i128 + %bf.clear = and i128 %0, -18446744073709551616 + %1 = bitcast i128 %bf.clear to fp128 + ret fp128 %1 +; CHECK-LABEL: TestUnionLD2: +; CHECK: movaps %xmm0, -24(%rsp) +; CHECK-NEXT: movq -16(%rsp), %rax +; CHECK-NEXT: movq %rax, -32(%rsp) +; CHECK-NEXT: movq $0, -40(%rsp) +; CHECK-NEXT: movaps -40(%rsp), %xmm0 +; CHECK-NEXT: retq +} + +define fp128 @TestI128_1(fp128 %x) #0 { +entry: + %0 = bitcast fp128 %x to i128 + %bf.clear = and i128 %0, 170141183460469231731687303715884105727 + %1 = bitcast i128 %bf.clear to fp128 + %cmp = fcmp olt fp128 %1, 0xL999999999999999A3FFB999999999999 + %cond = select i1 %cmp, fp128 0xL00000000000000003FFF000000000000, fp128 0xL00000000000000004000000000000000 + ret fp128 %cond +; CHECK-LABEL: TestI128_1: +; CHECK: movaps %xmm0, +; CHECK: movabsq $9223372036854775807, +; CHECK: callq __lttf2 +; CHECK: testl %eax, %eax +; CHECK: movaps {{.*}}, %xmm0 +; CHECK: retq +} + +define fp128 @TestI128_2(fp128 %x, fp128 %y) #0 { +entry: + %0 = bitcast fp128 %x to i128 + %cmp = icmp sgt i128 %0, -1 + %cond = select i1 %cmp, fp128 %x, fp128 %y + ret 
fp128 %cond +; CHECK-LABEL: TestI128_2: +; CHECK: movaps %xmm0, -24(%rsp) +; CHECK-NEXT: cmpq $0, -16(%rsp) +; CHECK-NEXT: jns +; CHECK: movaps %xmm1, %xmm0 +; CHECK: retq +} + +define fp128 @TestI128_3(fp128 %x, i32* nocapture readnone %ex) #0 { +entry: + %0 = bitcast fp128 %x to i128 + %bf.cast = and i128 %0, 170135991163610696904058773219554885632 + %cmp = icmp eq i128 %bf.cast, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %mul = fmul fp128 %x, 0xL00000000000000004201000000000000 + %1 = bitcast fp128 %mul to i128 + %bf.clear4 = and i128 %1, -170135991163610696904058773219554885633 + %bf.set = or i128 %bf.clear4, 85060207136517546210586590865283612672 + br label %if.end + +if.end: ; preds = %if.then, %entry + %u.sroa.0.0 = phi i128 [ %bf.set, %if.then ], [ %0, %entry ] + %2 = bitcast i128 %u.sroa.0.0 to fp128 + ret fp128 %2 +; CHECK-LABEL: TestI128_3: +; CHECK: movaps %xmm0, +; CHECK: movabsq $9223090561878065152, +; CHECK: testq +; CHECK: callq __multf3 +; CHECK-NEXT: movaps %xmm0 +; CHECK: movabsq $-9223090561878065153, +; CHECK: movabsq $4611123068473966592, +; CHECK: retq +} + +define fp128 @TestI128_4(fp128 %x) #0 { +entry: + %0 = bitcast fp128 %x to i128 + %bf.clear = and i128 %0, -18446744073709551616 + %1 = bitcast i128 %bf.clear to fp128 + %add = fadd fp128 %1, %x + ret fp128 %add +; CHECK-LABEL: TestI128_4: +; CHECK: movaps %xmm0, %xmm1 +; CHECK-NEXT: movaps %xmm1, 16(%rsp) +; CHECK-NEXT: movq 24(%rsp), %rax +; CHECK-NEXT: movq %rax, 8(%rsp) +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: movaps (%rsp), %xmm0 +; CHECK-NEXT: callq __addtf3 +; CHECK: retq +} + +define { i64, i64 } @TestShift128(i64 %x.coerce0, i64 %x.coerce1) #0 { +entry: + %.fca.1.insert = insertvalue { i64, i64 } { i64 0, i64 undef }, i64 %x.coerce0, 1 + ret { i64, i64 } %.fca.1.insert +; CHECK-LABEL: TestShift128: +; CHECK: xorl %eax, %eax +; CHECK-NEXT: movq %rdi, %rdx +; CHECK-NEXT: retq +} + +@v128 = common global i128 0, align 16 +@v128_2 = common 
global i128 0, align 16 + +define void @TestShift128_2() { +entry: + %0 = load i128, i128* @v128, align 16 + %shl = shl i128 %0, 96 + %1 = load i128, i128* @v128_2, align 16 + %or = or i128 %shl, %1 + store i128 %or, i128* @v128, align 16 + ret void +; CHECK-LABEL: TestShift128_2: +; CHECK: movq v128(%rip), %rax +; CHECK-NEXT: shlq $32, %rax +; CHECK-NEXT: movq v128_2(%rip), %rcx +; CHECK-NEXT: orq v128_2+8(%rip), %rax +; CHECK-NEXT: movq %rcx, v128(%rip) +; CHECK-NEXT: movq %rax, v128+8(%rip) +; CHECK-NEXT: retq +} + +define fp128 @acosl(fp128 %x) #0 { +entry: + %0 = bitcast fp128 %x to i128 + %bf.clear = and i128 %0, -18446744073709551616 + %1 = bitcast i128 %bf.clear to fp128 + %add = fadd fp128 %1, %x + ret fp128 %add +; CHECK-LABEL: acosl: +; CHECK: movaps %xmm0, %xmm1 +; CHECK-NEXT: movaps %xmm1, 16(%rsp) +; CHECK-NEXT: movq 24(%rsp), %rax +; CHECK-NEXT: movq %rax, 8(%rsp) +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: movaps (%rsp), %xmm0 +; CHECK-NEXT: callq __addtf3 +; CHECK: retq +} + +; Compare i128 values and check i128 constants. +define fp128 @TestComp(fp128 %x, fp128 %y) #0 { +entry: + %0 = bitcast fp128 %x to i128 + %cmp = icmp sgt i128 %0, -1 + %cond = select i1 %cmp, fp128 %x, fp128 %y + ret fp128 %cond +; CHECK-LABEL: TestComp: +; CHECK: movaps %xmm0, -24(%rsp) +; CHECK-NEXT: cmpq $0, -16(%rsp) +; CHECK-NEXT: jns +; CHECK: movaps %xmm1, %xmm0 +; CHECK: retq +} + +declare void @foo(fp128) #1 + +; Test logical operations on fp128 values. +define fp128 @TestFABS_LD(fp128 %x) #0 { +entry: + %call = tail call fp128 @fabsl(fp128 %x) #2 + ret fp128 %call +; CHECK-LABEL: TestFABS_LD: +; CHECK: andps {{.*}}, %xmm0 +; CHECK-NEXT: retq +} + +declare fp128 @fabsl(fp128) #1 + +declare fp128 @copysignl(fp128, fp128) #1 + +; Test more complicated logical operations generated from copysignl.
+define void @TestCopySign({ fp128, fp128 }* noalias nocapture sret %agg.result, { fp128, fp128 }* byval nocapture readonly align 16 %z) #0 { +entry: + %z.realp = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %z, i64 0, i32 0 + %z.real = load fp128, fp128* %z.realp, align 16 + %z.imagp = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %z, i64 0, i32 1 + %z.imag4 = load fp128, fp128* %z.imagp, align 16 + %cmp = fcmp ogt fp128 %z.real, %z.imag4 + %sub = fsub fp128 %z.imag4, %z.imag4 + br i1 %cmp, label %if.then, label %cleanup + +if.then: ; preds = %entry + %call = tail call fp128 @fabsl(fp128 %sub) #2 + br label %cleanup + +cleanup: ; preds = %entry, %if.then + %z.real.sink = phi fp128 [ %z.real, %if.then ], [ %sub, %entry ] + %call.sink = phi fp128 [ %call, %if.then ], [ %z.real, %entry ] + %call5 = tail call fp128 @copysignl(fp128 %z.real.sink, fp128 %z.imag4) #2 + %0 = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %agg.result, i64 0, i32 0 + %1 = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %agg.result, i64 0, i32 1 + store fp128 %call.sink, fp128* %0, align 16 + store fp128 %call5, fp128* %1, align 16 + ret void +; CHECK-LABEL: TestCopySign: +; CHECK-NOT: call +; CHECK: callq __subtf3 +; CHECK-NOT: call +; CHECK: callq __gttf2 +; CHECK-NOT: call +; CHECK: andps {{.*}}, %xmm0 +; CHECK: retq +} + + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+ssse3,+sse3,+popcnt,+sse,+sse2,+sse4.1,+sse4.2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64"
"target-features"="+ssse3,+sse3,+popcnt,+sse,+sse2,+sse4.1,+sse4.2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone } Index: test/CodeGen/X86/fp128-libcalls.ll =================================================================== --- test/CodeGen/X86/fp128-libcalls.ll +++ test/CodeGen/X86/fp128-libcalls.ll @@ -0,0 +1,201 @@ +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android | FileCheck %s +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu | FileCheck %s + +; Check all soft floating point library function calls. + +@vf64 = common global double 0.000000e+00, align 8 +@vf128 = common global fp128 0xL00000000000000000000000000000000, align 16 + +define void @Test64Add(double %d1, double %d2) { +entry: + %add = fadd double %d1, %d2 + store double %add, double* @vf64, align 8 + ret void +; CHECK-LABEL: Test64Add: +; CHECK: addsd %xmm1, %xmm0 +; CHECK-NEXT: movsd %xmm0, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @Test64_1Add(double %d1) { +entry: + %0 = load double, double* @vf64, align 8 + %add = fadd double %0, %d1 + store double %add, double* @vf64, align 8 + ret void +; CHECK-LABEL: Test64_1Add: +; CHECK: addsd vf64(%rip), %xmm0 +; CHECK-NEXT: movsd %xmm0, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @Test128Add(fp128 %d1, fp128 %d2) { +entry: + %add = fadd fp128 %d1, %d2 + store fp128 %add, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: Test128Add: +; CHECK: callq __addtf3 +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} + +define void @Test128_1Add(fp128 %d1){ +entry: + %0 = load fp128, fp128* @vf128, align 16 + %add = fadd fp128 %0, %d1 + store fp128 %add, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: Test128_1Add: +; CHECK: movaps %xmm0, %xmm1 +; CHECK-NEXT: movaps vf128(%rip), %xmm0 +; CHECK-NEXT: callq __addtf3 +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} + +define void @Test64Sub(double %d1, double %d2){ +entry: + %sub = fsub double %d1, %d2 + store double %sub, double* @vf64, align 8 
+ ret void +; CHECK-LABEL: Test64Sub: +; CHECK: subsd %xmm1, %xmm0 +; CHECK-NEXT: movsd %xmm0, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @Test64_1Sub(double %d1){ +entry: + %0 = load double, double* @vf64, align 8 + %sub = fsub double %0, %d1 + store double %sub, double* @vf64, align 8 + ret void +; CHECK-LABEL: Test64_1Sub: +; CHECK: movsd vf64(%rip), %xmm1 +; CHECK-NEXT: subsd %xmm0, %xmm1 +; CHECK-NEXT: movsd %xmm1, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @Test128Sub(fp128 %d1, fp128 %d2){ +entry: + %sub = fsub fp128 %d1, %d2 + store fp128 %sub, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: Test128Sub: +; CHECK: callq __subtf3 +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} + +define void @Test128_1Sub(fp128 %d1){ +entry: + %0 = load fp128, fp128* @vf128, align 16 + %sub = fsub fp128 %0, %d1 + store fp128 %sub, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: Test128_1Sub: +; CHECK: movaps %xmm0, %xmm1 +; CHECK-NEXT: movaps vf128(%rip), %xmm0 +; CHECK-NEXT: callq __subtf3 +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} + +define void @Test64Mul(double %d1, double %d2){ +entry: + %mul = fmul double %d1, %d2 + store double %mul, double* @vf64, align 8 + ret void +; CHECK-LABEL: Test64Mul: +; CHECK: mulsd %xmm1, %xmm0 +; CHECK-NEXT: movsd %xmm0, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @Test64_1Mul(double %d1){ +entry: + %0 = load double, double* @vf64, align 8 + %mul = fmul double %0, %d1 + store double %mul, double* @vf64, align 8 + ret void +; CHECK-LABEL: Test64_1Mul: +; CHECK: mulsd vf64(%rip), %xmm0 +; CHECK-NEXT: movsd %xmm0, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @Test128Mul(fp128 %d1, fp128 %d2){ +entry: + %mul = fmul fp128 %d1, %d2 + store fp128 %mul, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: Test128Mul: +; CHECK: callq __multf3 +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} + +define void @Test128_1Mul(fp128 %d1){ +entry: + %0 = load fp128, fp128* @vf128, align 16 + %mul 
= fmul fp128 %0, %d1 + store fp128 %mul, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: Test128_1Mul: +; CHECK: movaps %xmm0, %xmm1 +; CHECK-NEXT: movaps vf128(%rip), %xmm0 +; CHECK-NEXT: callq __multf3 +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} + +define void @Test64Div(double %d1, double %d2){ +entry: + %div = fdiv double %d1, %d2 + store double %div, double* @vf64, align 8 + ret void +; CHECK-LABEL: Test64Div: +; CHECK: divsd %xmm1, %xmm0 +; CHECK-NEXT: movsd %xmm0, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @Test64_1Div(double %d1){ +entry: + %0 = load double, double* @vf64, align 8 + %div = fdiv double %0, %d1 + store double %div, double* @vf64, align 8 + ret void +; CHECK-LABEL: Test64_1Div: +; CHECK: movsd vf64(%rip), %xmm1 +; CHECK-NEXT: divsd %xmm0, %xmm1 +; CHECK-NEXT: movsd %xmm1, vf64(%rip) +; CHECK-NEXT: retq +} + +define void @Test128Div(fp128 %d1, fp128 %d2){ +entry: + %div = fdiv fp128 %d1, %d2 + store fp128 %div, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: Test128Div: +; CHECK: callq __divtf3 +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} + +define void @Test128_1Div(fp128 %d1){ +entry: + %0 = load fp128, fp128* @vf128, align 16 + %div = fdiv fp128 %0, %d1 + store fp128 %div, fp128* @vf128, align 16 + ret void +; CHECK-LABEL: Test128_1Div: +; CHECK: movaps %xmm0, %xmm1 +; CHECK-NEXT: movaps vf128(%rip), %xmm0 +; CHECK-NEXT: callq __divtf3 +; CHECK-NEXT: movaps %xmm0, vf128(%rip) +; CHECK: retq +} Index: test/CodeGen/X86/fp128-load.ll =================================================================== --- test/CodeGen/X86/fp128-load.ll +++ test/CodeGen/X86/fp128-load.ll @@ -0,0 +1,69 @@ +; RUN: llc < %s -O2 -mtriple=x86_64-linux-android | FileCheck %s +; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu | FileCheck %s + +; double myD = 1.0; +@my_double = global double 1.000000e+00, align 8 + +; long double myFP80 = 1.0L; // x86_64-linux-gnu +@my_fp80 = global x86_fp80 0xK3FFF8000000000000000, align 16 + +; long 
double my_fp128 = 1.0L; // x86_64-linux-android
+@my_fp128 = global fp128 0xL00000000000000003FFF000000000000, align 16 ; 0xL lists the low 64 bits first; the high quadword 0x3FFF000000000000 encodes 1.0
+
+define double @get_double() {
+entry:
+  %0 = load double, double* @my_double, align 8
+  ret double %0 ; double is returned in %xmm0: one movsd load is expected
+; CHECK-LABEL: get_double:
+; CHECK: movsd my_double(%rip), %xmm0
+; CHECK-NEXT: retq
+}
+
+define x86_fp80 @get_fp80() {
+entry:
+  %0 = load x86_fp80, x86_fp80* @my_fp80, align 16
+  ret x86_fp80 %0 ; x86_fp80 goes through the x87 stack: a single fldt is expected
+; CHECK-LABEL: get_fp80:
+; CHECK: fldt my_fp80(%rip)
+; CHECK-NEXT: retq
+}
+
+define fp128 @get_fp128() {
+entry:
+  %0 = load fp128, fp128* @my_fp128, align 16
+  ret fp128 %0 ; fp128 is returned in %xmm0: one aligned 16-byte movaps load is expected
+; CHECK-LABEL: get_fp128:
+; CHECK: movaps my_fp128(%rip), %xmm0
+; CHECK-NEXT: retq
+}
+
+@TestLoadExtend.data = internal unnamed_addr constant [2 x float] [float 0x3FB99999A0000000, float 0x3FC99999A0000000], align 4 ; approximately 0.1f and 0.2f
+
+define fp128 @TestLoadExtend(fp128 %x, i32 %n) { ; %x is unused; only the indexed float load and its extension are exercised
+entry:
+  %idxprom = sext i32 %n to i64
+  %arrayidx = getelementptr inbounds [2 x float], [2 x float]* @TestLoadExtend.data, i64 0, i64 %idxprom
+  %0 = load float, float* %arrayidx, align 4
+  %conv = fpext float %0 to fp128 ; float -> fp128 widening is expected to call __extendsftf2
+  ret fp128 %conv
+; CHECK-LABEL: TestLoadExtend:
+; CHECK: movslq %edi, %rax
+; CHECK-NEXT: movss TestLoadExtend.data(,%rax,4), %xmm0
+; CHECK-NEXT: callq __extendsftf2
+; CHECK: retq
+}
+
+; CHECK-LABEL: my_double:
+; CHECK-NEXT: .quad 4607182418800017408
+; CHECK-NEXT: .size my_double, 8
+
+; CHECK-LABEL: my_fp80:
+; CHECK-NEXT: .quad -9223372036854775808
+; CHECK-NEXT: .short 16383
+; CHECK-NEXT: .zero 6
+; CHECK-NEXT: .size my_fp80, 16
+
+; CHECK-LABEL: my_fp128:
+; CHECK-NEXT: .quad 0
+; CHECK-NEXT: .quad 4611404543450677248
+; CHECK-NEXT: .size my_fp128, 16
Index: test/CodeGen/X86/fp128-store.ll
===================================================================
--- test/CodeGen/X86/fp128-store.ll
+++ test/CodeGen/X86/fp128-store.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-android | FileCheck %s
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu | FileCheck %s
+
+;
double myD = 1.0;
+@myD = global double 1.000000e+00, align 8
+
+; long double myFP80 = 1.0L; // x86_64-linux-gnu
+@myFP80 = global x86_fp80 0xK3FFF8000000000000000, align 16
+
+; long double myFP128 = 1.0L; // x86_64-linux-android
+@myFP128 = global fp128 0xL00000000000000003FFF000000000000, align 16
+
+define void @set_D(double %x) {
+entry:
+  store double %x, double* @myD, align 8 ; double arrives in %xmm0: a single movsd store is expected
+  ret void
+; CHECK-LABEL: set_D:
+; CHECK: movsd %xmm0, myD(%rip)
+; CHECK-NEXT: retq
+}
+
+define void @set_FP80(x86_fp80 %x) {
+entry:
+  store x86_fp80 %x, x86_fp80* @myFP80, align 16 ; x86_fp80 arrives on the stack and is stored through the x87 stack (fldt/fstpt)
+  ret void
+; CHECK-LABEL: set_FP80:
+; CHECK: fldt 8(%rsp)
+; CHECK-NEXT: fstpt myFP80(%rip)
+; CHECK-NEXT: retq
+}
+
+define void @set_FP128(fp128 %x) {
+entry:
+  store fp128 %x, fp128* @myFP128, align 16 ; fp128 arrives in %xmm0: one aligned 16-byte movaps store is expected
+  ret void
+; CHECK-LABEL: set_FP128:
+; CHECK: movaps %xmm0, myFP128(%rip)
+; CHECK-NEXT: retq
+}
Index: test/CodeGen/X86/soft-fp.ll
===================================================================
--- test/CodeGen/X86/soft-fp.ll
+++ test/CodeGen/X86/soft-fp.ll
@@ -1,8 +1,14 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2,+soft-float | FileCheck %s
-; RUN: llc < %s -march=x86-64 -mattr=+sse2,+soft-float | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse,+soft-float \
+; RUN: | FileCheck %s --check-prefix=SOFT1 --check-prefix=CHECK
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,+soft-float \
+; RUN: | FileCheck %s --check-prefix=SOFT2 --check-prefix=CHECK
+; RUN: llc < %s -march=x86-64 -mattr=+sse \
+; RUN: | FileCheck %s --check-prefix=SSE1 --check-prefix=CHECK
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 \
+; RUN: | FileCheck %s --check-prefix=SSE2 --check-prefix=CHECK
 ; RUN: llc < %s -mtriple=x86_64-gnux32 -mattr=+sse2,+soft-float | FileCheck %s
 
-; CHECK-NOT: xmm{[0-9]+}
+; CHECK-NOT: xmm{{[0-9]+}}
 
 %struct.__va_list_tag = type { i32, i32, i8*, i8* }
 
@@ -15,6 +21,8 @@
   call void @bar(%struct.__va_list_tag* %va3) nounwind
   call void @llvm.va_end(i8* %va12)
   ret i32 undef
+; CHECK-LABEL: t1:
+; CHECK: ret{{[lq]}}
 }
 
 declare void @llvm.va_start(i8*) nounwind
@@ -27,4 +35,23 @@
 entry:
   %0 = fadd float %a, %b ; [#uses=1]
   ret float %0
+; CHECK-LABEL: t2:
+; SOFT1-NOT: xmm{{[0-9]+}}
+; SOFT2-NOT: xmm{{[0-9]+}}
+; SSE1: xmm{{[0-9]+}}
+; SSE2: xmm{{[0-9]+}}
+; CHECK: ret{{[lq]}}
 }
+
+; With soft-float, no SSE instructions may be emitted and fp128 is passed as a pair of i64.
+define fp128 @t3(fp128 %a, fp128 %b) nounwind readnone {
+entry:
+  %0 = fadd fp128 %b, %a ; xmm registers must show up only in the runs without soft-float
+  ret fp128 %0
+; CHECK-LABEL: t3:
+; SOFT1-NOT: xmm{{[0-9]+}}
+; SOFT2-NOT: xmm{{[0-9]+}}
+; SSE1: xmm{{[0-9]+}}
+; SSE2: xmm{{[0-9]+}}
+; CHECK: ret{{[lq]}}
+}
Index: utils/TableGen/X86RecognizableInstr.cpp
===================================================================
--- utils/TableGen/X86RecognizableInstr.cpp
+++ utils/TableGen/X86RecognizableInstr.cpp
@@ -951,6 +951,7 @@
   TYPE("f128mem", TYPE_M128)
   TYPE("f256mem", TYPE_M256)
   TYPE("f512mem", TYPE_M512)
+  TYPE("FR128", TYPE_XMM128) // FR128 operands get the TYPE_XMM128 operand type
   TYPE("FR64", TYPE_XMM64)
   TYPE("FR64X", TYPE_XMM64)
   TYPE("f64mem", TYPE_M64FP)
@@ -1069,6 +1070,7 @@
   // register IDs in 8-bit immediates nowadays.
   ENCODING("FR32", ENCODING_IB)
   ENCODING("FR64", ENCODING_IB)
+  ENCODING("FR128", ENCODING_IB)
   ENCODING("VR128", ENCODING_IB)
   ENCODING("VR256", ENCODING_IB)
   ENCODING("FR32X", ENCODING_IB)
@@ -1091,6 +1093,7 @@
   ENCODING("GR8", ENCODING_RM)
   ENCODING("VR128", ENCODING_RM)
   ENCODING("VR128X", ENCODING_RM)
+  ENCODING("FR128", ENCODING_RM)
   ENCODING("FR64", ENCODING_RM)
   ENCODING("FR32", ENCODING_RM)
   ENCODING("FR64X", ENCODING_RM)
@@ -1120,6 +1123,7 @@
   ENCODING("GR64", ENCODING_REG)
   ENCODING("GR8", ENCODING_REG)
   ENCODING("VR128", ENCODING_REG)
+  ENCODING("FR128", ENCODING_REG)
   ENCODING("FR64", ENCODING_REG)
   ENCODING("FR32", ENCODING_REG)
   ENCODING("VR64", ENCODING_REG)
@@ -1157,6 +1161,7 @@
   ENCODING("GR32", ENCODING_VVVV)
   ENCODING("GR64", ENCODING_VVVV)
   ENCODING("FR32", ENCODING_VVVV)
+  ENCODING("FR128", ENCODING_VVVV)
   ENCODING("FR64", ENCODING_VVVV)
   ENCODING("VR128", ENCODING_VVVV)
   ENCODING("VR256", ENCODING_VVVV)