Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3517,6 +3517,16 @@ RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128)); break; + case ISD::FADD: + Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128)); + break; + case ISD::FMUL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, + RTLIB::MUL_F80, RTLIB::MUL_F128, + RTLIB::MUL_PPCF128)); + break; case ISD::FP16_TO_FP: { if (Node->getValueType(0) == MVT::f32) { Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); @@ -3549,12 +3559,16 @@ } case ISD::FSUB: { EVT VT = Node->getValueType(0); - assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) && - TLI.isOperationLegalOrCustom(ISD::FNEG, VT) && - "Don't know how to expand this FP subtraction!"); - Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); - Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1); - Results.push_back(Tmp1); + if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) && + TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) { + Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); + Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1); + Results.push_back(Tmp1); + } else { + Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, + RTLIB::SUB_F80, RTLIB::SUB_F128, + RTLIB::SUB_PPCF128)); + } break; } case ISD::SUB: { Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7265,8 +7265,11 @@ } if (Args[i].isNest) Flags.setNest(); - if (NeedsRegBlock) + if (NeedsRegBlock) { Flags.setInConsecutiveRegs(); + if (Value == NumValues - 1) + Flags.setInConsecutiveRegsLast(); + } Flags.setOrigAlign(OriginalAlignment); MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); @@ -7312,10 +7315,6 @@ else if (j != 0) MyFlags.Flags.setOrigAlign(1); - // Only mark the end at the last register of the last value. - if (NeedsRegBlock && Value == NumValues - 1 && j == NumParts - 1) - MyFlags.Flags.setInConsecutiveRegsLast(); - CLI.Outs.push_back(MyFlags); CLI.OutVals.push_back(Parts[j]); } @@ -7530,8 +7529,11 @@ } if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) Flags.setNest(); - if (NeedsRegBlock) + if (NeedsRegBlock) { Flags.setInConsecutiveRegs(); + if (Value == NumValues - 1) + Flags.setInConsecutiveRegsLast(); + } Flags.setOrigAlign(OriginalAlignment); MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); @@ -7544,11 +7546,6 @@ // if it isn't first piece, alignment must be 1 else if (i > 0) MyFlags.Flags.setOrigAlign(1); - - // Only mark the end at the last register of the last value. 
- if (NeedsRegBlock && Value == NumValues - 1 && i == NumRegs - 1) - MyFlags.Flags.setInConsecutiveRegsLast(); - Ins.push_back(MyFlags); } PartBase += VT.getStoreSize(); Index: lib/Target/ARM/ARMBaseInstrInfo.cpp =================================================================== --- lib/Target/ARM/ARMBaseInstrInfo.cpp +++ lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -721,7 +721,7 @@ Opc = ARM::VMOVRS; else if (SPRDest && GPRSrc) Opc = ARM::VMOVSR; - else if (ARM::DPRRegClass.contains(DestReg, SrcReg)) + else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP()) Opc = ARM::VMOVD; else if (ARM::QPRRegClass.contains(DestReg, SrcReg)) Opc = ARM::VORRq; @@ -781,6 +781,10 @@ BeginIdx = ARM::dsub_0; SubRegs = 4; Spacing = 2; + } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) { + Opc = ARM::VMOVS; + BeginIdx = ARM::ssub_0; + SubRegs = 2; } assert(Opc && "Impossible reg-to-reg copy"); @@ -1231,7 +1235,8 @@ // copyPhysReg() calls. Look for VMOVS instructions that can legally be // widened to VMOVD. We prefer the VMOVD when possible because it may be // changed into a VORR that can go down the NEON pipeline. - if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15()) + if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15() || + Subtarget.isFPOnlySP()) return false; // Look for a copy between even S-registers. That is where we keep floats Index: lib/Target/ARM/ARMCallingConv.h =================================================================== --- lib/Target/ARM/ARMCallingConv.h +++ lib/Target/ARM/ARMCallingConv.h @@ -177,8 +177,9 @@ CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { SmallVectorImpl &PendingHAMembers = State.getPendingLocs(); + // AAPCS HFAs must have 1-4 elements, all of the same type - assert(PendingHAMembers.size() < 8); + assert(PendingHAMembers.size() < 4); if (PendingHAMembers.size() > 0) assert(PendingHAMembers[0].getLocVT() == LocVT); @@ -188,7 +189,7 @@ CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); if (ArgFlags.isInConsecutiveRegsLast()) { - assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 8 && + assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 && "Homogeneous aggregates must have between 1 and 4 members"); // Try to allocate a contiguous block of registers, each of the correct @@ -196,7 +197,6 @@ const uint16_t *RegList; unsigned NumRegs; switch (LocVT.SimpleTy) { - case MVT::i32: case MVT::f32: RegList = SRegList; NumRegs = 16; @@ -235,20 +235,11 @@ State.AllocateReg(SRegList[regNo]); unsigned Size = LocVT.getSizeInBits() / 8; - unsigned Align = Size; - - if (LocVT.SimpleTy == MVT::v2f64 || LocVT.SimpleTy == MVT::i32) { - // Vectors are always aligned to 8 bytes. If we've seen an i32 here - // it's because it's been split from a larger type, also with align 8. - Align = 8; - } + unsigned Align = std::min(Size, 8U); for (auto It : PendingHAMembers) { It.convertToMem(State.AllocateStack(Size, Align)); State.addLoc(It); - - // Only the first member needs to be aligned. 
- Align = 1; } // All pending members have now been allocated Index: lib/Target/ARM/ARMISelLowering.h =================================================================== --- lib/Target/ARM/ARMISelLowering.h +++ lib/Target/ARM/ARMISelLowering.h @@ -476,6 +476,10 @@ SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; unsigned getRegisterByName(const char* RegName, EVT VT) const override; @@ -565,6 +569,9 @@ bool mayBeEmittedAsTailCall(CallInst *CI) const override; + SDValue getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, SDValue TrueVal, + SDValue ARMcc, SDValue CCR, SDValue Cmp, + SelectionDAG &DAG) const; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, SDLoc dl) const; SDValue getVFPCmp(SDValue LHS, SDValue RHS, Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -445,8 +445,7 @@ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { addRegisterClass(MVT::f32, &ARM::SPRRegClass); - if (!Subtarget->isFPOnlySP()) - addRegisterClass(MVT::f64, &ARM::DPRRegClass); + addRegisterClass(MVT::f64, &ARM::DPRRegClass); } for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; @@ -628,6 +627,39 @@ if (!Subtarget->isThumb1Only()) setTargetDAGCombine(ISD::ADDC); + if (Subtarget->isFPOnlySP()) { + // When targetting a floating-point unit with only single-precision + // operations, f64 is legal for the few double-precision instructions which + // are present However, no double-precision operations other than moves, + // loads and stores are provided by the hardware. 
+ setOperationAction(ISD::FADD, MVT::f64, Expand); + setOperationAction(ISD::FSUB, MVT::f64, Expand); + setOperationAction(ISD::FMUL, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FDIV, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FGETSIGN, MVT::f64, Expand); + setOperationAction(ISD::FNEG, MVT::f64, Expand); + setOperationAction(ISD::FABS, MVT::f64, Expand); + setOperationAction(ISD::FSQRT, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FPOWI, MVT::f64, Expand); + setOperationAction(ISD::FPOW, MVT::f64, Expand); + setOperationAction(ISD::FLOG, MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10, MVT::f64, Expand); + setOperationAction(ISD::FEXP, MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FCEIL, MVT::f64, Expand); + setOperationAction(ISD::FTRUNC, MVT::f64, Expand); + setOperationAction(ISD::FRINT, MVT::f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); + } computeRegisterProperties(); @@ -3276,6 +3308,7 @@ SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, SDLoc dl) const { + assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64); SDValue Cmp; if (!isFloatingPointZero(RHS)) Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); @@ -3391,9 +3424,8 @@ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); EVT VT = Op.getValueType(); - return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse, - ARMcc, CCR, OverflowCmp); - + return getCMOV(SDLoc(Op), VT, SelectTrue, SelectFalse, ARMcc, CCR, + OverflowCmp, DAG); } // Convert: @@ -3427,7 +3459,7 @@ SDValue CCR = Cond.getOperand(3); SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); assert(True.getValueType() == VT); - return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); + return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG); } } } @@ -3497,6 +3529,32 @@ } } +SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, + SDValue TrueVal, SDValue ARMcc, SDValue CCR, + SDValue Cmp, SelectionDAG &DAG) const { + if (Subtarget->isFPOnlySP() && VT == MVT::f64) { + FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), FalseVal); + TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), TrueVal); + + SDValue TrueLow = TrueVal.getValue(0); + SDValue TrueHigh = TrueVal.getValue(1); + SDValue FalseLow = FalseVal.getValue(0); + SDValue FalseHigh = FalseVal.getValue(1); + + SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow, + ARMcc, CCR, Cmp); + SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh, + ARMcc, CCR, duplicateCmp(Cmp, DAG)); + + return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High); + } else { + return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, + Cmp); + } +} + SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDValue LHS = Op.getOperand(0); @@ -3506,6 +3564,18 @@ SDValue FalseVal = Op.getOperand(3); SDLoc dl(Op); + if 
(Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { + DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, + dl); + + // If softenSetCCOperands only returned one value, we should compare it to + // zero. + if (!RHS.getNode()) { + RHS = DAG.getConstant(0, LHS.getValueType()); + CC = ISD::SETNE; + } + } + if (LHS.getValueType() == MVT::i32) { // Try to generate VSEL on ARMv8. // The VSEL instruction can't use all the usual ARM condition @@ -3530,8 +3600,7 @@ SDValue ARMcc; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); - return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, - Cmp); + return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); } ARMCC::CondCodes CondCode, CondCode2; @@ -3570,14 +3639,12 @@ SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, - ARMcc, CCR, Cmp); + SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); if (CondCode2 != ARMCC::AL) { SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); // FIXME: Needs another CMP because flag can have but one use. SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); - Result = DAG.getNode(ARMISD::CMOV, dl, VT, - Result, TrueVal, ARMcc2, CCR, Cmp2); + Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG); } return Result; } @@ -3710,6 +3777,18 @@ SDValue Dest = Op.getOperand(4); SDLoc dl(Op); + if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { + DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, + dl); + + // If softenSetCCOperands only returned one value, we should compare it to + // zero. 
+ if (!RHS.getNode()) { + RHS = DAG.getConstant(0, LHS.getValueType()); + CC = ISD::SETNE; + } + } + if (LHS.getValueType() == MVT::i32) { SDValue ARMcc; SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); @@ -3802,11 +3881,23 @@ return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); } -static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorFP_TO_INT(Op, DAG); + if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) { + RTLIB::Libcall LC; + if (Op.getOpcode() == ISD::FP_TO_SINT) + LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), + Op.getValueType()); + else + LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), + Op.getValueType()); + return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1, + /*isSigned*/ false, SDLoc(Op)).first; + } + SDLoc dl(Op); unsigned Opc; @@ -3856,11 +3947,23 @@ return DAG.getNode(Opc, dl, VT, Op); } -static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorINT_TO_FP(Op, DAG); + if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) { + RTLIB::Libcall LC; + if (Op.getOpcode() == ISD::SINT_TO_FP) + LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), + Op.getValueType()); + else + LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), + Op.getValueType()); + return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1, + /*isSigned*/ false, SDLoc(Op)).first; + } + SDLoc dl(Op); unsigned Opc; @@ -4369,7 +4472,7 @@ ISD::CondCode SetCCOpcode = cast(CC)->get(); SDLoc dl(Op); - if (Op.getOperand(1).getValueType().isFloatingPoint()) { + if (Op1.getValueType().isFloatingPoint()) { switch (SetCCOpcode) { default: llvm_unreachable("Illegal FP comparison"); case ISD::SETUNE: @@ -4633,6 +4736,11 @@ bool IsDouble = Op.getValueType() == MVT::f64; ConstantFPSDNode *CFP = cast(Op); + // Use the default (constant pool) lowering for double constants when we have + // an SP-only FPU + if (IsDouble && Subtarget->isFPOnlySP()) + return SDValue(); + // Try splatting with a VMOV.f32... APFloat FPVal = CFP->getValueAPF(); int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal); @@ -6336,6 +6444,8 @@ if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) return LowerDYNAMIC_STACKALLOC(Op, DAG); llvm_unreachable("Don't know how to custom lower this!"); + case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); + case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); } } @@ -8479,10 +8589,11 @@ /// PerformVMOVRRDCombine - Target-specific dag combine xforms for /// ARMISD::VMOVRRD. static SDValue PerformVMOVRRDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { // vmovrrd(vmovdrr x, y) -> x,y SDValue InDouble = N->getOperand(0); - if (InDouble.getOpcode() == ARMISD::VMOVDRR) + if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP()) return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); // vmovrrd(load f64) -> (load i32), (load i32) @@ -8695,7 +8806,8 @@ /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for /// ISD::BUILD_VECTOR. 
static SDValue PerformBUILD_VECTORCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI){ + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X): // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value // into a pair of GPRs, which is fine when the value is used as a scalar, @@ -9710,10 +9822,10 @@ case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); case ARMISD::BFI: return PerformBFICombine(N, DCI); - case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); + case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget); case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); case ISD::STORE: return PerformSTORECombine(N, DCI); - case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI); + case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget); case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); @@ -10703,6 +10815,31 @@ return DAG.getMergeValues(Ops, DL); } +SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() && + "Unexpected type for custom-lowering FP_EXTEND"); + + RTLIB::Libcall LC; + LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType()); + + SDValue SrcVal = Op.getOperand(0); + return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, + /*isSigned*/ false, SDLoc(Op)).first; +} + +SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getOperand(0).getValueType() == MVT::f64 && + Subtarget->isFPOnlySP() && + "Unexpected type for custom-lowering FP_ROUND"); + + RTLIB::Libcall LC; + LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); + + SDValue SrcVal = Op.getOperand(0); + return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, + /*isSigned*/ false, SDLoc(Op)).first; +} + bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. @@ -10730,7 +10867,7 @@ return false; if (VT == MVT::f32) return ARM_AM::getFP32Imm(Imm) != -1; - if (VT == MVT::f64) + if (VT == MVT::f64 && !Subtarget->isFPOnlySP()) return ARM_AM::getFP64Imm(Imm) != -1; return false; } Index: lib/Target/ARM/ARMInstrVFP.td =================================================================== --- lib/Target/ARM/ARMInstrVFP.td +++ lib/Target/ARM/ARMInstrVFP.td @@ -515,6 +515,8 @@ let Inst{5} = Sm{0}; let Inst{15-12} = Dd{3-0}; let Inst{22} = Dd{4}; + + let Predicates = [HasVFP2, HasDPVFP]; } // Special case encoding: bits 11-8 is 0b1011. 
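[Reviewer note, not part of the patch to apply] The custom-lowering hooks added to ARMISelLowering.cpp above (LowerFP_EXTEND, LowerFP_ROUND, and the f64 paths of LowerFP_TO_INT / LowerINT_TO_FP) all follow the same shape: when the subtarget has an SP-only FPU, the f64 node is replaced by the matching RTLIB call via makeLibCall. The SELECT_CC and BR_CC paths instead go through softenSetCCOperands, since a comparison needs its operands soft-floated rather than the whole node replaced. A condensed C++ sketch of the libcall shape follows; LowerF64Libcall is a hypothetical name used only for illustration and does not appear in the patch.

    // Sketch only: the common pattern of the FP-only-SP lowering hooks above.
    // LowerF64Libcall is a hypothetical helper name, not part of the patch.
    SDValue ARMTargetLowering::LowerF64Libcall(SDValue Op, RTLIB::Libcall LC,
                                               SelectionDAG &DAG) const {
      assert(Subtarget->isFPOnlySP() &&
             "only needed when the FPU cannot do double-precision arithmetic");
      // Replace the node with a call into the runtime library (the __aeabi_*
      // routines), keeping the original operand and result type.
      SDValue SrcVal = Op.getOperand(0);
      return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
                         /*isSigned*/ false, SDLoc(Op)).first;
    }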
Index: test/CodeGen/ARM/aapcs-hfa-code.ll =================================================================== --- test/CodeGen/ARM/aapcs-hfa-code.ll +++ test/CodeGen/ARM/aapcs-hfa-code.ll @@ -54,12 +54,11 @@ ; CHECK: bl test_1double ; CHECK-M4F-LABEL: test_1double: -; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0 -; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 -; CHECK-M4F: movt [[ONEHI]], #16368 -; CHECK-M4F-DAG: vmov s0, [[ONELO]] -; CHECK-M4F-DAG: vmov s1, [[ONEHI]] +; CHECK-M4F: vldr d0, [[CP_LABEL:.*]] ; CHECK-M4F: bl test_1double +; CHECK-M4F: [[CP_LABEL]] +; CHECK-M4F-NEXT: .long 0 +; CHECK-M4F-NEXT: .long 1072693248 call arm_aapcs_vfpcc void @test_1double({ double } { double 1.0 }) ret void @@ -76,11 +75,10 @@ ; CHECK: bl test_1double_nosplit ; CHECK-M4F-LABEL: test_1double_nosplit: -; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 ; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0 +; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 ; CHECK-M4F: movt [[ONEHI]], #16368 -; CHECK-M4F-DAG: str [[ONELO]], [sp] -; CHECK-M4F-DAG: str [[ONEHI]], [sp, #4] +; CHECK-M4F: strd [[ONELO]], [[ONEHI]], [sp] ; CHECK-M4F: bl test_1double_nosplit call arm_aapcs_vfpcc void @test_1double_nosplit([4 x float] undef, [4 x double] undef, [3 x float] undef, double 1.0) ret void @@ -98,11 +96,10 @@ ; CHECK-DAG: strd [[ONELO]], [[ONEHI]], [sp, #8] ; CHECK-M4F-LABEL: test_1double_misaligned: -; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 ; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0 +; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 ; CHECK-M4F: movt [[ONEHI]], #16368 -; CHECK-M4F-DAG: str [[ONELO]], [sp, #8] -; CHECK-M4F-DAG: str [[ONEHI]], [sp, #12] +; CHECK-M4F: strd [[ONELO]], [[ONEHI]], [sp, #8] ; CHECK-M4F: bl test_1double_misaligned ret void Index: test/CodeGen/ARM/darwin-eabi.ll =================================================================== --- test/CodeGen/ARM/darwin-eabi.ll +++ test/CodeGen/ARM/darwin-eabi.ll @@ -20,5 +20,5 @@ ; CHECK-M3: bl ___adddf3 ; CHECK-M4-LABEL: double_op: -; CHECK-M4: bl ___adddf3 +; CHECK-M4: {{(blx|b.w)}} ___adddf3 } Index: test/CodeGen/Thumb2/aapcs.ll =================================================================== --- /dev/null +++ test/CodeGen/Thumb2/aapcs.ll @@ -0,0 +1,50 @@ +; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m4 -mattr=-vfp2 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 -mattr=+vfp4,+fp-only-sp | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 -mattr=+vfp3 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP + +define float @float_in_reg(float %a, float %b) { +entry: +; CHECK-LABEL: float_in_reg: +; SOFT: mov r0, r1 +; HARD: vmov.f32 s0, s1 +; CHECK-NEXT: bx lr + ret float %b +} + +define double @double_in_reg(double %a, double %b) { +entry: +; CHECK-LABEL: double_in_reg: +; SOFT: mov r0, r2 +; SOFT: mov r1, r3 +; SP: vmov.f32 s0, s2 +; SP: vmov.f32 s1, s3 +; DP: vmov.f64 d0, d1 +; CHECK-NEXT: bx lr + ret double %b +} + +define float @float_on_stack(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, float %i) { +; CHECK-LABEL: float_on_stack: +; SOFT: ldr r0, [sp, #48] +; HARD: vldr s0, [sp] +; CHECK-NEXT: bx lr + ret float %i +} + +define double @double_on_stack(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i) { +; CHECK-LABEL: double_on_stack: +; SOFT: ldr r0, [sp, #48] +; SOFT: ldr r1, [sp, #52] +; HARD: vldr d0, [sp] +; CHECK-NEXT: bx lr + ret double 
%i +} + +define double @double_not_split(double %a, double %b, double %c, double %d, double %e, double %f, double %g, float %h, double %i) { +; CHECK-LABEL: double_not_split: +; SOFT: ldr r0, [sp, #48] +; SOFT: ldr r1, [sp, #52] +; HARD: vldr d0, [sp] +; CHECK-NEXT: bx lr + ret double %i +} Index: test/CodeGen/Thumb2/cortex-fp.ll =================================================================== --- test/CodeGen/Thumb2/cortex-fp.ll +++ test/CodeGen/Thumb2/cortex-fp.ll @@ -18,7 +18,7 @@ ; CHECK-LABEL: bar: %0 = fmul double %a, %b ; CORTEXM3: bl ___muldf3 -; CORTEXM4: bl ___muldf3 +; CORTEXM4: {{bl|b.w}} ___muldf3 ; CORTEXA8: vmul.f64 d ret double %0 } Index: test/CodeGen/Thumb2/float-cmp.ll =================================================================== --- /dev/null +++ test/CodeGen/Thumb2/float-cmp.ll @@ -0,0 +1,300 @@ +; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=NONE +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP + + + +define i1 @cmp_f_false(float %a, float %b) { +; CHECK-LABEL: cmp_f_false: +; NONE: movs r0, #0 +; HARD: movs r0, #0 + %1 = fcmp false float %a, %b + ret i1 %1 +} +define i1 @cmp_f_oeq(float %a, float %b) { +; CHECK-LABEL: cmp_f_oeq: +; NONE: bl __aeabi_fcmpeq +; HARD: vcmpe.f32 +; HARD: moveq r0, #1 + %1 = fcmp oeq float %a, %b + ret i1 %1 +} +define i1 @cmp_f_ogt(float %a, float %b) { +; CHECK-LABEL: cmp_f_ogt: +; NONE: bl __aeabi_fcmpgt +; HARD: vcmpe.f32 +; HARD: movgt r0, #1 + %1 = fcmp ogt float %a, %b + ret i1 %1 +} +define i1 @cmp_f_oge(float %a, float %b) { +; CHECK-LABEL: cmp_f_oge: +; NONE: bl __aeabi_fcmpge +; HARD: vcmpe.f32 +; HARD: movge r0, #1 + %1 = fcmp oge float %a, %b + ret i1 %1 +} +define i1 @cmp_f_olt(float %a, float %b) { +; CHECK-LABEL: cmp_f_olt: +; NONE: bl __aeabi_fcmplt +; HARD: vcmpe.f32 +; HARD: movmi r0, #1 + %1 = fcmp olt float %a, %b + ret i1 %1 +} +define i1 @cmp_f_ole(float %a, float %b) { +; CHECK-LABEL: cmp_f_ole: +; NONE: bl __aeabi_fcmple +; HARD: vcmpe.f32 +; HARD: movls r0, #1 + %1 = fcmp ole float %a, %b + ret i1 %1 +} +define i1 @cmp_f_one(float %a, float %b) { +; CHECK-LABEL: cmp_f_one: +; NONE: bl __aeabi_fcmpgt +; NONE: bl __aeabi_fcmplt +; HARD: vcmpe.f32 +; HARD: movmi r0, #1 +; HARD: movgt r0, #1 + %1 = fcmp one float %a, %b + ret i1 %1 +} +define i1 @cmp_f_ord(float %a, float %b) { +; CHECK-LABEL: cmp_f_ord: +; NONE: bl __aeabi_fcmpun +; HARD: vcmpe.f32 +; HARD: movvc r0, #1 + %1 = fcmp ord float %a, %b + ret i1 %1 +}define i1 @cmp_f_ueq(float %a, float %b) { +; CHECK-LABEL: cmp_f_ueq: +; NONE: bl __aeabi_fcmpeq +; NONE: bl __aeabi_fcmpun +; HARD: vcmpe.f32 +; HARD: moveq r0, #1 +; HARD: movvs r0, #1 + %1 = fcmp ueq float %a, %b + ret i1 %1 +} +define i1 @cmp_f_ugt(float %a, float %b) { +; CHECK-LABEL: cmp_f_ugt: +; NONE: bl __aeabi_fcmpgt +; NONE: bl __aeabi_fcmpun +; HARD: vcmpe.f32 +; HARD: movhi r0, #1 + %1 = fcmp ugt float %a, %b + ret i1 %1 +} +define i1 @cmp_f_uge(float %a, float %b) { +; CHECK-LABEL: cmp_f_uge: +; NONE: bl __aeabi_fcmpge +; NONE: bl __aeabi_fcmpun +; HARD: vcmpe.f32 +; HARD: movpl r0, #1 + %1 = fcmp uge float %a, %b + ret i1 %1 +} +define i1 @cmp_f_ult(float %a, float %b) { +; CHECK-LABEL: cmp_f_ult: +; NONE: bl __aeabi_fcmplt +; NONE: bl __aeabi_fcmpun +; HARD: vcmpe.f32 +; HARD: movlt r0, #1 + %1 = fcmp ult float 
%a, %b + ret i1 %1 +} +define i1 @cmp_f_ule(float %a, float %b) { +; CHECK-LABEL: cmp_f_ule: +; NONE: bl __aeabi_fcmple +; NONE: bl __aeabi_fcmpun +; HARD: vcmpe.f32 +; HARD: movle r0, #1 + %1 = fcmp ule float %a, %b + ret i1 %1 +} +define i1 @cmp_f_une(float %a, float %b) { +; CHECK-LABEL: cmp_f_une: +; NONE: bl __aeabi_fcmpeq +; HARD: vcmpe.f32 +; HARD: movne r0, #1 + %1 = fcmp une float %a, %b + ret i1 %1 +} +define i1 @cmp_f_uno(float %a, float %b) { +; CHECK-LABEL: cmp_f_uno: +; NONE: bl __aeabi_fcmpun +; HARD: vcmpe.f32 +; HARD: movvs r0, #1 + %1 = fcmp uno float %a, %b + ret i1 %1 +} +define i1 @cmp_f_true(float %a, float %b) { +; CHECK-LABEL: cmp_f_true: +; NONE: movs r0, #1 +; HARD: movs r0, #1 + %1 = fcmp true float %a, %b + ret i1 %1 +} + +define i1 @cmp_d_false(double %a, double %b) { +; CHECK-LABEL: cmp_d_false: +; NONE: movs r0, #0 +; HARD: movs r0, #0 + %1 = fcmp false double %a, %b + ret i1 %1 +} +define i1 @cmp_d_oeq(double %a, double %b) { +; CHECK-LABEL: cmp_d_oeq: +; NONE: bl __aeabi_dcmpeq +; SP: bl __aeabi_dcmpeq +; DP: vcmpe.f64 +; DP: moveq r0, #1 + %1 = fcmp oeq double %a, %b + ret i1 %1 +} +define i1 @cmp_d_ogt(double %a, double %b) { +; CHECK-LABEL: cmp_d_ogt: +; NONE: bl __aeabi_dcmpgt +; SP: bl __aeabi_dcmpgt +; DP: vcmpe.f64 +; DP: movgt r0, #1 + %1 = fcmp ogt double %a, %b + ret i1 %1 +} +define i1 @cmp_d_oge(double %a, double %b) { +; CHECK-LABEL: cmp_d_oge: +; NONE: bl __aeabi_dcmpge +; SP: bl __aeabi_dcmpge +; DP: vcmpe.f64 +; DP: movge r0, #1 + %1 = fcmp oge double %a, %b + ret i1 %1 +} +define i1 @cmp_d_olt(double %a, double %b) { +; CHECK-LABEL: cmp_d_olt: +; NONE: bl __aeabi_dcmplt +; SP: bl __aeabi_dcmplt +; DP: vcmpe.f64 +; DP: movmi r0, #1 + %1 = fcmp olt double %a, %b + ret i1 %1 +} +define i1 @cmp_d_ole(double %a, double %b) { +; CHECK-LABEL: cmp_d_ole: +; NONE: bl __aeabi_dcmple +; SP: bl __aeabi_dcmple +; DP: vcmpe.f64 +; DP: movls r0, #1 + %1 = fcmp ole double %a, %b + ret i1 %1 +} +define i1 @cmp_d_one(double %a, double %b) { +; CHECK-LABEL: cmp_d_one: +; NONE: bl __aeabi_dcmpgt +; NONE: bl __aeabi_dcmplt +; SP: bl __aeabi_dcmpgt +; SP: bl __aeabi_dcmplt +; DP: vcmpe.f64 +; DP: movmi r0, #1 +; DP: movgt r0, #1 + %1 = fcmp one double %a, %b + ret i1 %1 +} +define i1 @cmp_d_ord(double %a, double %b) { +; CHECK-LABEL: cmp_d_ord: +; NONE: bl __aeabi_dcmpun +; SP: bl __aeabi_dcmpun +; DP: vcmpe.f64 +; DP: movvc r0, #1 + %1 = fcmp ord double %a, %b + ret i1 %1 +} +define i1 @cmp_d_ugt(double %a, double %b) { +; CHECK-LABEL: cmp_d_ugt: +; NONE: bl __aeabi_dcmpgt +; NONE: bl __aeabi_dcmpun +; SP: bl __aeabi_dcmpgt +; SP: bl __aeabi_dcmpun +; DP: vcmpe.f64 +; DP: movhi r0, #1 + %1 = fcmp ugt double %a, %b + ret i1 %1 +} + +define i1 @cmp_d_ult(double %a, double %b) { +; CHECK-LABEL: cmp_d_ult: +; NONE: bl __aeabi_dcmplt +; NONE: bl __aeabi_dcmpun +; SP: bl __aeabi_dcmplt +; SP: bl __aeabi_dcmpun +; DP: vcmpe.f64 +; DP: movlt r0, #1 + %1 = fcmp ult double %a, %b + ret i1 %1 +} + + +define i1 @cmp_d_uno(double %a, double %b) { +; CHECK-LABEL: cmp_d_uno: +; NONE: bl __aeabi_dcmpun +; SP: bl __aeabi_dcmpun +; DP: vcmpe.f64 +; DP: movvs r0, #1 + %1 = fcmp uno double %a, %b + ret i1 %1 +} +define i1 @cmp_d_true(double %a, double %b) { +; CHECK-LABEL: cmp_d_true: +; NONE: movs r0, #1 +; HARD: movs r0, #1 + %1 = fcmp true double %a, %b + ret i1 %1 +} +define i1 @cmp_d_ueq(double %a, double %b) { +; CHECK-LABEL: cmp_d_ueq: +; NONE: bl __aeabi_dcmpeq +; NONE: bl __aeabi_dcmpun +; SP: bl __aeabi_dcmpeq +; SP: bl __aeabi_dcmpun +; DP: vcmpe.f64 +; DP: moveq r0, #1 
+; DP: movvs r0, #1 + %1 = fcmp ueq double %a, %b + ret i1 %1 +} + +define i1 @cmp_d_uge(double %a, double %b) { +; CHECK-LABEL: cmp_d_uge: +; NONE: bl __aeabi_dcmpge +; NONE: bl __aeabi_dcmpun +; SP: bl __aeabi_dcmpge +; SP: bl __aeabi_dcmpun +; DP: vcmpe.f64 +; DP: movpl r0, #1 + %1 = fcmp uge double %a, %b + ret i1 %1 +} + +define i1 @cmp_d_ule(double %a, double %b) { +; CHECK-LABEL: cmp_d_ule: +; NONE: bl __aeabi_dcmple +; NONE: bl __aeabi_dcmpun +; SP: bl __aeabi_dcmple +; SP: bl __aeabi_dcmpun +; DP: vcmpe.f64 +; DP: movle r0, #1 + %1 = fcmp ule double %a, %b + ret i1 %1 +} + +define i1 @cmp_d_une(double %a, double %b) { +; CHECK-LABEL: cmp_d_une: +; NONE: bl __aeabi_dcmpeq +; SP: bl __aeabi_dcmpeq +; DP: vcmpe.f64 +; DP: movne r0, #1 + %1 = fcmp une double %a, %b + ret i1 %1 +} Index: test/CodeGen/Thumb2/float-intrinsics-double.ll =================================================================== --- /dev/null +++ test/CodeGen/Thumb2/float-intrinsics-double.ll @@ -0,0 +1,214 @@ +; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=NONE +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=SP +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP + +declare double @llvm.sqrt.f64(double %Val) +define double @sqrt_d(double %a) { +; CHECK-LABEL: sqrt_d: +; SOFT: {{(bl|b)}} sqrt +; HARD: vsqrt.f64 d0, d0 + %1 = call double @llvm.sqrt.f64(double %a) + ret double %1 +} + +declare double @llvm.powi.f64(double %Val, i32 %power) +define double @powi_d(double %a, i32 %b) { +; CHECK-LABEL: powi_d: +; SOFT: {{(bl|b)}} __powidf2 +; HARD: b __powidf2 + %1 = call double @llvm.powi.f64(double %a, i32 %b) + ret double %1 +} + +declare double @llvm.sin.f64(double %Val) +define double @sin_d(double %a) { +; CHECK-LABEL: sin_d: +; SOFT: {{(bl|b)}} sin +; HARD: b sin + %1 = call double @llvm.sin.f64(double %a) + ret double %1 +} + +declare double @llvm.cos.f64(double %Val) +define double @cos_d(double %a) { +; CHECK-LABEL: cos_d: +; SOFT: {{(bl|b)}} cos +; HARD: b cos + %1 = call double @llvm.cos.f64(double %a) + ret double %1 +} + +declare double @llvm.pow.f64(double %Val, double %power) +define double @pow_d(double %a, double %b) { +; CHECK-LABEL: pow_d: +; SOFT: {{(bl|b)}} pow +; HARD: b pow + %1 = call double @llvm.pow.f64(double %a, double %b) + ret double %1 +} + +declare double @llvm.exp.f64(double %Val) +define double @exp_d(double %a) { +; CHECK-LABEL: exp_d: +; SOFT: {{(bl|b)}} exp +; HARD: b exp + %1 = call double @llvm.exp.f64(double %a) + ret double %1 +} + +declare double @llvm.exp2.f64(double %Val) +define double @exp2_d(double %a) { +; CHECK-LABEL: exp2_d: +; SOFT: {{(bl|b)}} exp2 +; HARD: b exp2 + %1 = call double @llvm.exp2.f64(double %a) + ret double %1 +} + +declare double @llvm.log.f64(double %Val) +define double @log_d(double %a) { +; CHECK-LABEL: log_d: +; SOFT: {{(bl|b)}} log +; HARD: b log + %1 = call double @llvm.log.f64(double %a) + ret double %1 +} + +declare double @llvm.log10.f64(double %Val) +define double @log10_d(double %a) { +; CHECK-LABEL: log10_d: +; SOFT: {{(bl|b)}} log10 +; HARD: b log10 + %1 = call double @llvm.log10.f64(double %a) + ret double %1 +} + +declare double @llvm.log2.f64(double %Val) +define double @log2_d(double %a) { +; CHECK-LABEL: log2_d: +; SOFT: {{(bl|b)}} log2 +; HARD: b log2 + %1 = call double @llvm.log2.f64(double %a) + 
ret double %1 +} + +declare double @llvm.fma.f64(double %a, double %b, double %c) +define double @fma_d(double %a, double %b, double %c) { +; CHECK-LABEL: fma_d: +; SOFT: {{(bl|b)}} fma +; HARD: vfma.f64 + %1 = call double @llvm.fma.f64(double %a, double %b, double %c) + ret double %1 +} + +; FIXME: the FPv4-SP version is less efficient than the no-FPU version +declare double @llvm.fabs.f64(double %Val) +define double @abs_d(double %a) { +; CHECK-LABEL: abs_d: +; NONE: bic r1, r1, #-2147483648 +; SP: bl __aeabi_dcmpgt +; SP: bl __aeabi_dcmpun +; SP: bl __aeabi_dsub +; DP: vabs.f64 d0, d0 + %1 = call double @llvm.fabs.f64(double %a) + ret double %1 +} + +declare double @llvm.copysign.f64(double %Mag, double %Sgn) +define double @copysign_d(double %a, double %b) { +; CHECK-LABEL: copysign_d: +; SOFT: lsrs [[REG:r[0-9]+]], r3, #31 +; SOFT: bfi r1, [[REG]], #31, #1 +; HARD: vmov.i32 [[REG:d[0-9]+]], #0x80000000 +; HARD: vshl.i64 [[REG]], [[REG]], #32 +; HARD: vbsl [[REG]], d + %1 = call double @llvm.copysign.f64(double %a, double %b) + ret double %1 +} + +declare double @llvm.floor.f64(double %Val) +define double @floor_d(double %a) { +; CHECK-LABEL: floor_d: +; SOFT: {{(bl|b)}} floor +; HARD: b floor + %1 = call double @llvm.floor.f64(double %a) + ret double %1 +} + +declare double @llvm.ceil.f64(double %Val) +define double @ceil_d(double %a) { +; CHECK-LABEL: ceil_d: +; SOFT: {{(bl|b)}} ceil +; HARD: b ceil + %1 = call double @llvm.ceil.f64(double %a) + ret double %1 +} + +declare double @llvm.trunc.f64(double %Val) +define double @trunc_d(double %a) { +; CHECK-LABEL: trunc_d: +; SOFT: {{(bl|b)}} trunc +; HARD: b trunc + %1 = call double @llvm.trunc.f64(double %a) + ret double %1 +} + +declare double @llvm.rint.f64(double %Val) +define double @rint_d(double %a) { +; CHECK-LABEL: rint_d: +; SOFT: {{(bl|b)}} rint +; HARD: b rint + %1 = call double @llvm.rint.f64(double %a) + ret double %1 +} + +declare double @llvm.nearbyint.f64(double %Val) +define double @nearbyint_d(double %a) { +; CHECK-LABEL: nearbyint_d: +; SOFT: {{(bl|b)}} nearbyint +; HARD: b nearbyint + %1 = call double @llvm.nearbyint.f64(double %a) + ret double %1 +} + +declare double @llvm.round.f64(double %Val) +define double @round_d(double %a) { +; CHECK-LABEL: round_d: +; SOFT: {{(bl|b)}} round +; HARD: b round + %1 = call double @llvm.round.f64(double %a) + ret double %1 +} + +declare double @llvm.fmuladd.f64(double %a, double %b, double %c) +define double @fmuladd_d(double %a, double %b, double %c) { +; CHECK-LABEL: fmuladd_d: +; SOFT: bl __aeabi_dmul +; SOFT: bl __aeabi_dadd +; HARD: vmul.f64 +; HARD: vadd.f64 + %1 = call double @llvm.fmuladd.f64(double %a, double %b, double %c) + ret double %1 +} + +declare i16 @llvm.convert.to.fp16.f64(double %a) +define i16 @d_to_h(double %a) { +; CHECK-LABEL: d_to_h: +; SOFT: bl __aeabi_d2h +; HARD: bl __aeabi_d2h + %1 = call i16 @llvm.convert.to.fp16.f64(double %a) + ret i16 %1 +} + +declare double @llvm.convert.from.fp16.f64(i16 %a) +define double @h_to_d(i16 %a) { +; CHECK-LABEL: h_to_d: +; NONE: bl __gnu_h2f_ieee +; NONE: bl __aeabi_f2d +; SP: vcvtb.f32.f16 +; SP: bl __aeabi_f2d +; DP: vcvtb.f32.f16 +; DP: vcvt.f64.f32 + %1 = call double @llvm.convert.from.fp16.f64(i16 %a) + ret double %1 +} Index: test/CodeGen/Thumb2/float-intrinsics-float.ll =================================================================== --- /dev/null +++ test/CodeGen/Thumb2/float-intrinsics-float.ll @@ -0,0 +1,210 @@ +; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK 
-check-prefix=SOFT -check-prefix=NONE +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP + +declare float @llvm.sqrt.f32(float %Val) +define float @sqrt_f(float %a) { +; CHECK-LABEL: sqrt_f: +; SOFT: bl sqrtf +; HARD: vsqrt.f32 s0, s0 + %1 = call float @llvm.sqrt.f32(float %a) + ret float %1 +} + +declare float @llvm.powi.f32(float %Val, i32 %power) +define float @powi_f(float %a, i32 %b) { +; CHECK-LABEL: powi_f: +; SOFT: bl __powisf2 +; HARD: b __powisf2 + %1 = call float @llvm.powi.f32(float %a, i32 %b) + ret float %1 +} + +declare float @llvm.sin.f32(float %Val) +define float @sin_f(float %a) { +; CHECK-LABEL: sin_f: +; SOFT: bl sinf +; HARD: b sinf + %1 = call float @llvm.sin.f32(float %a) + ret float %1 +} + +declare float @llvm.cos.f32(float %Val) +define float @cos_f(float %a) { +; CHECK-LABEL: cos_f: +; SOFT: bl cosf +; HARD: b cosf + %1 = call float @llvm.cos.f32(float %a) + ret float %1 +} + +declare float @llvm.pow.f32(float %Val, float %power) +define float @pow_f(float %a, float %b) { +; CHECK-LABEL: pow_f: +; SOFT: bl powf +; HARD: b powf + %1 = call float @llvm.pow.f32(float %a, float %b) + ret float %1 +} + +declare float @llvm.exp.f32(float %Val) +define float @exp_f(float %a) { +; CHECK-LABEL: exp_f: +; SOFT: bl expf +; HARD: b expf + %1 = call float @llvm.exp.f32(float %a) + ret float %1 +} + +declare float @llvm.exp2.f32(float %Val) +define float @exp2_f(float %a) { +; CHECK-LABEL: exp2_f: +; SOFT: bl exp2f +; HARD: b exp2f + %1 = call float @llvm.exp2.f32(float %a) + ret float %1 +} + +declare float @llvm.log.f32(float %Val) +define float @log_f(float %a) { +; CHECK-LABEL: log_f: +; SOFT: bl logf +; HARD: b logf + %1 = call float @llvm.log.f32(float %a) + ret float %1 +} + +declare float @llvm.log10.f32(float %Val) +define float @log10_f(float %a) { +; CHECK-LABEL: log10_f: +; SOFT: bl log10f +; HARD: b log10f + %1 = call float @llvm.log10.f32(float %a) + ret float %1 +} + +declare float @llvm.log2.f32(float %Val) +define float @log2_f(float %a) { +; CHECK-LABEL: log2_f: +; SOFT: bl log2f +; HARD: b log2f + %1 = call float @llvm.log2.f32(float %a) + ret float %1 +} + +declare float @llvm.fma.f32(float %a, float %b, float %c) +define float @fma_f(float %a, float %b, float %c) { +; CHECK-LABEL: fma_f: +; SOFT: bl fmaf +; HARD: vfma.f32 + %1 = call float @llvm.fma.f32(float %a, float %b, float %c) + ret float %1 +} + +declare float @llvm.fabs.f32(float %Val) +define float @abs_f(float %a) { +; CHECK-LABEL: abs_f: +; SOFT: bic r0, r0, #-2147483648 +; HARD: vabs.f32 + %1 = call float @llvm.fabs.f32(float %a) + ret float %1 +} + +declare float @llvm.copysign.f32(float %Mag, float %Sgn) +define float @copysign_f(float %a, float %b) { +; CHECK-LABEL: copysign_f: +; NONE: lsrs [[REG:r[0-9]+]], r{{[0-9]+}}, #31 +; NONE: bfi r{{[0-9]+}}, [[REG]], #31, #1 +; SP: lsrs [[REG:r[0-9]+]], r{{[0-9]+}}, #31 +; SP: bfi r{{[0-9]+}}, [[REG]], #31, #1 +; DP: vmov.i32 [[REG:d[0-9]+]], #0x80000000 +; DP: vbsl [[REG]], d + %1 = call float @llvm.copysign.f32(float %a, float %b) + ret float %1 +} + +declare float @llvm.floor.f32(float %Val) +define float @floor_f(float %a) { +; CHECK-LABEL: floor_f: +; SOFT: bl floorf +; HARD: b floorf + %1 = call float @llvm.floor.f32(float %a) + ret float %1 +} + +declare float @llvm.ceil.f32(float %Val) +define float @ceil_f(float %a) { +; 
CHECK-LABEL: ceil_f: +; SOFT: bl ceilf +; HARD: b ceilf + %1 = call float @llvm.ceil.f32(float %a) + ret float %1 +} + +declare float @llvm.trunc.f32(float %Val) +define float @trunc_f(float %a) { +; CHECK-LABEL: trunc_f: +; SOFT: bl truncf +; HARD: b truncf + %1 = call float @llvm.trunc.f32(float %a) + ret float %1 +} + +declare float @llvm.rint.f32(float %Val) +define float @rint_f(float %a) { +; CHECK-LABEL: rint_f: +; SOFT: bl rintf +; HARD: b rintf + %1 = call float @llvm.rint.f32(float %a) + ret float %1 +} + +declare float @llvm.nearbyint.f32(float %Val) +define float @nearbyint_f(float %a) { +; CHECK-LABEL: nearbyint_f: +; SOFT: bl nearbyintf +; HARD: b nearbyintf + %1 = call float @llvm.nearbyint.f32(float %a) + ret float %1 +} + +declare float @llvm.round.f32(float %Val) +define float @round_f(float %a) { +; CHECK-LABEL: round_f: +; SOFT: bl roundf +; HARD: b roundf + %1 = call float @llvm.round.f32(float %a) + ret float %1 +} + +; FIXME: why does cortex-m4 use vmla, while cortex-a7 uses vmul+vadd? +; (these should be equivalent, even the rounding is the same) +declare float @llvm.fmuladd.f32(float %a, float %b, float %c) +define float @fmuladd_f(float %a, float %b, float %c) { +; CHECK-LABEL: fmuladd_f: +; SOFT: bl __aeabi_fmul +; SOFT: bl __aeabi_fadd +; SP: vmla.f32 +; DP: vmul.f32 +; DP: vadd.f32 + %1 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) + ret float %1 +} + +declare i16 @llvm.convert.to.fp16.f32(float %a) +define i16 @f_to_h(float %a) { +; CHECK-LABEL: f_to_h: +; SOFT: bl __gnu_f2h_ieee +; HARD: vcvtb.f16.f32 + %1 = call i16 @llvm.convert.to.fp16.f32(float %a) + ret i16 %1 +} + +declare float @llvm.convert.from.fp16.f32(i16 %a) +define float @h_to_f(i16 %a) { +; CHECK-LABEL: h_to_f: +; SOFT: bl __gnu_h2f_ieee +; HARD: vcvtb.f32.f16 + %1 = call float @llvm.convert.from.fp16.f32(i16 %a) + ret float %1 +} Index: test/CodeGen/Thumb2/float-ops.ll =================================================================== --- /dev/null +++ test/CodeGen/Thumb2/float-ops.ll @@ -0,0 +1,290 @@ +; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=NONE +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP +; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP + +define float @add_f(float %a, float %b) { +entry: +; CHECK-LABEL: add_f: +; NONE: bl __aeabi_fadd +; HARD: vadd.f32 s0, s0, s1 + %0 = fadd float %a, %b + ret float %0 +} + +define double @add_d(double %a, double %b) { +entry: +; CHECK-LABEL: add_d: +; NONE: bl __aeabi_dadd +; SP: bl __aeabi_dadd +; DP: vadd.f64 d0, d0, d1 + %0 = fadd double %a, %b + ret double %0 +} + +define float @sub_f(float %a, float %b) { +entry: +; CHECK-LABEL: sub_f: +; NONE: bl __aeabi_fsub +; HARD: vsub.f32 s + %0 = fsub float %a, %b + ret float %0 +} + +define double @sub_d(double %a, double %b) { +entry: +; CHECK-LABEL: sub_d: +; NONE: bl __aeabi_dsub +; SP: bl __aeabi_dsub +; DP: vsub.f64 d0, d0, d1 + %0 = fsub double %a, %b + ret double %0 +} + +define float @mul_f(float %a, float %b) { +entry: +; CHECK-LABEL: mul_f: +; NONE: bl __aeabi_fmul +; HARD: vmul.f32 s + %0 = fmul float %a, %b + ret float %0 +} + +define double @mul_d(double %a, double %b) { +entry: +; CHECK-LABEL: mul_d: +; NONE: bl __aeabi_dmul +; SP: bl __aeabi_dmul +; DP: vmul.f64 d0, d0, d1 + %0 = fmul double %a, %b + ret double %0 +} + +define float 
@div_f(float %a, float %b) { +entry: +; CHECK-LABEL: div_f: +; NONE: bl __aeabi_fdiv +; HARD: vdiv.f32 s + %0 = fdiv float %a, %b + ret float %0 +} + +define double @div_d(double %a, double %b) { +entry: +; CHECK-LABEL: div_d: +; NONE: bl __aeabi_ddiv +; SP: bl __aeabi_ddiv +; DP: vdiv.f64 d0, d0, d1 + %0 = fdiv double %a, %b + ret double %0 +} + +define float @rem_f(float %a, float %b) { +entry: +; CHECK-LABEL: rem_f: +; NONE: bl fmodf +; HARD: b fmodf + %0 = frem float %a, %b + ret float %0 +} + +define double @rem_d(double %a, double %b) { +entry: +; CHECK-LABEL: rem_d: +; NONE: bl fmod +; HARD: b fmod + %0 = frem double %a, %b + ret double %0 +} + +define float @load_f(float* %a) { +entry: +; CHECK-LABEL: load_f: +; NONE: ldr r0, [r0] +; HARD: vldr s0, [r0] + %0 = load float* %a, align 4 + ret float %0 +} + +define double @load_d(double* %a) { +entry: +; CHECK-LABEL: load_d: +; NONE: ldm.w r0, {r0, r1} +; HARD: vldr d0, [r0] + %0 = load double* %a, align 8 + ret double %0 +} + +define void @store_f(float* %a, float %b) { +entry: +; CHECK-LABEL: store_f: +; NONE: str r1, [r0] +; HARD: vstr s0, [r0] + store float %b, float* %a, align 4 + ret void +} + +define void @store_d(double* %a, double %b) { +entry: +; CHECK-LABEL: store_d: +; NONE: mov r1, r3 +; NONE: str r2, [r0] +; NONE: str r1, [r0, #4] +; HARD: vstr d0, [r0] + store double %b, double* %a, align 8 + ret void +} + +define double @f_to_d(float %a) { +; CHECK-LABEL: f_to_d: +; NONE: bl __aeabi_f2d +; SP: bl __aeabi_f2d +; DP: vcvt.f64.f32 d0, s0 + %1 = fpext float %a to double + ret double %1 +} + +define float @d_to_f(double %a) { +; CHECK-LABEL: d_to_f: +; NONE: bl __aeabi_d2f +; SP: bl __aeabi_d2f +; DP: vcvt.f32.f64 s0, d0 + %1 = fptrunc double %a to float + ret float %1 +} + +define i32 @f_to_si(float %a) { +; CHECK-LABEL: f_to_si: +; NONE: bl __aeabi_f2iz +; HARD: vcvt.s32.f32 s0, s0 +; HARD: vmov r0, s0 + %1 = fptosi float %a to i32 + ret i32 %1 +} + +define i32 @d_to_si(double %a) { +; CHECK-LABEL: d_to_si: +; NONE: bl __aeabi_d2iz +; SP: vmov r0, r1, d0 +; SP: bl __aeabi_d2iz +; DP: vcvt.s32.f64 s0, d0 +; DP: vmov r0, s0 + %1 = fptosi double %a to i32 + ret i32 %1 +} + +define i32 @f_to_ui(float %a) { +; CHECK-LABEL: f_to_ui: +; NONE: bl __aeabi_f2uiz +; HARD: vcvt.u32.f32 s0, s0 +; HARD: vmov r0, s0 + %1 = fptoui float %a to i32 + ret i32 %1 +} + +define i32 @d_to_ui(double %a) { +; CHECK-LABEL: d_to_ui: +; NONE: bl __aeabi_d2uiz +; SP: vmov r0, r1, d0 +; SP: bl __aeabi_d2uiz +; DP: vcvt.u32.f64 s0, d0 +; DP: vmov r0, s0 + %1 = fptoui double %a to i32 + ret i32 %1 +} + +define float @si_to_f(i32 %a) { +; CHECK-LABEL: si_to_f: +; NONE: bl __aeabi_i2f +; HARD: vcvt.f32.s32 s0, s0 + %1 = sitofp i32 %a to float + ret float %1 +} + +define double @si_to_d(i32 %a) { +; CHECK-LABEL: si_to_d: +; NONE: bl __aeabi_i2d +; SP: bl __aeabi_i2d +; DP: vcvt.f64.s32 d0, s0 + %1 = sitofp i32 %a to double + ret double %1 +} + +define float @ui_to_f(i32 %a) { +; CHECK-LABEL: ui_to_f: +; NONE: bl __aeabi_ui2f +; HARD: vcvt.f32.u32 s0, s0 + %1 = uitofp i32 %a to float + ret float %1 +} + +define double @ui_to_d(i32 %a) { +; CHECK-LABEL: ui_to_d: +; NONE: bl __aeabi_ui2d +; SP: bl __aeabi_ui2d +; DP: vcvt.f64.u32 d0, s0 + %1 = uitofp i32 %a to double + ret double %1 +} + +define float @bitcast_i_to_f(i32 %a) { +; CHECK-LABEL: bitcast_i_to_f: +; NONE-NOT: mov +; HARD: vmov s0, r0 + %1 = bitcast i32 %a to float + ret float %1 +} + +define double @bitcast_i_to_d(i64 %a) { +; CHECK-LABEL: bitcast_i_to_d: +; NONE-NOT: mov +; HARD: vmov d0, r0, r1 + 
%1 = bitcast i64 %a to double + ret double %1 +} + +define i32 @bitcast_f_to_i(float %a) { +; CHECK-LABEL: bitcast_f_to_i: +; NONE-NOT: mov +; HARD: vmov r0, s0 + %1 = bitcast float %a to i32 + ret i32 %1 +} + +define i64 @bitcast_d_to_i(double %a) { +; CHECK-LABEL: bitcast_d_to_i: +; NONE-NOT: mov +; HARD: vmov r0, r1, d0 + %1 = bitcast double %a to i64 + ret i64 %1 +} + +define float @select_f(float %a, float %b, i1 %c) { +; CHECK-LABEL: select_f: +; NONE: tst.w r2, #1 +; NONE: moveq r0, r1 +; HARD: tst.w r0, #1 +; HARD: vmovne.f32 s1, s0 +; HARD: vmov.f32 s0, s1 + %1 = select i1 %c, float %a, float %b + ret float %1 +} + +define double @select_d(double %a, double %b, i1 %c) { +; CHECK-LABEL: select_d: +; NONE: ldr.w [[REG:r[0-9]+]], [sp] +; NONE: ands [[REG]], [[REG]], #1 +; NONE: moveq r0, r2 +; NONE: moveq r1, r3 +; SP: ands r0, r0, #1 +; SP-DAG: vmov [[ALO:r[0-9]+]], [[AHI:r[0-9]+]], d0 +; SP-DAG: vmov [[BLO:r[0-9]+]], [[BHI:r[0-9]+]], d1 +; SP: itt ne +; SP-DAG: movne [[BLO]], [[ALO]] +; SP-DAG: movne [[BHI]], [[AHI]] +; SP: vmov d0, [[BLO]], [[BHI]] +; DP: tst.w r0, #1 +; DP: vmovne.f64 d1, d0 +; DP: vmov.f64 d0, d1 + %1 = select i1 %c, double %a, double %b + ret double %1 +}
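
[Reviewer note, not part of the patch] As a rough guide to what the new Thumb2 tests above check end-to-end: on an FPv4-SP target such as cortex-m4 with a hard-float triple (the SP RUN lines), single-precision arithmetic stays on the FPU, double-precision arithmetic becomes AEABI runtime calls, and only f64 moves, loads and stores use the D registers. The snippet below is an illustration of the kind of source float-ops.ll corresponds to, with the expected lowering noted in comments taken from the SP/HARD CHECK lines above; it is not itself part of the test suite.

    // Illustration only: roughly the source behind add_f/add_d/mul_d in
    // test/CodeGen/Thumb2/float-ops.ll.
    float  add_f(float a, float b)   { return a + b; } // HARD: vadd.f32 s0, s0, s1
    double add_d(double a, double b) { return a + b; } // SP:   bl __aeabi_dadd
    double mul_d(double a, double b) { return a * b; } // SP:   bl __aeabi_dmul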