Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -325,6 +325,10 @@
   /// \brief Return true if the hardware has a fast square-root instruction.
   bool haveFastSqrt(Type *Ty) const;
 
+  /// \brief Return the expected cost of supporting the floating point operation
+  /// of the specified type.
+  unsigned getFPOpCost(Type *Ty) const;
+
   /// \brief Return the expected cost of materializing for the given integer
   /// immediate of the specified type.
   unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
@@ -516,6 +520,7 @@
   virtual bool shouldBuildLookupTables() = 0;
   virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
   virtual bool haveFastSqrt(Type *Ty) = 0;
+  virtual unsigned getFPOpCost(Type *Ty) = 0;
   virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) = 0;
   virtual unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                                  Type *Ty) = 0;
@@ -631,6 +636,11 @@
     return Impl.getPopcntSupport(IntTyWidthInBit);
   }
   bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
+
+  unsigned getFPOpCost(Type *Ty) override {
+    return Impl.getFPOpCost(Ty);
+  }
+
   unsigned getIntImmCost(const APInt &Imm, Type *Ty) override {
     return Impl.getIntImmCost(Imm, Ty);
   }
Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -239,6 +239,8 @@
 
   bool haveFastSqrt(Type *Ty) { return false; }
 
+  unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }
+
   unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }
 
   unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
Index: include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- include/llvm/CodeGen/BasicTTIImpl.h
+++ include/llvm/CodeGen/BasicTTIImpl.h
@@ -167,6 +167,12 @@
            TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
   }
 
+  unsigned getFPOpCost(Type *Ty) {
+    // By default, FP instructions are no more expensive than integer ones,
+    // since they are implemented in hardware.  Target-specific TTI can override this.
+    return TargetTransformInfo::TCC_Basic;
+  }
+
   void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP) {
     // This unrolling functionality is target independent, but to provide some
     // motivation for its intended use, for x86:
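
Note for targets without hardware floating point support: the intent is that
such a target's TTI subclass overrides the new hook and reports FP operations
as expensive, so that cost-model clients such as the inliner back off.  A
minimal sketch follows; the class name FooTTIImpl and the subtarget query
hasHardwareFP() are hypothetical, and the real example is the ARM
implementation later in this patch.

  unsigned FooTTIImpl::getFPOpCost(Type *Ty) {
    // Without an FPU, every float/double operation becomes a runtime library
    // call, so report it as expensive to cost-model clients.
    if (!ST->hasHardwareFP() && (Ty->isFloatTy() || Ty->isDoubleTy()))
      return TargetTransformInfo::TCC_Expensive;
    return TargetTransformInfo::TCC_Basic;
  }
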
Index: lib/Analysis/IPA/InlineCost.cpp
===================================================================
--- lib/Analysis/IPA/InlineCost.cpp
+++ lib/Analysis/IPA/InlineCost.cpp
@@ -907,6 +907,25 @@
     if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
       ++NumVectorInstructions;
 
+    // If the instruction is floating point, and the target says this operation
+    // is expensive or the function has the "use-soft-float" attribute, this may
+    // eventually become a library call.  Treat the cost as such.
+    if (I->getType()->isFloatingPointTy()) {
+      bool hasSoftFloatAttr = false;
+
+      // If the function has the "use-soft-float" attribute, mark it as expensive.
+      if (F.hasFnAttribute("use-soft-float")) {
+        Attribute Attr = F.getFnAttribute("use-soft-float");
+        StringRef Val = Attr.getValueAsString();
+        if (Val == "true")
+          hasSoftFloatAttr = true;
+      }
+
+      if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
+          hasSoftFloatAttr)
+        Cost += InlineConstants::CallPenalty;
+    }
+
     // If the instruction simplified to a constant, there is no cost to this
     // instruction. Visit the instructions using our InstVisitor to account for
     // all of the per-instruction logic. The visit tree returns true if we
Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -148,6 +148,10 @@
   return TTIImpl->haveFastSqrt(Ty);
 }
 
+unsigned TargetTransformInfo::getFPOpCost(Type *Ty) const {
+  return TTIImpl->getFPOpCost(Ty);
+}
+
 unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
   return TTIImpl->getIntImmCost(Imm, Ty);
 }
Index: lib/Target/ARM/ARMSubtarget.h
===================================================================
--- lib/Target/ARM/ARMSubtarget.h
+++ lib/Target/ARM/ARMSubtarget.h
@@ -310,7 +310,8 @@
   bool hasCRC() const { return HasCRC; }
   bool hasVirtualization() const { return HasVirtualization; }
   bool useNEONForSinglePrecisionFP() const {
-    return hasNEON() && UseNEONForSinglePrecisionFP; }
+    return hasNEON() && UseNEONForSinglePrecisionFP;
+  }
   bool hasDivide() const { return HasHardwareDivide; }
   bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
Index: lib/Target/ARM/ARMTargetTransformInfo.h
===================================================================
--- lib/Target/ARM/ARMTargetTransformInfo.h
+++ lib/Target/ARM/ARMTargetTransformInfo.h
@@ -114,6 +114,8 @@
 
   unsigned getAddressComputationCost(Type *Val, bool IsComplex);
 
+  unsigned getFPOpCost(Type *Ty);
+
   unsigned getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
       TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
Index: lib/Target/ARM/ARMTargetTransformInfo.cpp
===================================================================
--- lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -314,6 +314,25 @@
   return 1;
 }
 
+unsigned ARMTTIImpl::getFPOpCost(Type *Ty) {
+  // Use similar logic to that in ARMISelLowering:
+  // Any ARM CPU with VFP2 has floating point, but Thumb1 did not have access
+  // to VFP.
+
+  if (ST->hasVFP2() && !ST->isThumb1Only()) {
+    if (Ty->isFloatTy()) {
+      return TargetTransformInfo::TCC_Basic;
+    }
+
+    if (Ty->isDoubleTy()) {
+      return ST->isFPOnlySP() ? TargetTransformInfo::TCC_Expensive
+                              : TargetTransformInfo::TCC_Basic;
+    }
+  }
+
+  return TargetTransformInfo::TCC_Expensive;
+}
+
 unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                                     Type *SubTp) {
   // We only handle costs of reverse and alternate shuffles for now.
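
For reference, the arithmetic behind the new InlineCost check is sketched below
(not part of the patch; the constant values are quoted from this tree's
TargetTransformInfo.h and InlineCost.h and are worth re-checking if they move):

  // TCC_Basic = 1 (a typical instruction), TCC_Expensive = 4 (likely expands
  // to a libcall), InlineConstants::CallPenalty = 25.  Each floating point
  // instruction in a soft-float callee is therefore charged like an extra
  // call site, which quickly pushes the candidate over the inline threshold.
  static int fpInstPenalty(bool TargetSaysExpensive, bool HasSoftFloatAttr) {
    const int CallPenalty = 25; // mirrors InlineConstants::CallPenalty
    return (TargetSaysExpensive || HasSoftFloatAttr) ? CallPenalty : 0;
  }
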
Index: test/Transforms/Inline/inline-fp.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/inline-fp.ll
@@ -0,0 +1,136 @@
+; RUN: opt -S -inline < %s | FileCheck %s
+; Make sure that soft-float implementations are treated as more expensive
+; by the inliner.
+
+define i32 @test_nofp() #0 {
+; f_nofp() has the "use-soft-float" attribute, so it should never get inlined.
+; CHECK-LABEL: test_nofp
+; CHECK: call float @f_nofp
+entry:
+  %responseX = alloca i32, align 4
+  %responseY = alloca i32, align 4
+  %responseZ = alloca i32, align 4
+  %valueX = alloca i8, align 1
+  %valueY = alloca i8, align 1
+  %valueZ = alloca i8, align 1
+
+  call void @getX(i32* %responseX, i8* %valueX)
+  call void @getY(i32* %responseY, i8* %valueY)
+  call void @getZ(i32* %responseZ, i8* %valueZ)
+
+  %0 = load i32* %responseX
+  %1 = load i8* %valueX
+  %call = call float @f_nofp(i32 %0, i8 zeroext %1)
+  %2 = load i32* %responseZ
+  %3 = load i8* %valueZ
+  %call2 = call float @f_nofp(i32 %2, i8 zeroext %3)
+  %call3 = call float @fabsf(float %call)
+  %cmp = fcmp ogt float %call3, 0x3FC1EB8520000000
+  br i1 %cmp, label %if.end12, label %if.else
+
+if.else:                                          ; preds = %entry
+  %4 = load i32* %responseY
+  %5 = load i8* %valueY
+  %call1 = call float @f_nofp(i32 %4, i8 zeroext %5)
+  %call4 = call float @fabsf(float %call1)
+  %cmp5 = fcmp ogt float %call4, 0x3FC1EB8520000000
+  br i1 %cmp5, label %if.end12, label %if.else7
+
+if.else7:                                         ; preds = %if.else
+  %call8 = call float @fabsf(float %call2)
+  %cmp9 = fcmp ogt float %call8, 0x3FC1EB8520000000
+  br i1 %cmp9, label %if.then10, label %if.end12
+
+if.then10:                                        ; preds = %if.else7
+  br label %if.end12
+
+if.end12:                                         ; preds = %if.else, %entry, %if.then10, %if.else7
+  %success.0 = phi i32 [ 0, %if.then10 ], [ 1, %if.else7 ], [ 0, %entry ], [ 0, %if.else ]
+  ret i32 %success.0
+}
+
+define i32 @test_hasfp() #0 {
+; f_hasfp() does not have the "use-soft-float" attribute, so it should get inlined.
+; CHECK-LABEL: test_hasfp
+; CHECK-NOT: call float @f_hasfp
+entry:
+  %responseX = alloca i32, align 4
+  %responseY = alloca i32, align 4
+  %responseZ = alloca i32, align 4
+  %valueX = alloca i8, align 1
+  %valueY = alloca i8, align 1
+  %valueZ = alloca i8, align 1
+
+  call void @getX(i32* %responseX, i8* %valueX)
+  call void @getY(i32* %responseY, i8* %valueY)
+  call void @getZ(i32* %responseZ, i8* %valueZ)
+
+  %0 = load i32* %responseX
+  %1 = load i8* %valueX
+  %call = call float @f_hasfp(i32 %0, i8 zeroext %1)
+  %2 = load i32* %responseZ
+  %3 = load i8* %valueZ
+  %call2 = call float @f_hasfp(i32 %2, i8 zeroext %3)
+  %call3 = call float @fabsf(float %call)
+  %cmp = fcmp ogt float %call3, 0x3FC1EB8520000000
+  br i1 %cmp, label %if.end12, label %if.else
+
+if.else:                                          ; preds = %entry
+  %4 = load i32* %responseY
+  %5 = load i8* %valueY
+  %call1 = call float @f_hasfp(i32 %4, i8 zeroext %5)
+  %call4 = call float @fabsf(float %call1)
+  %cmp5 = fcmp ogt float %call4, 0x3FC1EB8520000000
+  br i1 %cmp5, label %if.end12, label %if.else7
+
+if.else7:                                         ; preds = %if.else
+  %call8 = call float @fabsf(float %call2)
+  %cmp9 = fcmp ogt float %call8, 0x3FC1EB8520000000
+  br i1 %cmp9, label %if.then10, label %if.end12
+
+if.then10:                                        ; preds = %if.else7
+  br label %if.end12
+
+if.end12:                                         ; preds = %if.else, %entry, %if.then10, %if.else7
+  %success.0 = phi i32 [ 0, %if.then10 ], [ 1, %if.else7 ], [ 0, %entry ], [ 0, %if.else ]
+  ret i32 %success.0
+}
+
+declare void @getX(i32*, i8*) #0
+
+declare void @getY(i32*, i8*) #0
+
+declare void @getZ(i32*, i8*) #0
+
+define internal float @f_hasfp(i32 %response, i8 zeroext %value1) #0 {
+entry:
+  %conv = zext i8 %value1 to i32
+  %sub = add nsw i32 %conv, -1
+  %conv1 = sitofp i32 %sub to float
+  %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1)
+  %mul = fmul float %0, 2.620000e+03
+  %conv2 = sitofp i32 %response to float
+  %sub3 = fsub float %conv2, %mul
+  %div = fdiv float %sub3, %mul
+  ret float %div
+}
+
+define internal float @f_nofp(i32 %response, i8 zeroext %value1) #1 {
+entry:
+  %conv = zext i8 %value1 to i32
+  %sub = add nsw i32 %conv, -1
+  %conv1 = sitofp i32 %sub to float
+  %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1)
+  %mul = fmul float %0, 2.620000e+03
+  %conv2 = sitofp i32 %response to float
+  %sub3 = fsub float %conv2, %mul
+  %div = fdiv float %sub3, %mul
+  ret float %div
+}
+
+declare float @fabsf(float) optsize minsize
+
+declare float @llvm.pow.f32(float, float) optsize minsize
+
+attributes #0 = { minsize optsize }
+attributes #1 = { minsize optsize "use-soft-float"="true" }
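
For completeness, the "use-soft-float"="true" string attribute the new check
keys on is normally attached per function by the frontend.  A minimal sketch of
setting it through the C++ API is below; markUseSoftFloat is a hypothetical
helper (the attribute name and value come from this patch and its test, and the
two-argument string overload of Function::addFnAttr is assumed):

  #include "llvm/IR/Function.h"

  // Mark a function so the new inline-cost heuristic treats its FP
  // instructions as libcall-expensive, mirroring attributes #1 in the test.
  static void markUseSoftFloat(llvm::Function &F) {
    F.addFnAttr("use-soft-float", "true");
  }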