Index: include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- include/llvm/CodeGen/BasicTTIImpl.h
+++ include/llvm/CodeGen/BasicTTIImpl.h
@@ -302,9 +302,13 @@
   }
 
   unsigned getFPOpCost(Type *Ty) {
-    // By default, FP instructions are no more expensive since they are
-    // implemented in HW. Target specific TTI can override this.
-    return TargetTransformInfo::TCC_Basic;
+    // Check whether FADD is available, as a proxy for floating-point in
+    // general.
+    const TargetLoweringBase *TLI = getTLI();
+    EVT VT = TLI->getValueType(DL, Ty);
+    if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
+      return TargetTransformInfo::TCC_Basic;
+    return TargetTransformInfo::TCC_Expensive;
   }
 
   unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
Index: lib/Analysis/InlineCost.cpp
===================================================================
--- lib/Analysis/InlineCost.cpp
+++ lib/Analysis/InlineCost.cpp
@@ -700,6 +700,22 @@
   // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
   disableSROA(I.getOperand(0));
 
+  // If this is a floating-point cast, and the target says this operation
+  // is expensive, this may eventually become a library call. Treat the cost
+  // as such.
+  switch (I.getOpcode()) {
+  case Instruction::FPTrunc:
+  case Instruction::FPExt:
+  case Instruction::UIToFP:
+  case Instruction::SIToFP:
+  case Instruction::FPToUI:
+  case Instruction::FPToSI:
+    if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
+      Cost += InlineConstants::CallPenalty;
+  default:
+    break;
+  }
+
   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
 }
 
@@ -1078,6 +1094,13 @@
   disableSROA(LHS);
   disableSROA(RHS);
 
+  // If the instruction is floating point, and the target says this operation
+  // is expensive, this may eventually become a library call. Treat the cost
+  // as such.
+  if (I.getType()->isFloatingPointTy() &&
+      TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
+    Cost += InlineConstants::CallPenalty;
+
   return false;
 }
 
@@ -1547,17 +1570,6 @@
     if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
       ++NumVectorInstructions;
 
-    // If the instruction is floating point, and the target says this operation
-    // is expensive or the function has the "use-soft-float" attribute, this may
-    // eventually become a library call. Treat the cost as such.
-    if (I->getType()->isFloatingPointTy()) {
-      // If the function has the "use-soft-float" attribute, mark it as
-      // expensive.
-      if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
-          (F.getFnAttribute("use-soft-float").getValueAsString() == "true"))
-        Cost += InlineConstants::CallPenalty;
-    }
-
     // If the instruction simplified to a constant, there is no cost to this
     // instruction. Visit the instructions using our InstVisitor to account for
     // all of the per-instruction logic. The visit tree returns true if we
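Note (reviewer commentary, not part of the patch): the two InlineCost.cpp hunks above move the soft-float penalty out of analyzeBlock, where it fired for every FP-typed instruction and also string-matched the "use-soft-float" attribute, into visitCastInst and visitBinaryOperator, where the decision is delegated to the target through getFPOpCost. Roughly, the condition under which the inliner now charges InlineConstants::CallPenalty for a single instruction can be restated as the standalone helper below; the helper itself is hypothetical and only mirrors the hunks (FP-to-integer casts have to be matched by opcode because their result type is not floating point).

// Illustrative restatement only -- this helper exists neither in the patch
// nor in LLVM.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static bool paysSoftFPPenalty(const TargetTransformInfo &TTI,
                              const Instruction &I) {
  switch (I.getOpcode()) {
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
    break; // FP conversions: always ask the target.
  default:
    // Anything else pays only if it is a binary operator producing an FP
    // value (fadd, fsub, fmul, fdiv, frem).
    if (!isa<BinaryOperator>(I) || !I.getType()->isFloatingPointTy())
      return false;
    break;
  }
  // TCC_Expensive means the operation is expected to become a library call;
  // the inliner then adds InlineConstants::CallPenalty (currently 25).
  return TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive;
}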
Index: lib/Target/ARM/ARMTargetTransformInfo.h
===================================================================
--- lib/Target/ARM/ARMTargetTransformInfo.h
+++ lib/Target/ARM/ARMTargetTransformInfo.h
@@ -156,8 +156,6 @@
   int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
                                 const SCEV *Ptr);
 
-  int getFPOpCost(Type *Ty);
-
   int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
       TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
Index: lib/Target/ARM/ARMTargetTransformInfo.cpp
===================================================================
--- lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -394,25 +394,6 @@
   return 1;
 }
 
-int ARMTTIImpl::getFPOpCost(Type *Ty) {
-  // Use similar logic that's in ARMISelLowering:
-  // Any ARM CPU with VFP2 has floating point, but Thumb1 didn't have access
-  // to VFP.
-
-  if (ST->hasVFP2() && !ST->isThumb1Only()) {
-    if (Ty->isFloatTy()) {
-      return TargetTransformInfo::TCC_Basic;
-    }
-
-    if (Ty->isDoubleTy()) {
-      return ST->isFPOnlySP() ? TargetTransformInfo::TCC_Expensive :
-        TargetTransformInfo::TCC_Basic;
-    }
-  }
-
-  return TargetTransformInfo::TCC_Expensive;
-}
-
 int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                                Type *SubTp) {
   // We only handle costs of reverse and alternate shuffles for now.
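Note (reviewer commentary, not part of the patch): with the generic implementation in BasicTTIImpl.h keying off FADD legality, the ARM-specific override above becomes redundant -- VFP2 outside Thumb1 makes f32 FADD legal, and fp-only-sp leaves f64 FADD illegal, so the default reproduces the deleted logic. A target whose answer cannot be derived from FADD legality alone can still override the hook; a minimal sketch, assuming a made-up target XYZ with a subtarget query hasSlowFP64():

// Hypothetical target override -- XYZTTIImpl, ST and hasSlowFP64() are
// invented for illustration; only the shape mirrors the removed ARM code.
int XYZTTIImpl::getFPOpCost(Type *Ty) {
  // f64 arithmetic is nominally legal but microcoded on this imaginary core,
  // so keep reporting it as expensive for inlining purposes.
  if (Ty->isDoubleTy() && ST->hasSlowFP64())
    return TargetTransformInfo::TCC_Expensive;
  // Otherwise fall back to the FADD-legality heuristic added by this patch.
  return BaseT::getFPOpCost(Ty);
}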
Index: test/Transforms/Inline/ARM/inline-fp.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/ARM/inline-fp.ll
@@ -0,0 +1,113 @@
+; RUN: opt -S -inline -mtriple=arm-eabi -pass-remarks=.* -pass-remarks-missed=.* < %s 2>&1 | FileCheck %s -check-prefix=NOFP
+; RUN: opt -S -inline -mtriple=arm-eabi -mattr=+vfp2 -pass-remarks=.* -pass-remarks-missed=.* < %s 2>&1 | FileCheck %s -check-prefix=FULLFP
+; RUN: opt -S -inline -mtriple=arm-eabi -mattr=+vfp2,+fp-only-sp -pass-remarks=.* -pass-remarks-missed=.* < %s 2>&1 | FileCheck %s -check-prefix=SINGLEFP
+; Make sure that soft float implementations are calculated as being more expensive
+; to the inliner.
+
+; NOFP-DAG: single not inlined into test_single because too costly to inline (cost=125, threshold=75)
+; NOFP-DAG: single not inlined into test_single because too costly to inline (cost=125, threshold=75)
+; NOFP-DAG: single_cheap inlined into test_single_cheap with cost=-15 (threshold=75)
+; NOFP-DAG: single_cheap inlined into test_single_cheap with cost=-15015 (threshold=75)
+; NOFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75)
+; NOFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75)
+; NOFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+; NOFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+
+; FULLFP-DAG: single inlined into test_single with cost=0 (threshold=75)
+; FULLFP-DAG: single inlined into test_single with cost=-15000 (threshold=75)
+; FULLFP-DAG: single_cheap inlined into test_single_cheap with cost=-15 (threshold=75)
+; FULLFP-DAG: single_cheap inlined into test_single_cheap with cost=-15015 (threshold=75)
+; FULLFP-DAG: double inlined into test_double with cost=0 (threshold=75)
+; FULLFP-DAG: double inlined into test_double with cost=-15000 (threshold=75)
+; FULLFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+; FULLFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+
+; SINGLEFP-DAG: single inlined into test_single with cost=0 (threshold=75)
+; SINGLEFP-DAG: single inlined into test_single with cost=-15000 (threshold=75)
+; SINGLEFP-DAG: single_cheap inlined into test_single_cheap with cost=-15 (threshold=75)
+; SINGLEFP-DAG: single_cheap inlined into test_single_cheap with cost=-15015 (threshold=75)
+; SINGLEFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75)
+; SINGLEFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75)
+; SINGLEFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+; SINGLEFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+
+define i32 @test_single(i32 %a, i8 %b, i32 %c, i8 %d) #0 {
+  %call = call float @single(i32 %a, i8 zeroext %b)
+  %call2 = call float @single(i32 %c, i8 zeroext %d)
+  ret i32 0
+}
+
+define i32 @test_single_cheap(i32 %a, i8 %b, i32 %c, i8 %d) #0 {
+  %call = call float @single_cheap(i32 %a, i8 zeroext %b)
+  %call2 = call float @single_cheap(i32 %c, i8 zeroext %d)
+  ret i32 0
+}
+
+define i32 @test_double(i32 %a, i8 %b, i32 %c, i8 %d) #0 {
+  %call = call double @double(i32 %a, i8 zeroext %b)
+  %call2 = call double @double(i32 %c, i8 zeroext %d)
+  ret i32 0
+}
+
+define i32 @test_single_force_soft(i32 %a, i8 %b, i32 %c, i8 %d) #1 {
+  %call = call float @single_force_soft(i32 %a, i8 zeroext %b) #1
+  %call2 = call float @single_force_soft(i32 %c, i8 zeroext %d) #1
+  ret i32 0
+}
+
+define internal float @single(i32 %response, i8 zeroext %value1) #0 {
+entry:
+  %conv = zext i8 %value1 to i32
+  %sub = add nsw i32 %conv, -1
+  %conv1 = sitofp i32 %sub to float
+  %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1)
+  %mul = fmul float %0, 2.620000e+03
+  %conv2 = sitofp i32 %response to float
+  %sub3 = fsub float %conv2, %mul
+  %div = fdiv float %sub3, %mul
+  ret float %div
+}
+
+define internal float @single_cheap(i32 %response, i8 zeroext %value1) #0 {
+entry:
+  %conv = zext i8 %value1 to i32
+  %sub = add nsw i32 %conv, -1
+  %conv1 = bitcast i32 %sub to float
+  %conv2 = bitcast i32 %response to float
+  %0 = tail call float @llvm.pow.f32(float %conv2, float %conv1)
+  %1 = tail call float @llvm.pow.f32(float %0, float %0)
+  %2 = tail call float @llvm.pow.f32(float %1, float %1)
+  ret float %2
+}
+
+define internal double @double(i32 %response, i8 zeroext %value1) #0 {
+entry:
+  %conv = zext i8 %value1 to i32
+  %sub = add nsw i32 %conv, -1
+  %conv1 = sitofp i32 %sub to double
+  %0 = tail call double @llvm.pow.f64(double 0x3FF028F5C0000000, double %conv1)
+  %mul = fmul double %0, 2.620000e+03
+  %conv2 = sitofp i32 %response to double
+  %sub3 = fsub double %conv2, %mul
+  %div = fdiv double %sub3, %mul
+  ret double %div
+}
+
+define internal float @single_force_soft(i32 %response, i8 zeroext %value1) #1 {
+entry:
+  %conv = zext i8 %value1 to i32
+  %sub = add nsw i32 %conv, -1
+  %conv1 = sitofp i32 %sub to float
+  %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1)
+  %mul = fmul float %0, 2.620000e+03
+  %conv2 = sitofp i32 %response to float
+  %sub3 = fsub float %conv2, %mul
+  %div = fdiv float %sub3, %mul
+  ret float %div
+}
+
+declare float @llvm.pow.f32(float, float) optsize minsize
+declare double @llvm.pow.f64(double, double) optsize minsize
+
+attributes #0 = { optsize }
+attributes #1 = { optsize "use-soft-float"="true" "target-features"="+soft-float" }
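Note (reviewer commentary, not part of the patch): the target-independent test below is deleted because it relied on the inliner's hard-coded check of the "use-soft-float" string attribute, which this patch removes; whether FP is expensive is now the target's decision, so coverage moves to the ARM test above, which pins -mtriple=arm-eabi and varies the FP features per RUN line. The single_force_soft functions carry both the "use-soft-float"="true" attribute and the "+soft-float" target feature; with soft-float in effect FADD is not legal for any FP type, so the new default getFPOpCost returns TCC_Expensive even when +vfp2 is also requested, and all three FileCheck prefixes expect those calls to stay out of line. The legality query the default implementation performs can be sketched against TargetLowering as follows (illustrative helper only; it is not code from the patch):

// Hypothetical free-standing restatement of the FADD-legality proxy used by
// BasicTTIImplBase::getFPOpCost in this patch.
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

static bool hasCheapHardwareFP(const TargetLoweringBase &TLI,
                               const DataLayout &DL, Type *Ty) {
  // FADD legality stands in for "this target has hardware FP of this width";
  // +soft-float lowers FADD to a libcall, so this is false for every FP type.
  EVT VT = TLI.getValueType(DL, Ty);
  return TLI.isOperationLegalOrCustomOrPromote(ISD::FADD, VT);
}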
Index: test/Transforms/Inline/inline-fp.ll
===================================================================
--- test/Transforms/Inline/inline-fp.ll
+++ /dev/null
@@ -1,137 +0,0 @@
-; RUN: opt -S -inline < %s | FileCheck %s
-; RUN: opt -S -passes='cgscc(inline)' < %s | FileCheck %s
-; Make sure that soft float implementations are calculated as being more expensive
-; to the inliner.
-
-define i32 @test_nofp() #0 {
-; f_nofp() has the "use-soft-float" attribute, so it should never get inlined.
-; CHECK-LABEL: test_nofp
-; CHECK: call float @f_nofp
-entry:
-  %responseX = alloca i32, align 4
-  %responseY = alloca i32, align 4
-  %responseZ = alloca i32, align 4
-  %valueX = alloca i8, align 1
-  %valueY = alloca i8, align 1
-  %valueZ = alloca i8, align 1
-
-  call void @getX(i32* %responseX, i8* %valueX)
-  call void @getY(i32* %responseY, i8* %valueY)
-  call void @getZ(i32* %responseZ, i8* %valueZ)
-
-  %0 = load i32, i32* %responseX
-  %1 = load i8, i8* %valueX
-  %call = call float @f_nofp(i32 %0, i8 zeroext %1)
-  %2 = load i32, i32* %responseZ
-  %3 = load i8, i8* %valueZ
-  %call2 = call float @f_nofp(i32 %2, i8 zeroext %3)
-  %call3 = call float @fabsf(float %call)
-  %cmp = fcmp ogt float %call3, 0x3FC1EB8520000000
-  br i1 %cmp, label %if.end12, label %if.else
-
-if.else:                                          ; preds = %entry
-  %4 = load i32, i32* %responseY
-  %5 = load i8, i8* %valueY
-  %call1 = call float @f_nofp(i32 %4, i8 zeroext %5)
-  %call4 = call float @fabsf(float %call1)
-  %cmp5 = fcmp ogt float %call4, 0x3FC1EB8520000000
-  br i1 %cmp5, label %if.end12, label %if.else7
-
-if.else7:                                         ; preds = %if.else
-  %call8 = call float @fabsf(float %call2)
-  %cmp9 = fcmp ogt float %call8, 0x3FC1EB8520000000
-  br i1 %cmp9, label %if.then10, label %if.end12
-
-if.then10:                                        ; preds = %if.else7
-  br label %if.end12
-
-if.end12:                                         ; preds = %if.else, %entry, %if.then10, %if.else7
-  %success.0 = phi i32 [ 0, %if.then10 ], [ 1, %if.else7 ], [ 0, %entry ], [ 0, %if.else ]
-  ret i32 %success.0
-}
-
-define i32 @test_hasfp() #0 {
-; f_hasfp() does not have the "use-soft-float" attribute, so it should get inlined.
-; CHECK-LABEL: test_hasfp
-; CHECK-NOT: call float @f_hasfp
-entry:
-  %responseX = alloca i32, align 4
-  %responseY = alloca i32, align 4
-  %responseZ = alloca i32, align 4
-  %valueX = alloca i8, align 1
-  %valueY = alloca i8, align 1
-  %valueZ = alloca i8, align 1
-
-  call void @getX(i32* %responseX, i8* %valueX)
-  call void @getY(i32* %responseY, i8* %valueY)
-  call void @getZ(i32* %responseZ, i8* %valueZ)
-
-  %0 = load i32, i32* %responseX
-  %1 = load i8, i8* %valueX
-  %call = call float @f_hasfp(i32 %0, i8 zeroext %1)
-  %2 = load i32, i32* %responseZ
-  %3 = load i8, i8* %valueZ
-  %call2 = call float @f_hasfp(i32 %2, i8 zeroext %3)
-  %call3 = call float @fabsf(float %call)
-  %cmp = fcmp ogt float %call3, 0x3FC1EB8520000000
-  br i1 %cmp, label %if.end12, label %if.else
-
-if.else:                                          ; preds = %entry
-  %4 = load i32, i32* %responseY
-  %5 = load i8, i8* %valueY
-  %call1 = call float @f_hasfp(i32 %4, i8 zeroext %5)
-  %call4 = call float @fabsf(float %call1)
-  %cmp5 = fcmp ogt float %call4, 0x3FC1EB8520000000
-  br i1 %cmp5, label %if.end12, label %if.else7
-
-if.else7:                                         ; preds = %if.else
-  %call8 = call float @fabsf(float %call2)
-  %cmp9 = fcmp ogt float %call8, 0x3FC1EB8520000000
-  br i1 %cmp9, label %if.then10, label %if.end12
-
-if.then10:                                        ; preds = %if.else7
-  br label %if.end12
-
-if.end12:                                         ; preds = %if.else, %entry, %if.then10, %if.else7
-  %success.0 = phi i32 [ 0, %if.then10 ], [ 1, %if.else7 ], [ 0, %entry ], [ 0, %if.else ]
-  ret i32 %success.0
-}
-
-declare void @getX(i32*, i8*) #0
-
-declare void @getY(i32*, i8*) #0
-
-declare void @getZ(i32*, i8*) #0
-
-define internal float @f_hasfp(i32 %response, i8 zeroext %value1) #0 {
-entry:
-  %conv = zext i8 %value1 to i32
-  %sub = add nsw i32 %conv, -1
-  %conv1 = sitofp i32 %sub to float
-  %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1)
-  %mul = fmul float %0, 2.620000e+03
-  %conv2 = sitofp i32 %response to float
-  %sub3 = fsub float %conv2, %mul
-  %div = fdiv float %sub3, %mul
-  ret float %div
-}
-
-define internal float @f_nofp(i32 %response, i8 zeroext %value1) #1 {
-entry:
-  %conv = zext i8 %value1 to i32
-  %sub = add nsw i32 %conv, -1
-  %conv1 = sitofp i32 %sub to float
-  %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1)
-  %mul = fmul float %0, 2.620000e+03
-  %conv2 = sitofp i32 %response to float
-  %sub3 = fsub float %conv2, %mul
-  %div = fdiv float %sub3, %mul
-  ret float %div
-}
-
-declare float @fabsf(float) optsize minsize
-
-declare float @llvm.pow.f32(float, float) optsize minsize
-
-attributes #0 = { optsize }
-attributes #1 = { optsize "use-soft-float"="true" }