Index: llvm/lib/Analysis/InlineCost.cpp
===================================================================
--- llvm/lib/Analysis/InlineCost.cpp
+++ llvm/lib/Analysis/InlineCost.cpp
@@ -272,6 +272,7 @@
   bool visitCmpInst(CmpInst &I);
   bool visitSub(BinaryOperator &I);
   bool visitBinaryOperator(BinaryOperator &I);
+  bool visitUnaryOperator(UnaryOperator &I);
   bool visitLoad(LoadInst &I);
   bool visitStore(StoreInst &I);
   bool visitExtractValue(ExtractValueInst &I);
@@ -1103,6 +1104,46 @@
   return false;
 }
 
+/// Analyze a unary operator (e.g. fneg) while computing the inline cost.
+///
+/// The operand is replaced by its known constant when one is available,
+/// either directly or through SimplifiedValues, and the operation is handed
+/// to the instruction simplifier. Returns true if it simplifies (recording
+/// the result in SimplifiedValues when it is a constant). Otherwise SROA is
+/// disabled for the operand, floating-point operations the target reports as
+/// expensive are charged a call penalty, and false is returned.
+bool CallAnalyzer::visitUnaryOperator(UnaryOperator &I) {
+  Value *Op = I.getOperand(0);
+  Constant *COp = dyn_cast<Constant>(Op);
+  if (!COp)
+    COp = SimplifiedValues.lookup(Op);
+
+  Value *SimpleV = nullptr;
+  if (auto *FI = dyn_cast<FPMathOperator>(&I))
+    SimpleV = SimplifyFPUnOp(I.getOpcode(), COp ? COp : Op,
+                             FI->getFastMathFlags(), DL);
+  else
+    SimpleV = SimplifyUnOp(I.getOpcode(), COp ? COp : Op, DL);
+
+  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
+    SimplifiedValues[&I] = C;
+
+  if (SimpleV)
+    return true;
+
+  // Disable any SROA on arguments to arbitrary, unsimplified unary operators.
+  disableSROA(Op);
+
+  // If the instruction is floating point, and the target says this operation
+  // is expensive, this may eventually become a library call. Treat the cost
+  // as such.
+  if (I.getType()->isFloatingPointTy() &&
+      TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
+    addCost(InlineConstants::CallPenalty);
+
+  return false;
+}
+
 bool CallAnalyzer::visitLoad(LoadInst &I) {
   Value *SROAArg;
   DenseMap<Value *, int>::iterator CostIt;
Index: llvm/lib/Analysis/InstructionSimplify.cpp
===================================================================
--- llvm/lib/Analysis/InstructionSimplify.cpp
+++ llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4589,6 +4589,10 @@
   }
 }
 
+Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q) {
+  return ::simplifyUnOp(Opcode, Op, Q, RecursionLimit);
+}
+
 Value *llvm::SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
                             const SimplifyQuery &Q) {
   return ::simplifyFPUnOp(Opcode, Op, FMF, Q, RecursionLimit);
Index: llvm/test/Transforms/Inline/ARM/inline-fp.ll
===================================================================
--- llvm/test/Transforms/Inline/ARM/inline-fp.ll
+++ llvm/test/Transforms/Inline/ARM/inline-fp.ll
@@ -12,6 +12,8 @@
 ; NOFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75)
 ; NOFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
 ; NOFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+; NOFP-DAG: single_force_soft_fneg not inlined into test_single_force_soft_fneg because too costly to inline (cost=125, threshold=75)
+; NOFP-DAG: single_force_soft_fneg not inlined into test_single_force_soft_fneg because too costly to inline (cost=125, threshold=75)
 
 ; FULLFP-DAG: single inlined into test_single with (cost=0, threshold=75)
 ; FULLFP-DAG: single inlined into test_single with (cost=-15000, threshold=75)
@@ -21,6 +23,8 @@
 ; FULLFP-DAG: double inlined into test_double with (cost=-15000, threshold=75)
 ; FULLFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
 ; FULLFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+; FULLFP-DAG: single_force_soft_fneg not inlined into test_single_force_soft_fneg because too costly to inline (cost=125, threshold=75)
+; FULLFP-DAG: single_force_soft_fneg not inlined into test_single_force_soft_fneg because too costly to inline (cost=125, threshold=75)
 
 ; SINGLEFP-DAG: single inlined into test_single with (cost=0, threshold=75)
 ; SINGLEFP-DAG: single inlined into test_single with (cost=-15000, threshold=75)
@@ -30,6 +34,8 @@
 ; SINGLEFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75)
 ; SINGLEFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
 ; SINGLEFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+; SINGLEFP-DAG: single_force_soft_fneg not inlined into test_single_force_soft_fneg because too costly to inline (cost=125, threshold=75)
+; SINGLEFP-DAG: single_force_soft_fneg not inlined into test_single_force_soft_fneg because too costly to inline (cost=125, threshold=75)
 
 define i32 @test_single(i32 %a, i8 %b, i32 %c, i8 %d) #0 {
   %call = call float @single(i32 %a, i8 zeroext %b)
@@ -55,6 +61,12 @@
   ret i32 0
 }
 
+define i32 @test_single_force_soft_fneg(i32 %a, i8 %b, i32 %c, i8 %d) #1 {
+  %call = call float @single_force_soft_fneg(i32 %a, i8 zeroext %b) #1
+  %call2 = call float @single_force_soft_fneg(i32 %c, i8 zeroext %d) #1
+  ret i32 0
+}
+
 define internal float @single(i32 %response, i8 zeroext %value1) #0 {
 entry:
   %conv = zext i8 %value1 to i32
@@ -106,6 +118,20 @@
   ret float %div
 }
 
+; Make sure fneg is considered expensive
+define internal float @single_force_soft_fneg(i32 %response, i8 zeroext %value1) #1 {
+entry:
+  %conv = zext i8 %value1 to i32
+  %sub = add nsw i32 %conv, -1
+  %conv1 = sitofp i32 %sub to float
+  %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1)
+  %mul = fneg float %0
+  %conv2 = sitofp i32 %response to float
+  %sub3 = fsub float %conv2, %mul
+  %div = fdiv float %sub3, %mul
+  ret float %div
+}
+
 declare float @llvm.pow.f32(float, float) optsize minsize
 declare double @llvm.pow.f64(double, double) optsize minsize
 