Index: llvm/lib/Analysis/InlineCost.cpp
===================================================================
--- llvm/lib/Analysis/InlineCost.cpp
+++ llvm/lib/Analysis/InlineCost.cpp
@@ -135,6 +135,40 @@
 namespace {
 class InlineCostCallAnalyzer;
 
+/// Parses the value of string attribute \p Attr as a base-10 integer and
+/// returns it, or None if getAsInteger reports a parse failure.
+Optional<int> stringAttrAsInt(Attribute Attr) {
+  assert((!Attr.isValid() || Attr.isStringAttribute()) &&
+         "Expected string attribute!");
+  int AttrValue;
+  if (Attr.getValueAsString().getAsInteger(10, AttrValue))
+    return None;
+  return AttrValue;
+}
+
+/// This function behaves more like CallBase::hasFnAttr: when it looks for the
+/// requested attribute, it checks both the call instruction and the called
+/// function (if it's available and operand bundles don't prohibit that).
+Attribute getFnAttr(CallBase &CB, StringRef AttrKind) {
+  Attribute CallAttr = CB.getFnAttr(AttrKind);
+  if (CallAttr.isValid())
+    return CallAttr;
+
+  // Operand bundles override attributes on the called function, but don't
+  // override attributes directly present on the call instruction.
+  if (!CB.isFnAttrDisallowedByOpBundle(AttrKind))
+    if (const Function *F = CB.getCalledFunction())
+      return F->getFnAttribute(AttrKind);
+
+  return {};
+}
+
+/// Looks up \p AttrKind with getFnAttr (call site first, then the callee) and
+/// returns its value parsed by stringAttrAsInt.
+Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
+  return stringAttrAsInt(getFnAttr(CB, AttrKind));
+}
+
 // This struct is used to store information about inline cost of a
 // particular instruction
 struct InstructionCostDetail {
@@ -235,6 +269,10 @@
   /// Called the analysis engine determines load elimination won't happen.
   virtual void onDisableLoadElimination() {}
 
+  /// Called when we visit a CallBase, before the analysis starts. Return false
+  /// to stop further processing of the instruction.
+  virtual bool onCallBaseVisitStart(CallBase &Call) { return true; }
+
   /// Called to account for a call.
   virtual void onCallPenalty() {}
 
@@ -558,6 +596,22 @@
     addCost(LoadEliminationCost);
     LoadEliminationCost = 0;
   }
+
+  bool onCallBaseVisitStart(CallBase &Call) override {
+    if (Optional<int> AttrCallThresholdBonus =
+            getStringFnAttrAsInt(Call, "call-threshold-bonus"))
+      Threshold += *AttrCallThresholdBonus;
+
+    if (Optional<int> AttrCallCost =
+            getStringFnAttrAsInt(Call, "call-inline-cost")) {
+      addCost(*AttrCallCost);
+      // Prevent further processing of the call since we want to override its
+      // inline cost, not just add to it.
+      return false;
+    }
+    return true;
+  }
+
   void onCallPenalty() override { addCost(CallPenalty); }
   void onCallArgumentSetup(const CallBase &Call) override {
     // Pay the price of the argument setup. We account for the average 1
@@ -847,6 +901,14 @@
     else if (NumVectorInstructions <= NumInstructions / 2)
       Threshold -= VectorBonus / 2;
 
+    if (Optional<int> AttrCost =
+            getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))
+      Cost = *AttrCost;
+
+    if (Optional<int> AttrThreshold =
+            getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
+      Threshold = *AttrThreshold;
+
     if (auto Result = costBenefitAnalysis()) {
       DecidedByCostBenefit = true;
       if (Result.getValue())
@@ -2029,6 +2091,9 @@
 }
 
 bool CallAnalyzer::visitCallBase(CallBase &Call) {
+  if (!onCallBaseVisitStart(Call))
+    return true;
+
   if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
       !F.hasFnAttribute(Attribute::ReturnsTwice)) {
     // This aborts the entire analysis.
Index: llvm/test/Transforms/Inline/inline-call-penalty-option.ll =================================================================== --- llvm/test/Transforms/Inline/inline-call-penalty-option.ll +++ llvm/test/Transforms/Inline/inline-call-penalty-option.ll @@ -4,25 +4,20 @@ ; RUN: opt < %s -inline --inline-call-penalty=0 --inline-threshold=5 -S | FileCheck %s ; RUN: opt < %s -inline --inline-threshold=5 -S | FileCheck %s -check-prefix=DEFAULT_CALL_PENALTY -define i32 @X9(i32 %x) nounwind { - %x2 = add i32 %x, %x - %x3 = add i32 %x2, %x - %x4 = add i32 %x3, %x - %x5 = add i32 %x4, %x - %x6 = add i32 %x5, %x - %x7 = add i32 %x6, %x - %x8 = add i32 %x7, %x - %x9 = add i32 %x8, %x +declare void @extern() - ret i32 %x9 +define void @X9() nounwind { + call void @extern() "call-inline-cost"="30" + ret void } -define i32 @f1(i32 %x) nounwind { - %res = call i32 @X9(i32 %x) - ret i32 %res +define void @f1() nounwind { + call void @X9() + ret void ; CHECK-LABEL: @f1( -; CHECK: %res = call i32 @X9 +; CHECK: call void @X9 ; DEFAULT_CALL_PENALTY-LABEL: @f1( -; DEFAULT_CALL_PENALTY-NOT: call +; DEFAULT_CALL_PENALTY: call void @extern +; DEFAULT_CALL_PENALTY-NOT: call void @X9 } Index: llvm/test/Transforms/Inline/inline-cold-callee.ll =================================================================== --- llvm/test/Transforms/Inline/inline-cold-callee.ll +++ llvm/test/Transforms/Inline/inline-cold-callee.ll @@ -7,33 +7,25 @@ define i32 @callee1(i32 %x) !prof !21 { %x1 = add i32 %x, 1 - %x2 = add i32 %x1, 1 - %x3 = add i32 %x2, 1 - call void @extern() - ret i32 %x3 + ret i32 %x1 } define i32 @callee2(i32 %x) !prof !22 { ; CHECK-LABEL: @callee2( %x1 = add i32 %x, 1 - %x2 = add i32 %x1, 1 - %x3 = add i32 %x2, 1 - call void @extern() - ret i32 %x3 + ret i32 %x1 } define i32 @caller2(i32 %y1) !prof !22 { ; CHECK-LABEL: @caller2( ; CHECK: call i32 @callee2 ; CHECK-NOT: call i32 @callee1 -; CHECK: ret i32 %x3.i - %y2 = call i32 @callee2(i32 %y1) - %y3 = call i32 @callee1(i32 %y2) +; 
CHECK: ret i32 %x1.i + %y2 = call i32 @callee2(i32 %y1) "function-inline-cost"="10" + %y3 = call i32 @callee1(i32 %y2) "function-inline-cost"="10" ret i32 %y3 } -declare void @extern() - !llvm.module.flags = !{!1} !21 = !{!"function_entry_count", i64 100} !22 = !{!"function_entry_count", i64 1} Index: llvm/test/Transforms/Inline/inline-cold-callsite-pgo.ll =================================================================== --- llvm/test/Transforms/Inline/inline-cold-callsite-pgo.ll +++ llvm/test/Transforms/Inline/inline-cold-callsite-pgo.ll @@ -4,12 +4,10 @@ ; and does not get inlined. Another callsite to an identical callee that ; is not cold gets inlined because cost is below the inline-threshold. -define i32 @callee1(i32 %x) !prof !21 { +define i32 @callee1(i32 %x) "function-inline-cost"="30" !prof !21 { %x1 = add i32 %x, 1 - %x2 = add i32 %x1, 1 - %x3 = add i32 %x2, 1 call void @extern() - ret i32 %x3 + ret i32 %x1 } define i32 @caller(i32 %n) !prof !22 { @@ -20,7 +18,7 @@ cond_true: ; CHECK-LABEL: cond_true: ; CHECK-NOT: call i32 @callee1 -; CHECK: ret i32 %x3.i +; CHECK: ret i32 %x1.i %i = call i32 @callee1(i32 %n) ret i32 %i cond_false: Index: llvm/test/Transforms/Inline/inline-cold-callsite.ll =================================================================== --- llvm/test/Transforms/Inline/inline-cold-callsite.ll +++ llvm/test/Transforms/Inline/inline-cold-callsite.ll @@ -5,8 +5,7 @@ ; and does not get inlined. Another callsite to an identical callee that ; is not cold gets inlined because cost is below the inline-threshold. 
-define void @callee() { - call void @extern() +define void @callee() "function-inline-cost"="10" { call void @extern() ret void } Index: llvm/test/Transforms/Inline/inline-cold.ll =================================================================== --- llvm/test/Transforms/Inline/inline-cold.ll +++ llvm/test/Transforms/Inline/inline-cold.ll @@ -18,39 +18,13 @@ ; This function should be larger than the cold threshold (75), but smaller ; than the regular threshold. ; Function Attrs: nounwind readnone uwtable -define i32 @simpleFunction(i32 %a) #0 { +define i32 @simpleFunction(i32 %a) #0 "function-inline-cost"="80" { entry: - call void @extern() - %a1 = load volatile i32, i32* @a - %x1 = add i32 %a1, %a1 - %a2 = load volatile i32, i32* @a - %x2 = add i32 %x1, %a2 - %a3 = load volatile i32, i32* @a - %x3 = add i32 %x2, %a3 - %a4 = load volatile i32, i32* @a - %x4 = add i32 %x3, %a4 - %a5 = load volatile i32, i32* @a - %x5 = add i32 %x4, %a5 - %a6 = load volatile i32, i32* @a - %x6 = add i32 %x5, %a6 - %a7 = load volatile i32, i32* @a - %x7 = add i32 %x6, %a6 - %a8 = load volatile i32, i32* @a - %x8 = add i32 %x7, %a8 - %a9 = load volatile i32, i32* @a - %x9 = add i32 %x8, %a9 - %a10 = load volatile i32, i32* @a - %x10 = add i32 %x9, %a10 - %a11 = load volatile i32, i32* @a - %x11 = add i32 %x10, %a11 - %a12 = load volatile i32, i32* @a - %x12 = add i32 %x11, %a12 - %add = add i32 %x12, %a - ret i32 %add + ret i32 %a } ; Function Attrs: nounwind cold readnone uwtable -define i32 @ColdFunction(i32 %a) #1 { +define i32 @ColdFunction(i32 %a) #1 "function-inline-cost"="30" { ; CHECK-LABEL: @ColdFunction ; CHECK: ret ; OVERRIDE-LABEL: @ColdFunction @@ -58,21 +32,11 @@ ; DEFAULT-LABEL: @ColdFunction ; DEFAULT: ret entry: - call void @extern() - %a1 = load volatile i32, i32* @a - %x1 = add i32 %a1, %a1 - %a2 = load volatile i32, i32* @a - %x2 = add i32 %x1, %a2 - %a3 = load volatile i32, i32* @a - %x3 = add i32 %x2, %a3 - %a4 = load volatile i32, i32* @a - %x4 = add i32 %x3, 
%a4 - %add = add i32 %x4, %a - ret i32 %add + ret i32 %a } ; This function should be larger than the default cold threshold (225). -define i32 @ColdFunction2(i32 %a) #1 { +define i32 @ColdFunction2(i32 %a) #1 "function-inline-cost"="250" { ; CHECK-LABEL: @ColdFunction2 ; CHECK: ret ; OVERRIDE-LABEL: @ColdFunction2 @@ -80,84 +44,7 @@ ; DEFAULT-LABEL: @ColdFunction2 ; DEFAULT: ret entry: - call void @extern() - %a1 = load volatile i32, i32* @a - %x1 = add i32 %a1, %a1 - %a2 = load volatile i32, i32* @a - %x2 = add i32 %x1, %a2 - %a3 = load volatile i32, i32* @a - %x3 = add i32 %x2, %a3 - %a4 = load volatile i32, i32* @a - %x4 = add i32 %x3, %a4 - %a5 = load volatile i32, i32* @a - %x5 = add i32 %x4, %a5 - %a6 = load volatile i32, i32* @a - %x6 = add i32 %x5, %a6 - %a7 = load volatile i32, i32* @a - %x7 = add i32 %x6, %a7 - %a8 = load volatile i32, i32* @a - %x8 = add i32 %x7, %a8 - %a9 = load volatile i32, i32* @a - %x9 = add i32 %x8, %a9 - %a10 = load volatile i32, i32* @a - %x10 = add i32 %x9, %a10 - %a11 = load volatile i32, i32* @a - %x11 = add i32 %x10, %a11 - %a12 = load volatile i32, i32* @a - %x12 = add i32 %x11, %a12 - - %a21 = load volatile i32, i32* @a - %x21 = add i32 %x12, %a21 - %a22 = load volatile i32, i32* @a - %x22 = add i32 %x21, %a22 - %a23 = load volatile i32, i32* @a - %x23 = add i32 %x22, %a23 - %a24 = load volatile i32, i32* @a - %x24 = add i32 %x23, %a24 - %a25 = load volatile i32, i32* @a - %x25 = add i32 %x24, %a25 - %a26 = load volatile i32, i32* @a - %x26 = add i32 %x25, %a26 - %a27 = load volatile i32, i32* @a - %x27 = add i32 %x26, %a27 - %a28 = load volatile i32, i32* @a - %x28 = add i32 %x27, %a28 - %a29 = load volatile i32, i32* @a - %x29 = add i32 %x28, %a29 - %a30 = load volatile i32, i32* @a - %x30 = add i32 %x29, %a30 - %a31 = load volatile i32, i32* @a - %x31 = add i32 %x30, %a31 - %a32 = load volatile i32, i32* @a - %x32 = add i32 %x31, %a32 - - %a41 = load volatile i32, i32* @a - %x41 = add i32 %x32, %a41 - %a42 = load 
volatile i32, i32* @a - %x42 = add i32 %x41, %a42 - %a43 = load volatile i32, i32* @a - %x43 = add i32 %x42, %a43 - %a44 = load volatile i32, i32* @a - %x44 = add i32 %x43, %a44 - %a45 = load volatile i32, i32* @a - %x45 = add i32 %x44, %a45 - %a46 = load volatile i32, i32* @a - %x46 = add i32 %x45, %a46 - %a47 = load volatile i32, i32* @a - %x47 = add i32 %x46, %a47 - %a48 = load volatile i32, i32* @a - %x48 = add i32 %x47, %a48 - %a49 = load volatile i32, i32* @a - %x49 = add i32 %x48, %a49 - %a50 = load volatile i32, i32* @a - %x50 = add i32 %x49, %a50 - %a51 = load volatile i32, i32* @a - %x51 = add i32 %x50, %a51 - %a52 = load volatile i32, i32* @a - %x52 = add i32 %x51, %a52 - - %add = add i32 %x52, %a - ret i32 %add + ret i32 %a } ; Function Attrs: nounwind readnone uwtable Index: llvm/test/Transforms/Inline/inline-cost-attributes.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Inline/inline-cost-attributes.ll @@ -0,0 +1,53 @@ +; REQUIRES: asserts +; RUN: opt -inline-cost-full -passes='cgscc(inline)' -debug-only=inline -disable-output %s 2>&1 | FileCheck --check-prefix=INLINER %s +; RUN: opt -inline-cost-full -passes='print<inline-cost>' -disable-output %s 2>&1 | FileCheck --check-prefix=COST %s + +declare void @extern() "call-threshold-bonus"="31" + +define void @fn1() "function-inline-cost"="321" "function-inline-threshold"="123" "call-inline-cost"="271" { +entry: + ret void +} + +define void @fn2() "function-inline-threshold"="41" { +; INLINER-LABEL: Inlining calls in: fn2 +; INLINER-NEXT: Function size: 6 +; INLINER-NEXT: NOT Inlining (cost=321, threshold=123), Call: call void @fn1() +; INLINER-NEXT: NOT Inlining (cost=321, threshold=321), Call: call void @fn1() +; INLINER-NEXT: NOT Inlining (cost=197, threshold=123), Call: call void @fn1() +; INLINER-NEXT: Inlining (cost=197, threshold=321), Call: call void @fn1() + +; COST-LABEL: define void @fn2() +; COST-NEXT: entry: +; COST-NEXT: threshold delta = 31 
+; COST-NEXT: call void @extern() +; COST-NEXT: cost delta = 132, threshold delta = 193 +; COST-NEXT: call void @fn1() +; COST-NEXT: cost delta = 0 +; COST-NEXT: call void @fn1() +; COST-NEXT: cost delta = 271, threshold delta = 17 +; COST-NEXT: call void @fn1() +; COST-NEXT: cost delta = 473 +; COST-NEXT: call void @fn1() + +entry: + call void @extern() + call void @fn1() "call-inline-cost"="132" "call-threshold-bonus"="193" + call void @fn1() "call-inline-cost"="0" "function-inline-threshold"="321" + call void @fn1() "call-threshold-bonus"="17" "function-inline-cost"="197" + call void @fn1() "call-inline-cost"="473" "function-inline-cost"="197" "function-inline-threshold"="321" + ret void +} + +define void @fn3() { +; INLINER-LABEL: Inlining calls in: fn3 +; INLINER-NEXT: Function size: 3 +; INLINER-NEXT: Inlining (cost=386, threshold=849), Call: call void @fn1() +; INLINER-NEXT: Size after inlining: 2 +; INLINER-NEXT: NOT Inlining (cost=403, threshold=41), Call: call void @fn2() + +entry: + call void @fn1() "function-inline-cost"="386" "function-inline-threshold"="849" + call void @fn2() + ret void +} Index: llvm/test/Transforms/Inline/inline-threshold.ll =================================================================== --- llvm/test/Transforms/Inline/inline-threshold.ll +++ llvm/test/Transforms/Inline/inline-threshold.ll @@ -9,81 +9,21 @@ define i32 @simpleFunction(i32 %a) #0 { entry: %a1 = load volatile i32, i32* @a - %x1 = add i32 %a1, %a1 - %cmp = icmp eq i32 %a1, 0 - br i1 %cmp, label %if.then, label %if.else -if.then: - %a2 = load volatile i32, i32* @a - %x2_0 = add i32 %x1, %a2 - br label %if.else -if.else: - %x2 = phi i32 [ %x1, %entry ], [ %x2_0, %if.then ] - %a3 = load volatile i32, i32* @a - %x3 = add i32 %x2, %a3 - %a4 = load volatile i32, i32* @a - %x4 = add i32 %x3, %a4 - %a5 = load volatile i32, i32* @a - %x5 = add i32 %x4, %a5 - %a6 = load volatile i32, i32* @a - %x6 = add i32 %x5, %a6 - %a7 = load volatile i32, i32* @a - %x7 = add i32 %x6, %a7 
- %a8 = load volatile i32, i32* @a - %x8 = add i32 %x7, %a8 - %a9 = load volatile i32, i32* @a - %x9 = add i32 %x8, %a9 - %a10 = load volatile i32, i32* @a - %x10 = add i32 %x9, %a10 - %a11 = load volatile i32, i32* @a - %x11 = add i32 %x10, %a11 - %a12 = load volatile i32, i32* @a - %x12 = add i32 %x11, %a12 - %a13 = load volatile i32, i32* @a - %x13 = add i32 %x12, %a13 - %a14 = load volatile i32, i32* @a - %x14 = add i32 %x13, %a14 - %a15 = load volatile i32, i32* @a - %x15 = add i32 %x14, %a15 - %a16 = load volatile i32, i32* @a - %x16 = add i32 %x15, %a16 - %a17 = load volatile i32, i32* @a - %x17 = add i32 %x16, %a17 - %a18 = load volatile i32, i32* @a - %x18 = add i32 %x17, %a18 - %a19 = load volatile i32, i32* @a - %x19 = add i32 %x18, %a19 - %a20 = load volatile i32, i32* @a - %x20 = add i32 %x19, %a20 - %a21 = load volatile i32, i32* @a - %x21 = add i32 %x20, %a21 - %a22 = load volatile i32, i32* @a - %x22 = add i32 %x21, %a22 - %a23 = load volatile i32, i32* @a - %x23 = add i32 %x22, %a23 - %a24 = load volatile i32, i32* @a - %x24 = add i32 %x23, %a24 - %a25 = load volatile i32, i32* @a - %x25 = add i32 %x24, %a25 - %a26 = load volatile i32, i32* @a - %x26 = add i32 %x25, %a26 - %a27 = load volatile i32, i32* @a - %x27 = add i32 %x26, %a27 - %a28 = load volatile i32, i32* @a - %x28 = add i32 %x27, %a28 - %a29 = load volatile i32, i32* @a - %x29 = add i32 %x28, %a29 - %add = add i32 %x29, %a - ret i32 %add + %x1 = add i32 %a1, %a + ret i32 %x1 } ; Function Attrs: nounwind readnone uwtable define i32 @bar(i32 %a) #0 { ; CHECK-LABEL: @bar -; CHECK-NOT: call i32 @simpleFunction(i32 6) +; CHECK: load volatile +; CHECK-NEXT: add i32 +; CHECK-NEXT: call i32 @simpleFunction ; CHECK: ret entry: - %i = tail call i32 @simpleFunction(i32 6) - ret i32 %i + %i = tail call i32 @simpleFunction(i32 6) "function-inline-cost"="749" + %j = tail call i32 @simpleFunction(i32 %i) "function-inline-cost"="750" + ret i32 %j } attributes #0 = { nounwind readnone uwtable }