Index: llvm/include/llvm/Transforms/IPO/Attributor.h
===================================================================
--- llvm/include/llvm/Transforms/IPO/Attributor.h
+++ llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -103,6 +103,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CGSCCPassManager.h"
 #include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
@@ -636,6 +637,9 @@
   /// The underlying CGSCC, or null if not available.
   SetVector<Function *> *CGSCC;
 
+  /// Set of functions marked `alwaysinline` that are viable to be inlined.
+  SmallPtrSet<Function *, 8> InlineableFunctions;
+
   /// Give the Attributor access to the members so
   /// Attributor::identifyDefaultAbstractAttributes(...) can initialize them.
   friend struct Attributor;
@@ -789,6 +793,14 @@
   /// through the information cache interface *prior* to looking at them.
   void initializeInformationCache(Function &F);
 
+  /// Determine whether the function \p F is IPO amendable.
+  ///
+  /// A function is IPO amendable if it has an exact definition, or if it is
+  /// marked `alwaysinline` and is viable to be inlined.
+  bool isFunctionIPOAmendable(Function &F) {
+    return F.hasExactDefinition() || InfoCache.InlineableFunctions.count(&F);
+  }
+
   /// Mark the internal function \p F as live.
   ///
   /// This will trigger the identification and initialization of attributes for
@@ -1704,7 +1716,7 @@
     }
 
     bool IsFnInterface = IRP.isFnInterfaceKind();
-    const Function *FnScope = IRP.getAnchorScope();
+    Function *FnScope = IRP.getAnchorScope();
     // TODO: Not all attributes require an exact definition. Find a way to
     //       enable deduction for some but not all attributes in case the
     //       definition might be changed at runtime, see also
@@ -1712,8 +1724,8 @@
     // TODO: We could always determine abstract attributes and if sufficient
     //       information was found we could duplicate the functions that do not
     //       have an exact definition.
-    if (IsFnInterface && (!FnScope || !FnScope->hasExactDefinition()))
+    if (IsFnInterface && (!FnScope || !A.isFunctionIPOAmendable(*FnScope)))
       this->getState().indicatePessimisticFixpoint();
   }
 
   /// See AbstractAttribute::manifest(...).
Index: llvm/lib/Transforms/IPO/Attributor.cpp
===================================================================
--- llvm/lib/Transforms/IPO/Attributor.cpp
+++ llvm/lib/Transforms/IPO/Attributor.cpp
@@ -1156,7 +1156,7 @@
       }
     }
 
-    if (!F->hasExactDefinition())
+    if (!A.isFunctionIPOAmendable(*F))
       indicatePessimisticFixpoint();
   }
 
@@ -2435,7 +2435,7 @@
 // FIXME: Any cycle is regarded as endless loop for now.
 //        We have to allow some patterns.
 static bool containsPossiblyEndlessLoop(Function *F) {
-  return !F || !F->hasExactDefinition() || containsCycle(*F);
+  return containsCycle(*F);
 }
 
 struct AAWillReturnImpl : public AAWillReturn {
@@ -2446,7 +2446,7 @@
     AAWillReturn::initialize(A);
 
     Function *F = getAssociatedFunction();
-    if (containsPossiblyEndlessLoop(F))
+    if (!F || !A.isFunctionIPOAmendable(*F) || containsPossiblyEndlessLoop(F))
       indicatePessimisticFixpoint();
   }
 
@@ -2986,7 +2986,7 @@
 
   /// See AbstractAttribute::initialize(...).
   void initialize(Attributor &A) override {
-    if (!getAssociatedFunction()->hasExactDefinition())
+    if (!A.isFunctionIPOAmendable(*getAssociatedFunction()))
       indicatePessimisticFixpoint();
   }
 
@@ -3537,8 +3537,8 @@
 
     const IRPosition &IRP = this->getIRPosition();
     bool IsFnInterface = IRP.isFnInterfaceKind();
-    const Function *FnScope = IRP.getAnchorScope();
-    if (IsFnInterface && (!FnScope || !FnScope->hasExactDefinition()))
+    Function *FnScope = IRP.getAnchorScope();
+    if (IsFnInterface && (!FnScope || !A.isFunctionIPOAmendable(*FnScope)))
       indicatePessimisticFixpoint();
   }
 
@@ -3819,7 +3819,7 @@
 
     if (getIRPosition().isFnInterfaceKind() &&
         (!getAssociatedFunction() ||
-         !getAssociatedFunction()->hasExactDefinition()))
+         !A.isFunctionIPOAmendable(*getAssociatedFunction())))
       indicatePessimisticFixpoint();
   }
 
@@ -4075,7 +4075,7 @@
     }
     Function *AnchorScope = getAnchorScope();
     if (isFnInterfaceKind() &&
-        (!AnchorScope || !AnchorScope->hasExactDefinition())) {
+        (!AnchorScope || !A.isFunctionIPOAmendable(*AnchorScope))) {
       indicatePessimisticFixpoint();
       return;
     }
@@ -5807,7 +5807,7 @@
 
     // Initialize the use vector with all direct uses of the associated value.
     Argument *Arg = getAssociatedArgument();
-    if (!Arg || !Arg->getParent()->hasExactDefinition()) {
+    if (!Arg || !A.isFunctionIPOAmendable(*(Arg->getParent()))) {
       indicatePessimisticFixpoint();
     } else {
       // Initialize the use vector with all direct uses of the associated value.
@@ -5935,7 +5935,7 @@
   void initialize(Attributor &A) override {
     AAMemoryBehaviorImpl::initialize(A);
     Function *F = getAssociatedFunction();
-    if (!F || !F->hasExactDefinition())
+    if (!F || !A.isFunctionIPOAmendable(*F))
       indicatePessimisticFixpoint();
   }
 
@@ -6622,7 +6622,7 @@
   void initialize(Attributor &A) override {
     AAMemoryLocationImpl::initialize(A);
     Function *F = getAssociatedFunction();
-    if (!F || !F->hasExactDefinition())
+    if (!F || !A.isFunctionIPOAmendable(*F))
       indicatePessimisticFixpoint();
   }
 
@@ -8212,6 +8212,10 @@
     if (I.mayReadOrWriteMemory())
       ReadOrWriteInsts.push_back(&I);
   }
+
+  if (F.hasFnAttribute(Attribute::AlwaysInline) &&
+      isInlineViable(F).isSuccess())
+    InfoCache.InlineableFunctions.insert(&F);
 }
 
 void Attributor::recordDependence(const AbstractAttribute &FromAA,
Index: llvm/test/Transforms/Attributor/alwaysinline.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Attributor/alwaysinline.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -attributor -attributor-disable=false -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefix=CHECK
+;
+; When a function is marked `alwaysinline` and is able to be inlined,
+; we can do IPO across its definition boundary.
+
+; The function is not exactly defined, but it is marked alwaysinline and can be
+; inlined, so the function can be analyzed.
+; CHECK: Function Attrs: alwaysinline nofree nosync nounwind readnone willreturn
+define linkonce void @inner1() alwaysinline {
+; CHECK-LABEL: @inner1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret void
+;
+entry:
+  ret void
+}
+
+; CHECK: Function Attrs: nofree nosync nounwind readnone willreturn
+define void @outer1() {
+; CHECK-LABEL: @outer1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @inner1()
+  ret void
+}
+
+; The function is not alwaysinline and is not exactly defined,
+; so it will not be analyzed.
+; CHECK-NOT: Function Attrs:
+define linkonce i32 @inner2() {
+; CHECK-LABEL: @inner2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i32 1
+;
+entry:
+  ret i32 1
+}
+
+; CHECK-NOT: Function Attrs
+define i32 @outer2() {
+; CHECK-LABEL: @outer2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[R:%.*]] = call i32 @inner2() #2
+; CHECK-NEXT:    ret i32 [[R]]
+;
+entry:
+  %r = call i32 @inner2() alwaysinline
+  ret i32 %r
+}
+
+; This function is marked alwaysinline, but it cannot be inlined.
+; Since it is also not exactly defined,
+; it will not be analyzed.
+; CHECK: Function Attrs:
+; CHECK-NOT: nofree nosync nounwind readnone
+define linkonce i32 @inner3(i8* %addr) alwaysinline {
+; CHECK-LABEL: @inner3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    indirectbr i8* [[ADDR:%.*]], [label [[ONE:%.*]], label %two]
+; CHECK:       one:
+; CHECK-NEXT:    ret i32 42
+; CHECK:       two:
+; CHECK-NEXT:    ret i32 44
+;
+entry:
+  indirectbr i8* %addr, [ label %one, label %two ]
+
+one:
+  ret i32 42
+
+two:
+  ret i32 44
+}
+
+; CHECK-NOT: Function Attrs:
+define i32 @outer3(i32 %x) {
+; CHECK-LABEL: @outer3(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 42
+; CHECK-NEXT:    [[ADDR:%.*]] = select i1 [[CMP]], i8* blockaddress(@inner3, [[ONE:%.*]]), i8* blockaddress(@inner3, [[TWO:%.*]])
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @inner3(i8* [[ADDR]])
+; CHECK-NEXT:    ret i32 [[CALL]]
+;
+  %cmp = icmp slt i32 %x, 42
+  %addr = select i1 %cmp, i8* blockaddress(@inner3, %one), i8* blockaddress(@inner3, %two)
+  %call = call i32 @inner3(i8* %addr)
+  ret i32 %call
+}