Index: llvm/include/llvm/Transforms/IPO/Attributor.h =================================================================== --- llvm/include/llvm/Transforms/IPO/Attributor.h +++ llvm/include/llvm/Transforms/IPO/Attributor.h @@ -103,6 +103,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -789,6 +790,12 @@ /// through the information cache interface *prior* to looking at them. void initializeInformationCache(Function &F); + /// Initialize inline information for function \p F. + /// + /// This method uses isInlineViable to determine if the function can be + /// inlined and caches the result. + void initializeInlineInfo(Function &F); + /// Mark the internal function \p F as live. /// /// This will trigger the identification and initialization of attributes for @@ -1095,6 +1102,12 @@ AA.initialize(*this); + // If the function is marked as `alwaysinline` but cannot be inlined, + // we should not initialize it. + if (FnScope && FnScope->hasFnAttribute(Attribute::AlwaysInline)) + if (InlineableFunctions.find(FnScope) == InlineableFunctions.end()) + AA.getState().indicatePessimisticFixpoint(); + // We can initialize (=look at) code outside the current function set but // not call update because that would again spawn new abstract attributes in // potentially unconnected code regions (=SCCs). @@ -1187,6 +1200,9 @@ /// Helper to update an underlying call graph. CallGraphUpdater &CGUpdater; + /// Set of functions that can be inlined. + SmallPtrSet<Function *, 8> InlineableFunctions; + /// Set of functions for which we modified the content such that it might /// impact the call graph. 
SmallPtrSet<Function *, 8> CGModifiedFunctions; @@ -1712,8 +1728,12 @@ // TODO: We could always determine abstract attributes and if sufficient // information was found we could duplicate the functions that do not // have an exact definition. - if (IsFnInterface && (!FnScope || !FnScope->hasExactDefinition())) + if (IsFnInterface && (!FnScope || !FnScope->hasExactDefinition())) { + if (FnScope && FnScope->hasFnAttribute(Attribute::AlwaysInline)) { + return; + } this->getState().indicatePessimisticFixpoint(); + } } /// See AbstractAttribute::manifest(...). @@ -2748,7 +2768,8 @@ /// Return an assumed constant for the assocaited value a program point \p /// CtxI. Optional<ConstantInt *> - getAssumedConstantInt(Attributor &A, const Instruction *CtxI = nullptr) const { + getAssumedConstantInt(Attributor &A, + const Instruction *CtxI = nullptr) const { ConstantRange RangeV = getAssumedConstantRange(A, CtxI); if (auto *C = RangeV.getSingleElement()) return cast<ConstantInt>( Index: llvm/lib/Transforms/IPO/Attributor.cpp =================================================================== --- llvm/lib/Transforms/IPO/Attributor.cpp +++ llvm/lib/Transforms/IPO/Attributor.cpp @@ -8214,6 +8214,12 @@ } } +void Attributor::initializeInlineInfo(Function &F) { + if (isInlineViable(F).isSuccess()) { + InlineableFunctions.insert(&F); + } +} + void Attributor::recordDependence(const AbstractAttribute &FromAA, const AbstractAttribute &ToAA, DepClassTy DepClass) { @@ -8513,8 +8519,10 @@ // while we identify default attribute opportunities. 
Attributor A(Functions, InfoCache, CGUpdater, DepRecInterval); - for (Function *F : Functions) + for (Function *F : Functions) { A.initializeInformationCache(*F); + A.initializeInlineInfo(*F); + } for (Function *F : Functions) { if (F->hasExactDefinition()) Index: llvm/test/Transforms/Attributor/alwaysinline.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Attributor/alwaysinline.ll @@ -0,0 +1,90 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -attributor -attributor-disable=false -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefix=CHECK +; +; When a function is marked `alwaysinline` and is able to be inlined, +; we can IPO its boundaries + +; the function is not exactly defined, and marked alwaysinline and can be inlined, +; so the function can be analyzed +; CHECK: Function Attrs: alwaysinline nofree nosync nounwind readnone +define linkonce void @inner1() alwaysinline { +; CHECK-LABEL: @inner1( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + ret void +} + +; CHECK: Function Attrs: nofree nosync nounwind +define void @outer1() { +; CHECK-LABEL: @outer1( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + call void @inner1() + ret void +} + +; The function is not alwaysinline and is not exactly defined +; so it will not be analyzed +; CHECK-NOT: Function Attrs: +define linkonce i32 @inner2() { +; CHECK-LABEL: @inner2( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 1 +; +entry: + ret i32 1 +} + +; CHECK-NOT: Function Attrs +define i32 @outer2() { +; CHECK-LABEL: @outer2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[R:%.*]] = call i32 @inner2() #2 +; CHECK-NEXT: ret i32 [[R]] +; +entry: + %r = call i32 @inner2() alwaysinline + ret i32 %r +} + +; This function cannot be inlined although it is marked alwaysinline +; it is `unexactly defined` and alwaysinline but cannot be inlined. 
+; so it will not be analyzed +; CHECK: Function Attrs: +; CHECK-NOT: nofree nosync nounwind readnone +define linkonce i32 @inner3(i8* %addr) alwaysinline { +; CHECK-LABEL: @inner3( +; CHECK-NEXT: entry: +; CHECK-NEXT: indirectbr i8* [[ADDR:%.*]], [label [[ONE:%.*]], label %two] +; CHECK: one: +; CHECK-NEXT: ret i32 42 +; CHECK: two: +; CHECK-NEXT: ret i32 44 +; +entry: + indirectbr i8* %addr, [ label %one, label %two ] + +one: + ret i32 42 + +two: + ret i32 44 +} + +; CHECK-NOT: Function Attrs: +define i32 @outer3(i32 %x) { +; CHECK-LABEL: @outer3( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 42 +; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[CMP]], i8* blockaddress(@inner3, [[ONE:%.*]]), i8* blockaddress(@inner3, [[TWO:%.*]]) +; CHECK-NEXT: [[CALL:%.*]] = call i32 @inner3(i8* [[ADDR]]) +; CHECK-NEXT: ret i32 [[CALL]] +; + %cmp = icmp slt i32 %x, 42 + %addr = select i1 %cmp, i8* blockaddress(@inner3, %one), i8* blockaddress(@inner3, %two) + %call = call i32 @inner3(i8* %addr) + ret i32 %call +}