Index: llvm/include/llvm/Transforms/IPO/Attributor.h =================================================================== --- llvm/include/llvm/Transforms/IPO/Attributor.h +++ llvm/include/llvm/Transforms/IPO/Attributor.h @@ -103,6 +103,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -636,6 +637,9 @@ /// The underlying CGSCC, or null if not available. SetVector *CGSCC; + /// Set of functions for which inlining is viable. + SmallPtrSet InlineableFunctions; + /// Give the Attributor access to the members so /// Attributor::identifyDefaultAbstractAttributes(...) can initialize them. friend struct Attributor; @@ -1095,6 +1099,13 @@ AA.initialize(*this); + // If the function is marked `alwaysinline` but is not in the set of + // inlineable functions, give up on it by forcing a pessimistic fixpoint. + if (FnScope && FnScope->hasFnAttribute(Attribute::AlwaysInline)) + if (InfoCache.InlineableFunctions.find(FnScope) == + InfoCache.InlineableFunctions.end()) + AA.getState().indicatePessimisticFixpoint(); + // We can initialize (=look at) code outside the current function set but // not call update because that would again spawn new abstract attributes in // potentially unconnected code regions (=SCCs). @@ -1712,8 +1723,11 @@ // TODO: We could always determine abstract attributes and if sufficient // information was found we could duplicate the functions that do not // have an exact definition. - if (IsFnInterface && (!FnScope || !FnScope->hasExactDefinition())) + if (IsFnInterface && (!FnScope || !FnScope->hasExactDefinition())) { + if (FnScope && FnScope->hasFnAttribute(Attribute::AlwaysInline)) + return; this->getState().indicatePessimisticFixpoint(); + } } /// See AbstractAttribute::manifest(...). 
@@ -2748,7 +2762,8 @@ /// Return an assumed constant for the assocaited value a program point \p /// CtxI. Optional - getAssumedConstantInt(Attributor &A, const Instruction *CtxI = nullptr) const { + getAssumedConstantInt(Attributor &A, + const Instruction *CtxI = nullptr) const { ConstantRange RangeV = getAssumedConstantRange(A, CtxI); if (auto *C = RangeV.getSingleElement()) return cast( Index: llvm/lib/Transforms/IPO/Attributor.cpp =================================================================== --- llvm/lib/Transforms/IPO/Attributor.cpp +++ llvm/lib/Transforms/IPO/Attributor.cpp @@ -8212,6 +8212,9 @@ if (I.mayReadOrWriteMemory()) ReadOrWriteInsts.push_back(&I); } + + if (isInlineViable(F).isSuccess()) + InfoCache.InlineableFunctions.insert(&F); } void Attributor::recordDependence(const AbstractAttribute &FromAA, Index: llvm/test/Transforms/Attributor/alwaysinline.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Attributor/alwaysinline.ll @@ -0,0 +1,90 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -attributor -attributor-disable=false -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefix=CHECK +; +; When a function is marked `alwaysinline` and is able to be inlined, +; we can perform IPO across its boundaries. + +; The function is not exactly defined, but it is marked alwaysinline and can be +; inlined, so the function can be analyzed. +; CHECK: Function Attrs: alwaysinline nofree nosync nounwind readnone +define linkonce void @inner1() alwaysinline { +; CHECK-LABEL: @inner1( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + ret void +} + +; CHECK: Function Attrs: nofree nosync nounwind +define void @outer1() { +; CHECK-LABEL: @outer1( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + call void @inner1() + ret void +} + +; The function is not alwaysinline and is not exactly defined, +; so it will not be analyzed. +; 
CHECK-NOT: Function Attrs: +define linkonce i32 @inner2() { +; CHECK-LABEL: @inner2( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 1 +; +entry: + ret i32 1 +} + +; CHECK-NOT: Function Attrs +define i32 @outer2() { +; CHECK-LABEL: @outer2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[R:%.*]] = call i32 @inner2() #2 +; CHECK-NEXT: ret i32 [[R]] +; +entry: + %r = call i32 @inner2() alwaysinline + ret i32 %r +} + +; This function cannot be inlined although it is marked alwaysinline. +; It is not exactly defined and is alwaysinline, but inlining is not viable, +; so it will not be analyzed. +; CHECK: Function Attrs: +; CHECK-NOT: nofree nosync nounwind readnone +define linkonce i32 @inner3(i8* %addr) alwaysinline { +; CHECK-LABEL: @inner3( +; CHECK-NEXT: entry: +; CHECK-NEXT: indirectbr i8* [[ADDR:%.*]], [label [[ONE:%.*]], label %two] +; CHECK: one: +; CHECK-NEXT: ret i32 42 +; CHECK: two: +; CHECK-NEXT: ret i32 44 +; +entry: + indirectbr i8* %addr, [ label %one, label %two ] + +one: + ret i32 42 + +two: + ret i32 44 +} + +; CHECK-NOT: Function Attrs: +define i32 @outer3(i32 %x) { +; CHECK-LABEL: @outer3( +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 42 +; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[CMP]], i8* blockaddress(@inner3, [[ONE:%.*]]), i8* blockaddress(@inner3, [[TWO:%.*]]) +; CHECK-NEXT: [[CALL:%.*]] = call i32 @inner3(i8* [[ADDR]]) +; CHECK-NEXT: ret i32 [[CALL]] +; + %cmp = icmp slt i32 %x, 42 + %addr = select i1 %cmp, i8* blockaddress(@inner3, %one), i8* blockaddress(@inner3, %two) + %call = call i32 @inner3(i8* %addr) + ret i32 %call +}