Index: lib/Transforms/IPO/Inliner.cpp
===================================================================
--- lib/Transforms/IPO/Inliner.cpp
+++ lib/Transforms/IPO/Inliner.cpp
@@ -62,6 +62,15 @@
     DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
                                 cl::init(false), cl::Hidden);
 
+/// Flag to control the maximum number of instructions scanned when checking
+/// for inlining into a definitely reached call site.
+///
+/// This is set to a completely arbitrary value and primarily exists to prevent
+/// run-away compile time on truly outrageous callers.
+static cl::opt<int>
+    ReachabilityInstrScanLimit("inliner-reachability-instr-scan-limit",
+                               cl::init(50), cl::Hidden);
+
 namespace {
 enum class InlinerFunctionImportStatsOpts {
   No = 0,
@@ -742,6 +751,103 @@
   return true;
 }
 
+namespace {
+/// Struct of properties used to control propagation of attributes after
+/// inlining.
+struct AttrPropCSInfo {
+  /// True when the callsite is in the entry extended basic block.
+  bool IsInExtendedEntry;
+
+  /// True when the callsite is in the entry extended basic block and
+  /// definitely reached if the function is called.
+  ///
+  /// For example, this will be false if there are intervening calls which may
+  /// unwind or simply never return.
+  bool IsInExtendedEntryAndReached;
+};
+} // namespace
+
+static AttrPropCSInfo computeAttrPropagationCallSiteInfo(CallSite CS) {
+  AttrPropCSInfo Info = {false, false};
+
+  BasicBlock &ParentBB = *CS.getParent();
+  Function &Caller = *ParentBB.getParent();
+
+  // First see if the call site is part of the "extended" entry basic block:
+  // walk the chain of unique successors from the entry block until we either
+  // hit the callsite's block or the chain ends.
+  SmallVector<BasicBlock *, 16> Blocks;
+  BasicBlock *BB = &*Caller.begin();
+  do {
+    Blocks.push_back(BB);
+    if (BB == &ParentBB)
+      break;
+    BB = BB->getUniqueSuccessor();
+  } while (BB);
+
+  if (!BB)
+    return Info;
+
+  Info.IsInExtendedEntry = true;
+
+  // Now scan the instructions in these blocks to see if the callsite is also
+  // definitely reached.
+  int NumInstructionsScanned = 0;
+  for (BasicBlock *BB : Blocks) {
+    for (Instruction &I : *BB) {
+      // If we've reached our limit on the scan, just return.
+      if (++NumInstructionsScanned > ReachabilityInstrScanLimit)
+        return Info;
+
+      // If we've found this callsite, we're done. ParentBB is the last block
+      // collected above, so breaking the inner loop ends the whole scan.
+      if (&I == CS.getInstruction())
+        break;
+
+      // Otherwise check if this is a call that could prevent reaching our
+      // callsite.
+      auto *CI = dyn_cast<CallInst>(&I);
+      if (!CI)
+        continue;
+
+      // FIXME: We use `readnone` as an approximation for "definitely returns"
+      // here as in other places in LLVM. They're all a bit imprecise.
+      if (!CI->doesNotThrow() || !CI->doesNotAccessMemory())
+        // If this could throw or access memory, assume we don't have
+        // reachability.
+        return Info;
+    }
+  }
+
+  // We finished the scan w/o breaking reachability!
+  Info.IsInExtendedEntryAndReached = true;
+  return Info;
+}
+
+static bool canPropagateAttrsAfterInlining(Function &Caller, Function &Callee) {
+  return (!Caller.hasFnAttribute(Attribute::InlineHint) &&
+          Callee.hasFnAttribute(Attribute::InlineHint)) ||
+         (!Caller.hasFnAttribute(Attribute::Cold) &&
+          Callee.hasFnAttribute(Attribute::Cold));
+}
+
+static void propagateAttrsAfterInlining(Function &Caller, Function &Callee,
+                                        AttrPropCSInfo PropInfo) {
+  assert(canPropagateAttrsAfterInlining(Caller, Callee) &&
+         "Tried to propagate attributes when there were none!");
+
+  // We only propagate callee attributes when inlining into the extended entry
+  // block.
+  if (!PropInfo.IsInExtendedEntry)
+    return;
+
+  if (Callee.hasFnAttribute(Attribute::InlineHint))
+    Caller.addFnAttr(Attribute::InlineHint);
+
+  // If the call is definitely reached as well, sink the cold attribute down to
+  // the caller as well.
+  if (PropInfo.IsInExtendedEntryAndReached &&
+      Callee.hasFnAttribute(Attribute::Cold))
+    Caller.addFnAttr(Attribute::Cold);
+}
+
 PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
                                    CGSCCAnalysisManager &AM, LazyCallGraph &CG,
                                    CGSCCUpdateResult &UR) {
@@ -889,6 +995,10 @@
     if (!shouldInline(CS, GetInlineCost, ORE))
       continue;
 
+    Optional<AttrPropCSInfo> AttrPropInfo;
+    if (canPropagateAttrsAfterInlining(F, Callee))
+      AttrPropInfo = computeAttrPropagationCallSiteInfo(CS);
+
     // Setup the data structure used to plumb customization into the
     // `InlineFunction` routine.
     InlineFunctionInfo IFI(
@@ -914,6 +1024,10 @@
     // Merge the attributes based on the inlining.
     AttributeFuncs::mergeAttributesForInlining(F, Callee);
 
+    // Also propagate inlining-specific attributes.
+    if (AttrPropInfo)
+      propagateAttrsAfterInlining(F, Callee, *AttrPropInfo);
+
     // For local functions, check whether this makes the callee trivially
     // dead. In that case, we can drop the body of the function eagerly
     // which may reduce the number of callers of other functions to one,
Index: test/Transforms/Inline/inline-attr-prop.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/inline-attr-prop.ll
@@ -0,0 +1,199 @@
+; Test that when we successfully inline a call we propagate attributes to the
+; caller that have specific impact on subsequent inlining.
+;
+; RUN: opt -S < %s -passes=inline -inline-threshold=50 -inlinecold-threshold=10 -inlinehint-threshold=200 | FileCheck %s
+
+@a = global i32 4
+
+; Filler that is larger than the cold threshold but smaller than the normal
+; threshold.
+define i32 @filler() {
+; CHECK-LABEL: define i32 @filler()
+entry:
+  %a1 = load volatile i32, i32* @a
+  %a2 = load volatile i32, i32* @a
+  %a3 = load volatile i32, i32* @a
+  %a4 = load volatile i32, i32* @a
+  %a5 = load volatile i32, i32* @a
+  %a6 = load volatile i32, i32* @a
+  %a7 = load volatile i32, i32* @a
+  %a8 = load volatile i32, i32* @a
+  %a9 = load volatile i32, i32* @a
+  %a10 = load volatile i32, i32* @a
+  ret i32 %a10
+}
+
+define i32 @cold_small() cold {
+; CHECK-LABEL: define i32 @cold_small()
+; CHECK: #[[COLD_ATTR_GROUP:[0-9]+]] {
+entry:
+  %a1 = load volatile i32, i32* @a
+  ret i32 %a1
+}
+
+define i32 @cold_medium() cold {
+; CHECK-LABEL: define i32 @cold_medium()
+; CHECK: #[[COLD_ATTR_GROUP]] {
+entry:
+  %a = call i32 @filler()
+; CHECK-NOT: call
+  ret i32 %a
+}
+
+define i32 @hint_medium() inlinehint {
+; CHECK-LABEL: define i32 @hint_medium()
+; CHECK: #[[HINT_ATTR_GROUP:[0-9]+]] {
+entry:
+  %a = call i32 @filler()
+; CHECK-NOT: call
+  ret i32 %a
+}
+
+define i32 @hint_large() inlinehint {
+; CHECK-LABEL: define i32 @hint_large()
+; CHECK: #[[HINT_ATTR_GROUP]] {
+entry:
+  %a1 = call i32 @filler()
+  %a2 = call i32 @filler()
+  %a3 = call i32 @filler()
+  %a4 = call i32 @filler()
+  %a5 = call i32 @filler()
+; CHECK-NOT: call
+  ret i32 %a5
+}
+
+define i32 @normal_small() {
+; CHECK-LABEL: define i32 @normal_small()
+entry:
+  %a1 = load volatile i32, i32* @a
+  ret i32 %a1
+}
+
+define i32 @normal_medium() {
+; CHECK-LABEL: define i32 @normal_medium()
+entry:
+  %a = call i32 @filler()
+; CHECK-NOT: call
+  ret i32 %a
+}
+
+define i32 @normal_large() {
+; CHECK-LABEL: define i32 @normal_large()
+entry:
+  %a1 = call i32 @filler()
+  %a2 = call i32 @filler()
+  %a3 = call i32 @filler()
+  %a4 = call i32 @filler()
+  %a5 = call i32 @filler()
+; CHECK-NOT: call
+  ret i32 %a5
+}
+
+define i32 @base_case_test_cold() {
+; CHECK-LABEL: define i32 @base_case_test_cold()
+entry:
+  %a1 = call i32 @cold_small()
+; CHECK-NOT: call
+  %a2 = call i32 @cold_medium()
+; CHECK: call i32 @cold_medium()
+  ret i32 %a2
+}
+
+define i32 @base_case_test_hint() {
+; CHECK-LABEL: define i32 @base_case_test_hint()
+entry:
+  %a2 = call i32 @hint_medium()
+  %a3 = call i32 @hint_large()
+; CHECK-NOT: call
+  ret i32 %a3
+}
+
+define i32 @base_case_test_normal() {
+; CHECK-LABEL: define i32 @base_case_test_normal()
+entry:
+  %a1 = call i32 @normal_small()
+  %a2 = call i32 @normal_medium()
+; CHECK-NOT: call
+  %a3 = call i32 @normal_large()
+; CHECK: call i32 @normal_large()
+  ret i32 %a3
+}
+
+define i32 @cold_wrapper_small() {
+; CHECK-LABEL: define i32 @cold_wrapper_small()
+; CHECK: #[[COLD_ATTR_GROUP]] {
+entry:
+  %a = call i32 @cold_small()
+; CHECK-NOT: call
+  ret i32 %a
+}
+
+define i32 @cold_wrapper_medium() {
+; CHECK-LABEL: define i32 @cold_wrapper_medium()
+; CHECK: #[[COLD_ATTR_GROUP]] {
+entry:
+  ; Use `alwaysinline` to simulate the case where for whatever reason we both
+  ; don't defer inlining and end up inlining at this phase but ending up with
+  ; more code than is desirable to inline.
+  %a = call i32 @cold_medium() alwaysinline
+; CHECK-NOT: call
+  ret i32 %a
+}
+
+define i32 @test_call_cold_wrapper_small() {
+; CHECK-LABEL: define i32 @test_call_cold_wrapper_small()
+entry:
+  %a = call i32 @cold_wrapper_small()
+; CHECK-NOT: call
+  ret i32 %a
+}
+
+define i32 @test_call_cold_wrapper_medium() {
+; CHECK-LABEL: define i32 @test_call_cold_wrapper_medium()
+entry:
+  %a = call i32 @cold_wrapper_medium()
+; CHECK: call i32 @cold_wrapper_medium()
+  ret i32 %a
+}
+
+define i32 @hint_wrapper_medium() {
+; CHECK-LABEL: define i32 @hint_wrapper_medium()
+; CHECK: #[[HINT_ATTR_GROUP]] {
+entry:
+  %a = call i32 @hint_medium()
+; CHECK-NOT: call
+  ret i32 %a
+}
+
+define i32 @hint_wrapper_large() {
+; CHECK-LABEL: define i32 @hint_wrapper_large()
+; CHECK: #[[HINT_ATTR_GROUP]] {
+entry:
+  ; Use `alwaysinline` to simulate the case where for whatever reason we both
+  ; don't defer inlining and end up inlining at this phase but ending up with
+  ; more code than is desirable to inline.
+  %a = call i32 @hint_large()
+; CHECK-NOT: call
+  ret i32 %a
+}
+
+define i32 @test_call_hint_wrapper_medium() {
+; CHECK-LABEL: define i32 @test_call_hint_wrapper_medium()
+entry:
+  %a = call i32 @hint_wrapper_medium()
+; CHECK-NOT: call
+  ret i32 %a
+}
+
+define i32 @test_call_hint_wrapper_large() {
+; CHECK-LABEL: define i32 @test_call_hint_wrapper_large()
+entry:
+  %a = call i32 @hint_wrapper_large()
+; CHECK-NOT: call
+  ret i32 %a
+}
+
+; CHECK-LABEL: attributes
+; CHECK: #[[COLD_ATTR_GROUP]] = { cold }
+; CHECK-LABEL: attributes
+; CHECK: #[[HINT_ATTR_GROUP]] = { inlinehint }