Index: lib/Transforms/IPO/PartialInlining.cpp
===================================================================
--- lib/Transforms/IPO/PartialInlining.cpp
+++ lib/Transforms/IPO/PartialInlining.cpp
@@ -17,7 +17,9 @@
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
@@ -27,7 +29,7 @@
 #include "llvm/Transforms/Utils/CodeExtractor.h"
 using namespace llvm;
 
-#define DEBUG_TYPE "partialinlining"
+#define DEBUG_TYPE "partial_inlining"
 
 STATISTIC(NumPartialInlined, "Number of functions partially inlined");
 
@@ -65,6 +67,11 @@
 }
 
 Function *PartialInlinerImpl::unswitchFunction(Function *F) {
+  // Every use of the function is later rewritten as a direct call and
+  // inlined; bail out if any use is not a direct call.
+  if (F->hasAddressTaken())
+    return nullptr;
+
   // First, verify that this function is an unswitching candidate...
   BasicBlock *EntryBlock = &F->front();
   BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
@@ -85,25 +92,6 @@
   if (ReturnCount != 1)
     return nullptr;
 
-  auto canAllUsesBeReplaced = [](Function *F) {
-    std::vector<User *> Users(F->user_begin(), F->user_end());
-    for (User *User : Users) {
-      Function *Callee = nullptr;
-      if (CallInst *CI = dyn_cast<CallInst>(User))
-        Callee = CallSite(CI).getCalledFunction();
-      else if (InvokeInst *II = dyn_cast<InvokeInst>(User))
-        Callee = CallSite(II).getCalledFunction();
-
-      if (Callee != F)
-        return false;
-    }
-
-    return true;
-  };
-
-  if (!canAllUsesBeReplaced(F))
-    return nullptr;
-
   // Clone the function, so that we can hack away on it.
   ValueToValueMapTy VMap;
   Function *DuplicateFunction = CloneFunction(F, VMap);
@@ -168,11 +156,26 @@
   // Inline the top-level if test into all callers.
   std::vector<User *> Users(DuplicateFunction->user_begin(),
                             DuplicateFunction->user_end());
-  for (User *User : Users)
+
+  for (User *User : Users) {
+    CallSite CS;
     if (CallInst *CI = dyn_cast<CallInst>(User))
-      InlineFunction(CI, IFI);
+      CS = CallSite(CI);
     else if (InvokeInst *II = dyn_cast<InvokeInst>(User))
-      InlineFunction(II, IFI);
+      CS = CallSite(II);
+    else
+      llvm_unreachable("All uses must be calls");
+
+    // Report the transformation at the call site before it is inlined away.
+    OptimizationRemarkEmitter ORE(CS.getCaller());
+    DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+    BasicBlock *Block = CS.getParent();
+    ORE.emit(OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", DLoc, Block)
+             << ore::NV("Callee", F) << " partially inlined into "
+             << ore::NV("Caller", CS.getCaller()));
+
+    InlineFunction(CS, IFI);
+  }
 
   // Ditch the duplicate, since we're done with it, and rewrite all remaining
   // users (function pointers, etc.) back to the original function.
Index: test/Transforms/CodeExtractor/PartialInlineOptRemark.ll
===================================================================
--- test/Transforms/CodeExtractor/PartialInlineOptRemark.ll
+++ test/Transforms/CodeExtractor/PartialInlineOptRemark.ll
@@ -0,0 +1,68 @@
+; RUN: opt -S -partial-inliner -pass-remarks=partial_inlining -disable-output < %s 2>&1 | FileCheck %s
+; RUN: opt -S -passes=partial-inliner -pass-remarks=partial_inlining -disable-output < %s 2>&1 | FileCheck %s
+
+; Verify that partially inlining @bar into @dummy_caller emits an optimization
+; remark under the "partial_inlining" pass name.
+
+define i32 @bar(i32 %arg) local_unnamed_addr #0 !dbg !5 {
+bb:
+  %tmp = icmp slt i32 %arg, 0, !dbg !7
+  br i1 %tmp, label %bb1, label %bb2, !dbg !8
+
+bb1:                                              ; preds = %bb
+  tail call void (...) @foo() #0, !dbg !9
+  tail call void (...) @foo() #0, !dbg !10
+  tail call void (...) @foo() #0, !dbg !11
+  tail call void (...) @foo() #0, !dbg !12
+  tail call void (...) @foo() #0, !dbg !13
+  tail call void (...) @foo() #0, !dbg !14
+  tail call void (...) @foo() #0, !dbg !15
+  tail call void (...) @foo() #0, !dbg !16
+  tail call void (...) @foo() #0, !dbg !17
+  br label %bb2, !dbg !18
+
+bb2:                                              ; preds = %bb1, %bb
+  %tmp3 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
+  ret i32 %tmp3, !dbg !19
+}
+
+; Function Attrs: nounwind
+declare void @foo(...) local_unnamed_addr #0
+
+; Function Attrs: nounwind
+define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 !dbg !20 {
+bb:
+; CHECK:remark{{.*}}bar partially inlined into dummy_caller
+  %tmp = tail call i32 @bar(i32 %arg), !dbg !21
+  ret i32 %tmp, !dbg !22
+}
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3}
+!llvm.ident = !{!4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
+!1 = !DIFile(filename: "t.c", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{!"clang "}
+!5 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !6, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2)
+!6 = !DISubroutineType(types: !2)
+!7 = !DILocation(line: 4, column: 14, scope: !5)
+!8 = !DILocation(line: 4, column: 6, scope: !5)
+!9 = !DILocation(line: 5, column: 5, scope: !5)
+!10 = !DILocation(line: 6, column: 5, scope: !5)
+!11 = !DILocation(line: 7, column: 5, scope: !5)
+!12 = !DILocation(line: 8, column: 5, scope: !5)
+!13 = !DILocation(line: 9, column: 5, scope: !5)
+!14 = !DILocation(line: 10, column: 5, scope: !5)
+!15 = !DILocation(line: 11, column: 5, scope: !5)
+!16 = !DILocation(line: 12, column: 5, scope: !5)
+!17 = !DILocation(line: 13, column: 5, scope: !5)
+!18 = !DILocation(line: 14, column: 5, scope: !5)
+!19 = !DILocation(line: 17, column: 1, scope: !5)
+!20 = distinct !DISubprogram(name: "dummy_caller", scope: !1, file: !1, line: 19, type: !6, isLocal: false, isDefinition: true, scopeLine: 19, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2)
+!21 = !DILocation(line: 21, column: 11, scope: !20)
+!22 = !DILocation(line: 21, column: 4, scope: !20)