Index: llvm/trunk/lib/Analysis/ModuleSummaryAnalysis.cpp
===================================================================
--- llvm/trunk/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ llvm/trunk/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -291,6 +291,18 @@
         if (!CalledValue || isa<Constant>(CalledValue))
           continue;
 
+        // Check if the instruction has a callees metadata. If so, add callees
+        // to CallGraphEdges to reflect the references from the metadata, and
+        // to enable importing for subsequent indirect call promotion and
+        // inlining.
+        if (auto *MD = I.getMetadata(LLVMContext::MD_callees)) {
+          for (auto &Op : MD->operands()) {
+            Function *Callee = mdconst::extract_or_null<Function>(Op);
+            if (Callee)
+              CallGraphEdges[Index.getOrInsertValueInfo(Callee)];
+          }
+        }
+
         uint32_t NumVals, NumCandidates;
         uint64_t TotalCount;
         auto CandidateProfileData =
Index: llvm/trunk/test/ThinLTO/X86/Inputs/callees-metadata.ll
===================================================================
--- llvm/trunk/test/ThinLTO/X86/Inputs/callees-metadata.ll
+++ llvm/trunk/test/ThinLTO/X86/Inputs/callees-metadata.ll
@@ -0,0 +1,34 @@
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@fptr = internal unnamed_addr global i32 (i32)* @f2, align 8
+
+define dso_local i32 @foo(i32 %x) local_unnamed_addr {
+entry:
+  %0 = load i32 (i32)*, i32 (i32)** @fptr, align 8
+  %call = tail call i32 %0(i32 %x), !callees !0
+  ret i32 %call
+}
+
+define internal i32 @f2(i32 %x) {
+entry:
+  %tobool = icmp eq i32 %x, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 (i32)* @f1, i32 (i32)** @fptr, align 8
+  %sub.i = add nsw i32 %x, -1
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  %phi.call = phi i32 [ %sub.i, %if.then ], [ -1, %entry ]
+  ret i32 %phi.call
+}
+
+define internal i32 @f1(i32 %x) {
+entry:
+  %sub = add nsw i32 %x, -1
+  ret i32 %sub
+}
+
+!0 = !{i32 (i32)* @f1, i32 (i32)* @f2}
Index: llvm/trunk/test/ThinLTO/X86/callees-metadata.ll
===================================================================
--- llvm/trunk/test/ThinLTO/X86/callees-metadata.ll
+++ llvm/trunk/test/ThinLTO/X86/callees-metadata.ll
@@ -0,0 +1,22 @@
+; Do setup work: generate bitcode and combined index
+; RUN: opt -module-summary %s -o %t1.bc
+; RUN: opt -module-summary %p/Inputs/callees-metadata.ll -o %t2.bc
+
+; RUN: llvm-lto2 run %t1.bc %t2.bc -o %t.o -save-temps \
+; RUN:   -r=%t1.bc,bar,plx \
+; RUN:   -r=%t1.bc,foo,l \
+; RUN:   -r=%t2.bc,foo,pl
+; RUN: llvm-dis %t.o.1.3.import.bc -o - | FileCheck %s
+; CHECK: define {{.*}} i32 @f1.llvm.0
+; CHECK: define {{.*}} i32 @f2.llvm.0
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local i32 @bar(i32 %x) {
+entry:
+  %call = call i32 @foo(i32 %x)
+  ret i32 %call
+}
+
+declare dso_local i32 @foo(i32)