diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -305,6 +305,7 @@
 
   bool HasInlineAsmMaybeReferencingInternal = false;
   bool HasIndirBranchToBlockAddress = false;
+  bool HasIFuncCall = false;
   bool HasUnknownCall = false;
   bool MayThrow = false;
   for (const BasicBlock &BB : F) {
@@ -417,6 +418,16 @@
           }
         } else {
           HasUnknownCall = true;
+          // If F is imported, a local linkage ifunc (e.g. target_clones on a
+          // static function) called by F will be cloned. Since summaries don't
+          // track ifunc, we do not know implementation functions referenced by
+          // the ifunc resolver need to be promoted in the exporter, and we will
+          // get linker errors due to cloned declarations for implementation
+          // functions. As a simple fix, just mark F as not eligible for import.
+          // Non-local ifunc is not cloned and does not have the issue.
+          if (auto *GI = dyn_cast_if_present<GlobalIFunc>(CalledValue))
+            if (GI->hasLocalLinkage())
+              HasIFuncCall = true;
           // Skip inline assembly calls.
           if (CI && CI->isInlineAsm())
             continue;
@@ -599,7 +610,7 @@
   bool NonRenamableLocal = isNonRenamableLocal(F);
   bool NotEligibleForImport = NonRenamableLocal ||
                               HasInlineAsmMaybeReferencingInternal ||
-                              HasIndirBranchToBlockAddress;
+                              HasIndirBranchToBlockAddress || HasIFuncCall;
   GlobalValueSummary::GVFlags Flags(
       F.getLinkage(), F.getVisibility(), NotEligibleForImport,
       /* Live = */ false, F.isDSOLocal(), F.canBeOmittedFromSymbolTable());
diff --git a/llvm/test/ThinLTO/X86/ifunc-import.ll b/llvm/test/ThinLTO/X86/ifunc-import.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/ifunc-import.ll
@@ -0,0 +1,79 @@
+; RUN: rm -rf %t && split-file %s %t && cd %t
+; RUN: opt -module-summary -o a.bc a.ll
+; RUN: opt -module-summary -o b.bc b.ll
+; RUN: llvm-lto2 run a.bc b.bc -o t --save-temps \
+; RUN:   -r a.bc,foo2.ifunc,pl -r a.bc,use,pl -r a.bc,use2,pl -r a.bc,__cpu_model,lx \
+; RUN:   -r b.bc,main,plx -r b.bc,use,l -r b.bc,use2,l
+; RUN: llvm-dis < t.1.3.import.bc | FileCheck %s --check-prefix=A
+; RUN: llvm-dis < t.2.3.import.bc | FileCheck %s --check-prefix=B --implicit-check-not='@foo.resolver'
+
+; A: define internal nonnull ptr @foo.resolver()
+; A: define internal i32 @foo.default.1(i32 %n)
+
+;; The ifunc implementations of foo.ifunc are internal in A, so they cannot be
+;; referenced by B. Our implementation actually ensures that the ifunc resolver
+;; along with its implementations are not imported.
+; B: declare i32 @use(i32) local_unnamed_addr
+; B: define available_externally i32 @use2(i32 %n) local_unnamed_addr
+; B: declare i32 @foo2.ifunc(i32)
+
+;--- a.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+$foo.resolver = comdat any
+
+@__cpu_model = external dso_local local_unnamed_addr global { i32, i32, i32, [1 x i32] }
+
+@foo.ifunc = internal ifunc i32 (i32), ptr @foo.resolver
+@foo2.ifunc = ifunc i32 (i32), ptr @foo.resolver
+
+define dso_local i32 @use(i32 %n) local_unnamed_addr {
+entry:
+  %call = call i32 @foo.ifunc(i32 %n)
+  ret i32 %call
+}
+
+define dso_local i32 @use2(i32 %n) local_unnamed_addr {
+entry:
+  %call = call i32 @foo2.ifunc(i32 %n)
+  ret i32 %call
+}
+
+define internal nonnull ptr @foo.resolver() comdat {
+entry:
+  %0 = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i64 0, i32 3, i64 0), align 4
+  %1 = and i32 %0, 4
+  %.not = icmp eq i32 %1, 0
+  %foo.popcnt.0.foo.default.1 = select i1 %.not, ptr @foo.default.1, ptr @foo.popcnt.0
+  ret ptr %foo.popcnt.0.foo.default.1
+}
+
+define internal i32 @foo.popcnt.0(i32 %n) {
+entry:
+  %0 = call i32 @llvm.ctpop.i32(i32 %n)
+  ret i32 %0
+}
+
+declare i32 @llvm.ctpop.i32(i32)
+
+define internal i32 @foo.default.1(i32 %n) {
+entry:
+  %0 = call i32 @llvm.ctpop.i32(i32 %n)
+  ret i32 %0
+}
+
+;--- b.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local i32 @main() local_unnamed_addr {
+entry:
+  %0 = call i32 @use(i32 0)
+  %1 = call i32 @use2(i32 0)
+  %2 = add i32 %0, %1
+  ret i32 %2
+}
+
+declare i32 @use(i32) local_unnamed_addr
+declare i32 @use2(i32) local_unnamed_addr