diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1599,6 +1599,19 @@ /// static void createShallowWrapper(Function &F); + /// Make another copy of the function \p F such that the copied version has + /// internal linkage afterwards and can be analysed. Then we replace all uses + /// of the original function with the copied one. + /// + /// Only non-exactly defined functions that have `linkonce_odr` or `weak_odr` + /// linkage can be internalized because these linkages guarantee that other + /// definitions with the same name have the same semantics as this one. + /// + /// This will only be run if the `attributor-allow-deep-wrappers` option is + /// set, or if the function is called with \p Force set to true. + /// Returns the internalized copy, or nullptr if \p F was not internalized. + static Function *internalizeFunction(Function &F, bool Force = false); + /// Return the data layout associated with the anchor scope. const DataLayout &getDataLayout() const { return InfoCache.DL; } diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -1527,19 +1527,12 @@ NumFnShallowWrappersCreated++; } -/// Make another copy of the function \p F such that the copied version has -/// internal linkage afterwards and can be analysed. 
Then we replace all uses -/// of the original function to the copied one -/// -/// Only non-exactly defined functions that have `linkonce_odr` or `weak_odr` -/// linkage can be internalized because these linkages guarantee that other -/// definitions with the same name have the same semantics as this one -/// -static Function *internalizeFunction(Function &F) { - assert(AllowDeepWrapper && "Cannot create a copy if not allowed."); - assert(!F.isDeclaration() && !F.hasExactDefinition() && - !GlobalValue::isInterposableLinkage(F.getLinkage()) && - "Trying to internalize function which cannot be internalized."); +Function *Attributor::internalizeFunction(Function &F, bool Force) { + if (!AllowDeepWrapper && !Force) + return nullptr; + if (F.isDeclaration() || F.hasLocalLinkage() || + GlobalValue::isInterposableLinkage(F.getLinkage())) + return nullptr; Module &M = *F.getParent(); FunctionType *FnTy = F.getFunctionType(); @@ -2354,7 +2347,8 @@ Function *F = Functions[u]; if (!F->isDeclaration() && !F->isDefinitionExact() && F->getNumUses() && !GlobalValue::isInterposableLinkage(F->getLinkage())) { - Function *NewF = internalizeFunction(*F); + Function *NewF = Attributor::internalizeFunction(*F); + assert(NewF && "Could not internalize function."); Functions.insert(NewF); // Update call graph diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2564,11 +2564,18 @@ if (DisableOpenMPOptimizations) return PreservedAnalyses::all(); + // Create internal copies of each function if this is a kernel Module. + if (!OMPInModule.getKernels().empty()) + for (Function &F : M) + if (!F.isDeclaration() && F.hasExternalLinkage() && + !OMPInModule.getKernels().contains(&F)) + Attributor::internalizeFunction(F, /* Force */ true); + // Look at every function definition in the Module. 
SmallVector<Function *, 16> SCC; - for (Function &Fn : M) - if (!Fn.isDeclaration()) - SCC.push_back(&Fn); + for (Function &F : M) + if (!F.isDeclaration()) + SCC.push_back(&F); if (SCC.empty()) return PreservedAnalyses::all(); diff --git a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll --- a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll +++ b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll @@ -1,8 +1,8 @@ -; RUN: opt -passes=openmp-opt-cgscc -debug-only=openmp-opt -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -passes=openmp-opt -debug-only=openmp-opt -disable-output < %s 2>&1 | FileCheck %s ; REQUIRES: asserts ; ModuleID = 'single_threaded_exeuction.c' -define void @kernel() { +define weak void @kernel() { call void @__kmpc_kernel_init(i32 512, i16 1) call void @nvptx() call void @amdgcn() @@ -12,7 +12,7 @@ ; CHECK-NOT: [openmp-opt] Basic block @nvptx entry is executed by a single thread. ; CHECK: [openmp-opt] Basic block @nvptx if.then is executed by a single thread. ; CHECK-NOT: [openmp-opt] Basic block @nvptx if.end is executed by a single thread. 
-; Function Attrs: noinline nounwind uwtable +; Function Attrs: noinline define dso_local void @amdgcn() { entry: %call = call i32 @llvm.amdgcn.workitem.id.x() @@ -38,6 +39,7 @@ br i1 %cmp, label %if.then, label %if.end if.then: + call void @foo() call void @bar() br label %if.end @@ -45,9 +47,16 @@ ret void } -; CHECK: [openmp-opt] Basic block @bar entry is executed by a single thread. -; Function Attrs: noinline nounwind uwtable -define internal void @bar() { +; CHECK: [openmp-opt] Basic block @foo entry is executed by a single thread. +; Function Attrs: noinline +define internal void @foo() { +entry: + ret void +} + +; CHECK: [openmp-opt] Basic block @bar.internalized entry is executed by a single thread. +; Function Attrs: noinline +define void @bar() { entry: ret void }