diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1059,13 +1059,15 @@ /// the abstract attributes. /// \param CGUpdater Helper to update an underlying call graph. /// \param Allowed If not null, a set limiting the attribute opportunities. - /// \param DeleteFns Whether to delete functions + /// \param DeleteFns Whether to delete functions. + /// \param RewriteSignatures Whether to rewrite function signatures. Attributor(SetVector &Functions, InformationCache &InfoCache, CallGraphUpdater &CGUpdater, - DenseSet *Allowed = nullptr, bool DeleteFns = true) + DenseSet *Allowed = nullptr, bool DeleteFns = true, + bool RewriteSignatures = true) : Allocator(InfoCache.Allocator), Functions(Functions), InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed), - DeleteFns(DeleteFns) {} + DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures) {} ~Attributor(); @@ -1599,6 +1601,21 @@ /// static void createShallowWrapper(Function &F); + /// Make another copy of the function \p F such that the copied version has + /// internal linkage afterwards and can be analysed. Then we replace all uses + /// of the original function to the copied one + /// + /// Only non-locally linked functions that have `linkonce_odr` or `weak_odr` + /// linkage can be internalized because these linkages guarantee that other + /// definitions with the same name have the same semantics as this one. + /// + /// This will only be run if the `attributor-allow-deep-wrappers` option is + /// set, or if the function is called with \p Force set to true. + /// + /// If the function \p F failed to be internalized the return value will be a + /// null pointer. + static Function *internalizeFunction(Function &F, bool Force = false); + /// Return the data layout associated with the anchor scope. const DataLayout &getDataLayout() const { return InfoCache.DL; } @@ -1711,6 +1728,9 @@ /// Whether to delete functions. const bool DeleteFns; + /// Whether to rewrite signatures. + const bool RewriteSignatures; + /// A set to remember the functions we already assume to be live and visited. DenseSet VisitedFunctions; diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -1527,19 +1527,12 @@ NumFnShallowWrappersCreated++; } -/// Make another copy of the function \p F such that the copied version has -/// internal linkage afterwards and can be analysed. Then we replace all uses -/// of the original function to the copied one -/// -/// Only non-exactly defined functions that have `linkonce_odr` or `weak_odr` -/// linkage can be internalized because these linkages guarantee that other -/// definitions with the same name have the same semantics as this one -/// -static Function *internalizeFunction(Function &F) { - assert(AllowDeepWrapper && "Cannot create a copy if not allowed."); - assert(!F.isDeclaration() && !F.hasExactDefinition() && - !GlobalValue::isInterposableLinkage(F.getLinkage()) && - "Trying to internalize function which cannot be internalized."); +Function *Attributor::internalizeFunction(Function &F, bool Force) { + if (!AllowDeepWrapper && !Force) + return nullptr; + if (F.isDeclaration() || F.hasLocalLinkage() || + GlobalValue::isInterposableLinkage(F.getLinkage())) + return nullptr; Module &M = *F.getParent(); FunctionType *FnTy = F.getFunctionType(); @@ -1569,7 +1562,8 @@ SmallVector, 1> MDs; F.getAllMetadata(MDs); for (auto MDIt : MDs) - Copied->addMetadata(MDIt.first, *MDIt.second); + if (!Copied->hasMetadata()) + Copied->addMetadata(MDIt.first, *MDIt.second); M.getFunctionList().insert(F.getIterator(), Copied); F.replaceAllUsesWith(Copied); @@ -1581,6 +1575,9 @@ bool Attributor::isValidFunctionSignatureRewrite( Argument &Arg, ArrayRef ReplacementTypes) { + if (!RewriteSignatures) + return false; + auto CallSiteCanBeChanged = [](AbstractCallSite ACS) { // Forbid the call site to cast the function return type. If we need to // rewrite these functions we need to re-create a cast for the new call site @@ -2354,7 +2351,8 @@ Function *F = Functions[u]; if (!F->isDeclaration() && !F->isDefinitionExact() && F->getNumUses() && !GlobalValue::isInterposableLinkage(F->getLinkage())) { - Function *NewF = internalizeFunction(*F); + Function *NewF = Attributor::internalizeFunction(*F); + assert(NewF && "Could not internalize function."); Functions.insert(NewF); // Update call graph diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -546,6 +546,8 @@ if (IsModulePass) { Changed |= runAttributor(); + OMPInfoCache.recollectUses(); + Changed |= replaceGlobalization(); if (remarksEnabled()) analysisGlobalization(); @@ -2561,11 +2563,19 @@ if (DisableOpenMPOptimizations) return PreservedAnalyses::all(); - // Look at every function definition in the Module. + // Create internal copies of each function if this is a kernel Module. + DenseSet InternalizedFuncs; + if (!OMPInModule.getKernels().empty()) + for (Function &F : M) + if (!F.isDeclaration() && !OMPInModule.getKernels().contains(&F)) + if (Attributor::internalizeFunction(F, /* Force */ true)) + InternalizedFuncs.insert(&F); + + // Look at every function definition in the Module that wasn't internalized. SmallVector SCC; - for (Function &Fn : M) - if (!Fn.isDeclaration()) - SCC.push_back(&Fn); + for (Function &F : M) + if (!F.isDeclaration() && !InternalizedFuncs.contains(&F)) + SCC.push_back(&F); if (SCC.empty()) return PreservedAnalyses::all(); @@ -2586,7 +2596,7 @@ OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, OMPInModule.getKernels()); - Attributor A(Functions, InfoCache, CGUpdater); + Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); bool Changed = OMPOpt.run(true); diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll --- a/llvm/test/Transforms/OpenMP/replace_globalization.ll +++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll @@ -3,6 +3,8 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64" +@S = external local_unnamed_addr global i8* + ; CHECK-REMARKS: remark: replace_globalization.c:5:7: Replaced globalized variable with 16 bytes of shared memory ; CHECK-REMARKS: remark: replace_globalization.c:5:14: Replaced globalized variable with 4 bytes of shared memory ; CHECK: [[SHARED_X:@.+]] = internal addrspace(3) global [16 x i8] undef @@ -70,7 +72,7 @@ define void @use(i8* %x) { entry: %addr = alloca i8* - store i8* %x, i8** %addr + store i8* %x, i8** @S ret void } diff --git a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll --- a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll +++ b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll @@ -1,8 +1,8 @@ -; RUN: opt -passes=openmp-opt-cgscc -debug-only=openmp-opt -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -passes=openmp-opt -debug-only=openmp-opt -disable-output < %s 2>&1 | FileCheck %s ; REQUIRES: asserts ; ModuleID = 'single_threaded_exeuction.c' -define void @kernel() { +define weak void @kernel() { call void @__kmpc_kernel_init(i32 512, i16 1) call void @nvptx() call void @amdgcn() @@ -12,7 +12,7 @@ ; CHECK-NOT: [openmp-opt] Basic block @nvptx entry is executed by a single thread. ; CHECK: [openmp-opt] Basic block @nvptx if.then is executed by a single thread. ; CHECK-NOT: [openmp-opt] Basic block @nvptx if.end is executed by a single thread. -; Function Attrs: noinline nounwind uwtable +; Function Attrs: noinline define dso_local void @nvptx() { entry: %call = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() @@ -20,6 +20,7 @@ br i1 %cmp, label %if.then, label %if.end if.then: + call void @foo() call void @bar() br label %if.end @@ -30,7 +31,7 @@ ; CHECK-NOT: [openmp-opt] Basic block @amdgcn entry is executed by a single thread. ; CHECK: [openmp-opt] Basic block @amdgcn if.then is executed by a single thread. ; CHECK-NOT: [openmp-opt] Basic block @amdgcn if.end is executed by a single thread. -; Function Attrs: noinline nounwind uwtable +; Function Attrs: noinline define dso_local void @amdgcn() { entry: %call = call i32 @llvm.amdgcn.workitem.id.x() @@ -38,6 +39,7 @@ br i1 %cmp, label %if.then, label %if.end if.then: + call void @foo() call void @bar() br label %if.end @@ -45,9 +47,16 @@ ret void } -; CHECK: [openmp-opt] Basic block @bar entry is executed by a single thread. -; Function Attrs: noinline nounwind uwtable -define internal void @bar() { +; CHECK: [openmp-opt] Basic block @foo entry is executed by a single thread. +; Function Attrs: noinline +define internal void @foo() { +entry: + ret void +} + +; CHECK: [openmp-opt] Basic block @bar.internalized entry is executed by a single thread. +; Function Attrs: noinline +define void @bar() { entry: ret void }