// Patch summary (reviewer preamble; text ahead of the first `diff --git` header is not part of any hunk).
//
// Files touched: llvm/include/llvm/Transforms/IPO/Attributor.h, llvm/lib/Transforms/IPO/Attributor.cpp,
// llvm/lib/Transforms/IPO/OpenMPOpt.cpp, llvm/test/Transforms/OpenMP/custom_state_machines.ll.
//
// What the patch does:
//  * Adds static Attributor::isInternalizable(Function &F): returns false when F is a declaration, has local
//    linkage, or has an interposable linkage; returns true otherwise. The check is hoisted out of
//    internalizeFunction unchanged.
//  * Adds static Attributor::internalizeFunctions(FnSet, FnMap): returns false (before touching FnMap) if any
//    member of FnSet fails isInternalizable. Otherwise it clears FnMap, clones every function into a private,
//    dso_local, default-visibility copy named "<name>.internalized" inserted just before the original, records
//    original -> copy in FnMap, and only after ALL copies exist rewrites uses of each original to its copy.
//  * A use is rewritten only when its user passes the dyn_cast in the predicate and that call's caller is not
//    a key of FnMap; every other use keeps pointing at the original. The removed code used
//    F.replaceAllUsesWith(Copied), so this is a behavior change, not just a refactor.
//  * Attributor::internalizeFunction(F, Force) now builds a one-element set, delegates to
//    internalizeFunctions, and returns InternalizedFns[&F].
//  * OpenMPOpt: for OpenMP device modules, collects internalizable functions into a set and internalizes them
//    with one internalizeFunctions call; the SCC filter now uses InternalizedMap.lookup(&F). The OMP140 remark
//    text gains a " [OMP140]" suffix.
//  * custom_state_machines.ll: the CHECK lines in this hunk now expect the icmp/sub to use the loaded values
//    ([[TMP0]]/[[TMP1]]) and the two calls to target the non-".internalized" callees with [[ATTR8]].
//
// NOTE(review): the metadata copy loop is guarded by `!Copied->hasMetadata()`, which asks about ANY metadata
//   rather than the kind being copied. Presumably only the first entry (or none, if CloneFunctionInto already
//   attached metadata) gets added -- confirm whether a per-kind check was intended. Carried over from old code.
// NOTE(review): the new header comment says only `linkonce_odr`/`weak_odr` functions can be internalized, but
//   isInternalizable does not test for those linkages specifically -- confirm which of the two is intended.
// NOTE(review): on the `return false` path FnMap is NOT cleared, so entries a caller put there beforehand
//   survive. Both call sites shown here ignore the bool result and pass freshly constructed maps.
// NOTE(review): this copy of the patch looks damaged -- newlines are collapsed and template argument lists
//   appear to be missing (e.g. `SmallPtrSetImpl &FnSet`, `dyn_cast(U.getUser())`, `SmallVector, 1> MDs`).
//   Restore them from the upstream change before applying; TODO confirm exact types.
// Comment-only fix: "linakage" -> "linkage" in the ADDED comment line; the removed line keeps the original
//   spelling so it still matches the pre-image.
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1855,6 +1855,10 @@ /// static void createShallowWrapper(Function &F); + /// Returns true if the function \p F can be internalized. i.e. it has a + /// compatible linkage. + static bool isInternalizable(Function &F); + /// Make another copy of the function \p F such that the copied version has /// internal linkage afterwards and can be analysed. Then we replace all uses /// of the original function to the copied one @@ -1870,6 +1874,22 @@ /// null pointer. static Function *internalizeFunction(Function &F, bool Force = false); + /// Make copies of each function in the set \p FnSet such that the copied + /// version has internal linkage afterwards and can be analysed. Then we + /// replace all uses of the original function to the copied one. The map + /// \p FnMap contains a mapping of functions to their internalized versions. + /// + /// Only non-locally linked functions that have `linkonce_odr` or `weak_odr` + /// linkage can be internalized because these linkages guarantee that other + /// definitions with the same name have the same semantics as this one. + /// + /// This version will internalize all the functions in the set \p FnSet at + /// once and then replace the uses. This prevents internalized functions being + /// called by external functions when there is an internalized version in the + /// module. + static bool internalizeFunctions(SmallPtrSetImpl &FnSet, + DenseMap &FnMap); + /// Return the data layout associated with the anchor scope. 
const DataLayout &getDataLayout() const { return InfoCache.DL; } diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -1925,49 +1925,85 @@ NumFnShallowWrappersCreated++; } +bool Attributor::isInternalizable(Function &F) { + if (F.isDeclaration() || F.hasLocalLinkage() || + GlobalValue::isInterposableLinkage(F.getLinkage())) + return false; + return true; +} + Function *Attributor::internalizeFunction(Function &F, bool Force) { if (!AllowDeepWrapper && !Force) return nullptr; - if (F.isDeclaration() || F.hasLocalLinkage() || - GlobalValue::isInterposableLinkage(F.getLinkage())) + if (!isInternalizable(F)) return nullptr; - Module &M = *F.getParent(); - FunctionType *FnTy = F.getFunctionType(); - - // create a copy of the current function - Function *Copied = Function::Create(FnTy, F.getLinkage(), F.getAddressSpace(), - F.getName() + ".internalized"); - ValueToValueMapTy VMap; - auto *NewFArgIt = Copied->arg_begin(); - for (auto &Arg : F.args()) { - auto ArgName = Arg.getName(); - NewFArgIt->setName(ArgName); - VMap[&Arg] = &(*NewFArgIt++); - } - SmallVector Returns; + SmallPtrSet FnSet = {&F}; + DenseMap InternalizedFns; + internalizeFunctions(FnSet, InternalizedFns); - // Copy the body of the original function to the new one - CloneFunctionInto(Copied, &F, VMap, CloneFunctionChangeType::LocalChangesOnly, - Returns); - - // Set the linakage and visibility late as CloneFunctionInto has some implicit - // requirements. 
- Copied->setVisibility(GlobalValue::DefaultVisibility); - Copied->setLinkage(GlobalValue::PrivateLinkage); + return InternalizedFns[&F]; +} - // Copy metadata - SmallVector, 1> MDs; - F.getAllMetadata(MDs); - for (auto MDIt : MDs) - if (!Copied->hasMetadata()) - Copied->addMetadata(MDIt.first, *MDIt.second); +bool Attributor::internalizeFunctions(SmallPtrSetImpl &FnSet, + DenseMap &FnMap) { + for (Function *F : FnSet) + if (!Attributor::isInternalizable(*F)) + return false; - M.getFunctionList().insert(F.getIterator(), Copied); - F.replaceAllUsesWith(Copied); - Copied->setDSOLocal(true); + FnMap.clear(); + // Generate the internalized version of each function. + for (Function *F : FnSet) { + Module &M = *F->getParent(); + FunctionType *FnTy = F->getFunctionType(); + + // Create a copy of the current function + Function *Copied = + Function::Create(FnTy, F->getLinkage(), F->getAddressSpace(), + F->getName() + ".internalized"); + ValueToValueMapTy VMap; + auto *NewFArgIt = Copied->arg_begin(); + for (auto &Arg : F->args()) { + auto ArgName = Arg.getName(); + NewFArgIt->setName(ArgName); + VMap[&Arg] = &(*NewFArgIt++); + } + SmallVector Returns; + + // Copy the body of the original function to the new one + CloneFunctionInto(Copied, F, VMap, + CloneFunctionChangeType::LocalChangesOnly, Returns); + + // Set the linkage and visibility late as CloneFunctionInto has some + // implicit requirements. + Copied->setVisibility(GlobalValue::DefaultVisibility); + Copied->setLinkage(GlobalValue::PrivateLinkage); + + // Copy metadata + SmallVector, 1> MDs; + F->getAllMetadata(MDs); + for (auto MDIt : MDs) + if (!Copied->hasMetadata()) + Copied->addMetadata(MDIt.first, *MDIt.second); + + M.getFunctionList().insert(F->getIterator(), Copied); + Copied->setDSOLocal(true); + FnMap[F] = Copied; + } + + // Replace all uses of the old function with the new internalized function + // unless the caller is a function that was just internalized. 
+ for (Function *F : FnSet) { + auto &InternalizedFn = FnMap[F]; + auto IsNotInternalized = [&](Use &U) -> bool { + if (auto *CB = dyn_cast(U.getUser())) + return !FnMap.lookup(CB->getCaller()); + return false; + }; + F->replaceUsesWithIf(InternalizedFn, IsNotInternalized); + } - return Copied; + return true; } bool Attributor::isValidFunctionSignatureRewrite( diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -4176,28 +4176,32 @@ ORE.emit([&]() { OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "OMP140", &F); return ORA << "Could not internalize function. " - << "Some optimizations may not be possible."; + << "Some optimizations may not be possible. [OMP140]"; }); }; // Create internal copies of each function if this is a kernel Module. This // allows iterprocedural passes to see every call edge. - DenseSet InternalizedFuncs; - if (isOpenMPDevice(M)) + DenseMap InternalizedMap; + if (isOpenMPDevice(M)) { + SmallPtrSet InternalizeFns; for (Function &F : M) if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) && !DisableInternalization) { - if (Attributor::internalizeFunction(F, /* Force */ true)) { - InternalizedFuncs.insert(&F); + if (Attributor::isInternalizable(F)) { + InternalizeFns.insert(&F); } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) { EmitRemark(F); } } + Attributor::internalizeFunctions(InternalizeFns, InternalizedMap); + } + // Look at every function in the Module unless it was internalized. 
SmallVector SCC; for (Function &F : M) - if (!F.isDeclaration() && !InternalizedFuncs.contains(&F)) + if (!F.isDeclaration() && !InternalizedMap.lookup(&F)) SCC.push_back(&F); if (SCC.empty()) diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll --- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll @@ -1664,15 +1664,15 @@ ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: ; CHECK-NEXT: br label [[RETURN:%.*]] ; CHECK: if.end: ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 -; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[A]], 1 -; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR7]] -; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR7]] +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 +; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR8]] +; CHECK-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR8]] ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: ; CHECK-NEXT: ret void