diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -20,8 +20,10 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/NoFolder.h" #include "llvm/IR/Verifier.h" @@ -33,6 +35,7 @@ #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include @@ -87,6 +90,12 @@ "wrappers for non-exact definitions."), cl::init(false)); +static cl::opt + AllowDeepWrapper("attributor-allow-deep-wrappers", cl::Hidden, + cl::desc("Allow the Attributor to use IP information " + "derived from non-exact functions"), + cl::init(false)); + static cl::list SeedAllowList("attributor-seed-allow-list", cl::Hidden, cl::desc("Comma seperated list of attrbute names that are " @@ -1405,6 +1414,54 @@ NumFnShallowWrapperCreated++; } +/// Make another copy of the function \p F such that the copied version has +/// internal linkage afterwards and can be analysed. Then we replace all uses +/// of the original function to the copied one +/// +/// Only non-exactly defined functions that have `linkonce_odr` or `weak_odr` +/// linkdage can be internalized because these linkages guarantee that other +/// definitions with the same name have the same semantics as this one +/// +static Function *internalizeFunction(Function &F) { + assert(AllowDeepWrapper && "Cannot create a copy if not allowed."); + assert(!F.hasExactDefinition() && + ((F.getLinkage() != llvm::GlobalValue::LinkOnceAnyLinkage) || + (F.getLinkage() != llvm::GlobalValue::WeakAnyLinkage)) && + "Trying to internalize function which cannot be internalized."); + + Module &M = *F.getParent(); + FunctionType *FnTy = F.getFunctionType(); + + // create a copy of the current function + Function *Copied = + Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, + F.getAddressSpace(), F.getName() + "_copied"); + ValueToValueMapTy VMap; + auto *NewFArgIt = Copied->arg_begin(); + for (auto &Arg : F.args()) { + auto ArgName = Arg.getName(); + NewFArgIt->setName(ArgName); + VMap[&Arg] = &(*NewFArgIt++); + } + SmallVector Returns; + + // Copy the body of the original function to the new one + llvm::CloneFunctionInto(Copied, &F, VMap, /* ModuleLevelChanges */ true, + Returns); + + // Copy metadata + SmallVector, 1> MDs; + F.getAllMetadata(MDs); + for (auto MDIt : MDs) + Copied->addMetadata(MDIt.first, *MDIt.second); + + M.getFunctionList().insert(F.getIterator(), Copied); + F.replaceAllUsesWith(Copied); + Copied->setDSOLocal(true); + + return Copied; +} + bool Attributor::isValidFunctionSignatureRewrite( Argument &Arg, ArrayRef ReplacementTypes) { @@ -2124,6 +2181,26 @@ if (!A.isFunctionIPOAmendable(*F)) createShallowWrapper(*F); + // Internalize non-exact functions + // TODO: for now we eagerly internalize functions without calculating the + // cost, we need a cost interface to determine whether internalizing + // a function is "benefitial" + if (AllowDeepWrapper) + for (Function *F : Functions) + if (!F->hasExactDefinition() && F->getNumUses() && + F->getLinkage() != llvm::GlobalValue::LinkOnceAnyLinkage && + F->getLinkage() != llvm::GlobalValue::WeakAnyLinkage) { + Function *NewF = internalizeFunction(*F); + Functions.insert(NewF); + + // Update call graph + for (const Use &U : NewF->uses()) + if (const auto *CB = dyn_cast(U.getUser())) { + auto *CallerF = const_cast(CB->getCaller()); + CGUpdater.reanalyzeFunction(*CallerF); + } + } + for (Function *F : Functions) { if (F->hasExactDefinition()) NumFnWithExactDefinition++; diff --git a/llvm/test/Transforms/Attributor/internalize.ll b/llvm/test/Transforms/Attributor/internalize.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Attributor/internalize.ll @@ -0,0 +1,83 @@ +; RUN: opt -attributor -attributor-cgscc -disable-inlining -S < %s | FileCheck %s --check-prefix=DWRAPPER + +; TEST 1: This function is of linkage `linkonce`, we cannot internalize this +; function and use information derived from it +; +; DWRAPPER-NOT: Function Attrs +; DWRAPPER-NOT: inner1_copied +define linkonce i32 @inner1(i32 %a, i32 %b) { +entry: + %c = add i32 %a, %b + ret i32 %c +} + +; TEST 2: This function is of linkage `weak`, we cannot internalize this function and +; use information derived from it +; +; DWRAPPER-NOT: Function Attrs +; DWRAPPER-NOT: inner2_copied +define weak i32 @inner2(i32 %a, i32 %b) { +entry: + %c = add i32 %a, %b + ret i32 %c +} + +; TEST 3: This function is of linkage `linkonce_odr`, which can be internalized using the +; deep wrapper, and the IP information derived from this function can be used +; +; DWRAPPER: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; DWRAPPER: define internal i32 @inner3_copied(i32 %a, i32 %b) +; DWRAPPER-NEXT: entry: +; DWRAPPER-NEXT: %c = add i32 %a, %b +; DWRAPPER-NEXT: ret i32 %c +define linkonce_odr i32 @inner3(i32 %a, i32 %b) { +entry: + %c = add i32 %a, %b + ret i32 %c +} + +; TEST 4: This function is of linkage `weak_odr`, which can be internalized using the deep +; wrapper +; +; DWRAPPER: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; DWRAPPER: define internal i32 @inner4_copied(i32 %a, i32 %b) +; DWRAPPER-NEXT: entry: +; DWRAPPER-NEXT: %c = add i32 %a, %b +; DWRAPPER-NEXT: ret i32 %c +define weak_odr i32 @inner4(i32 %a, i32 %b) { +entry: + %c = add i32 %a, %b + ret i32 %c +} + +; TEST 5: This function has linkage `linkonce_odr` but is never called (num of use = 0), so there +; is no need to internalize this +; +; DWRAPPER-NOT: inner5_copied +define linkonce_odr i32 @inner5(i32 %a, i32 %b) { +entry: + %c = add i32 %a, %b + ret i32 %c +} + +; Since the inner1 cannot be internalized, there should be no change to its callsite +; Since the inner2 cannot be internalized, there should be no change to its callsite +; Since the inner3 is internalized, the use of the original function should be replaced by the +; copied one +; +; DWRAPPER-NOT: call i32 @inner1_copied +; DWRAPPER: call i32 @inner1 +; DWRAPPER-NOT: call i32 @inner2_copied +; DWRAPPER: call i32 @inner2 +; DWRAPPER-NOT: call i32 @inner3 +; DWRAPPER: call i32 @inner3_copied +; DWRAPPER-NOT: call i32 @inner4 +; DWRAPPER: call i32 @inner4_copied +define i32 @outer1() { +entry: + %ret1 = call i32 @inner1(i32 1, i32 2) + %ret2 = call i32 @inner2(i32 1, i32 2) + %ret3 = call i32 @inner3(i32 %ret1, i32 %ret2) + %ret4 = call i32 @inner4(i32 %ret3, i32 %ret3) + ret i32 %ret4 +} \ No newline at end of file