diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -21,8 +21,10 @@
 #include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/NoFolder.h"
 #include "llvm/IR/Verifier.h"
@@ -34,6 +36,7 @@
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
 
 #include <cassert>
@@ -88,6 +91,12 @@
                                   "wrappers for non-exact definitions."),
                          cl::init(false));
 
+static cl::opt<bool>
+    AllowDeepWrapper("attributor-allow-deep-wrappers", cl::Hidden,
+                     cl::desc("Allow the Attributor to use IP information "
+                              "derived from non-exact functions via cloning"),
+                     cl::init(false));
+
 static cl::list<std::string>
     SeedAllowList("attributor-seed-allow-list", cl::Hidden,
                   cl::desc("Comma seperated list of attrbute names that are "
@@ -1413,6 +1422,64 @@
   NumFnShallowWrapperCreated++;
 }
 
+/// Create a private copy of the function \p F so that the copy can be
+/// analysed, then replace all uses of \p F with the copy.
+///
+/// The copy is named "<name of F>.internalized", gets private linkage, is
+/// marked dso_local, and is inserted into the module in front of \p F.
+///
+/// Only non-exactly defined functions that have `linkonce_odr` or `weak_odr`
+/// linkage can be internalized because these linkages guarantee that other
+/// definitions with the same name have the same semantics as this one.
+///
+/// \param F Function definition to copy. It must be neither exact nor
+///          interposable, and -attributor-allow-deep-wrappers must be set;
+///          both conditions are asserted.
+/// \returns The copy. \p F stays in the module but has no uses afterwards.
+static Function *internalizeFunction(Function &F) {
+  assert(AllowDeepWrapper && "Cannot create a copy if not allowed.");
+  assert(!F.isDeclaration() && !F.hasExactDefinition() &&
+         !GlobalValue::isInterposableLinkage(F.getLinkage()) &&
+         "Trying to internalize function which cannot be internalized.");
+
+  Module &M = *F.getParent();
+  FunctionType *FnTy = F.getFunctionType();
+
+  // Create the (still empty) copy; it is added to the module only after its
+  // body has been cloned.
+  Function *Copied =
+      Function::Create(FnTy, GlobalValue::PrivateLinkage, F.getAddressSpace(),
+                       F.getName() + ".internalized");
+
+  // Map each argument of F to the identically named argument of the copy so
+  // the cloned body refers to the copy's own arguments.
+  ValueToValueMapTy VMap;
+  auto *NewFArgIt = Copied->arg_begin();
+  for (auto &Arg : F.args()) {
+    NewFArgIt->setName(Arg.getName());
+    VMap[&Arg] = &(*NewFArgIt++);
+  }
+  SmallVector<ReturnInst *, 8> Returns;
+
+  // Copy the body of the original function to the new one
+  CloneFunctionInto(Copied, &F, VMap, /* ModuleLevelChanges */ false, Returns);
+
+  // Copy the function-level metadata, but only if cloning did not already
+  // attach it; adding it a second time would duplicate every attachment.
+  if (!Copied->hasMetadata()) {
+    SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+    F.getAllMetadata(MDs);
+    for (const auto &MDIt : MDs)
+      Copied->addMetadata(MDIt.first, *MDIt.second);
+  }
+
+  M.getFunctionList().insert(F.getIterator(), Copied);
+  F.replaceAllUsesWith(Copied);
+  Copied->setDSOLocal(true);
+
+  return Copied;
+}
+
 bool Attributor::isValidFunctionSignatureRewrite(
     Argument &Arg, ArrayRef<Type *> ReplacementTypes) {
 
@@ -2145,6 +2212,34 @@
       if (!A.isFunctionIPOAmendable(*F))
         createShallowWrapper(*F);
 
+  // Internalize non-exact functions
+  // TODO: for now we eagerly internalize functions without calculating the
+  //       cost, we need a cost interface to determine whether internalizing
+  //       a function is "beneficial"
+  if (AllowDeepWrapper) {
+    // Internalizing inserts the copy into Functions, which can invalidate
+    // iterators into it. Walk by index over the functions present on entry
+    // so that no iterator is held across the insert and copies are skipped.
+    const unsigned NumOrigFunctions = Functions.size();
+    for (unsigned Idx = 0; Idx < NumOrigFunctions; ++Idx) {
+      Function *F = Functions[Idx];
+      if (F->isDeclaration() || F->isDefinitionExact() || !F->getNumUses() ||
+          GlobalValue::isInterposableLinkage(F->getLinkage()))
+        continue;
+
+      Function *NewF = internalizeFunction(*F);
+      Functions.insert(NewF);
+
+      // Update call graph
+      CGUpdater.registerOutlinedFunction(*NewF);
+      for (const Use &U : NewF->uses())
+        if (CallBase *CB = dyn_cast<CallBase>(U.getUser())) {
+          auto *CallerF = CB->getCaller();
+          CGUpdater.reanalyzeFunction(*CallerF);
+        }
+    }
+  }
+
   for (Function *F : Functions) {
     if (F->hasExactDefinition())
       NumFnWithExactDefinition++;
diff --git a/llvm/test/Transforms/Attributor/internalize.ll b/llvm/test/Transforms/Attributor/internalize.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/internalize.ll
@@ -0,0 +1,143 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes
+; Deep Wrapper disabled
+
+; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM,CHECK_DISABLED,NOT_CGSCC_NPM_DISABLED,NOT_CGSCC_OPM_DISABLED,NOT_TUNIT_NPM_DISABLED,IS__TUNIT_____DISABLED,IS________OPM_DISABLED,IS__TUNIT_OPM_DISABLED
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM,CHECK_DISABLED,NOT_CGSCC_OPM_DISABLED,NOT_CGSCC_NPM_DISABLED,NOT_TUNIT_OPM_DISABLED,IS__TUNIT_____DISABLED,IS________NPM_DISABLED,IS__TUNIT_NPM_DISABLED
+; RUN: opt -attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s 
--check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM,CHECK_DISABLED,NOT_TUNIT_NPM_DISABLED,NOT_TUNIT_OPM_DISABLED,NOT_CGSCC_NPM_DISABLED,IS__CGSCC_____DISABLED,IS________OPM_DISABLED,IS__CGSCC_OPM_DISABLED
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM,CHECK_DISABLED,NOT_TUNIT_NPM_DISABLED,NOT_TUNIT_OPM_DISABLED,NOT_CGSCC_OPM_DISABLED,IS__CGSCC_____DISABLED,IS________NPM_DISABLED,IS__CGSCC_NPM_DISABLED
+
+; Deep Wrapper enabled
+
+; RUN: opt -attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM,CHECK_ENABLED,NOT_CGSCC_NPM_ENABLED,NOT_CGSCC_OPM_ENABLED,NOT_TUNIT_NPM_ENABLED,IS__TUNIT_____ENABLED,IS________OPM_ENABLED,IS__TUNIT_OPM_ENABLED
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM,CHECK_ENABLED,NOT_CGSCC_OPM_ENABLED,NOT_CGSCC_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,IS__TUNIT_____ENABLED,IS________NPM_ENABLED,IS__TUNIT_NPM_ENABLED
+; RUN: opt -attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM,CHECK_ENABLED,NOT_TUNIT_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,NOT_CGSCC_NPM_ENABLED,IS__CGSCC_____ENABLED,IS________OPM_ENABLED,IS__CGSCC_OPM_ENABLED
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM,CHECK_ENABLED,NOT_TUNIT_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,NOT_CGSCC_OPM_ENABLED,IS__CGSCC_____ENABLED,IS________NPM_ENABLED,IS__CGSCC_NPM_ENABLED
+; RUN: opt -attributor -attributor-cgscc -disable-inlining -attributor-allow-deep-wrappers -S < %s | FileCheck %s --check-prefix=DWRAPPER
+
+; TEST 1: This function is of linkage `linkonce`, we cannot internalize this
+; function and use information derived from it
+;
+; DWRAPPER-NOT: Function Attrs
+; DWRAPPER-NOT: inner1.internalized
+define linkonce i32 @inner1(i32 %a, i32 %b) {
+; CHECK-LABEL: define {{[^@]+}}@inner1
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+entry:
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+; TEST 2: This function is of linkage `weak`, we cannot internalize this function and
+; use information derived from it
+;
+; DWRAPPER-NOT: Function Attrs
+; DWRAPPER-NOT: inner2.internalized
+define weak i32 @inner2(i32 %a, i32 %b) {
+; CHECK-LABEL: define {{[^@]+}}@inner2
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+entry:
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+; TEST 3: This function is of linkage `linkonce_odr`, which can be internalized using the
+; deep wrapper, and the IP information derived from this function can be used
+;
+; DWRAPPER: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; DWRAPPER: define private i32 @inner3.internalized(i32 %a, i32 %b)
+; DWRAPPER-NEXT: entry:
+; DWRAPPER-NEXT: %c = add i32 %a, %b
+; DWRAPPER-NEXT: ret i32 %c
+define linkonce_odr i32 @inner3(i32 %a, i32 %b) {
+; CHECK-LABEL: define {{[^@]+}}@inner3
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+entry:
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+; TEST 4: This function is of linkage `weak_odr`, which can be internalized using the deep
+; wrapper
+;
+; DWRAPPER: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; DWRAPPER: define private i32 @inner4.internalized(i32 %a, i32 %b)
+; DWRAPPER-NEXT: entry:
+; DWRAPPER-NEXT: %c = add i32 %a, %b
+; DWRAPPER-NEXT: ret i32 %c
+define weak_odr i32 @inner4(i32 %a, i32 %b) {
+; CHECK-LABEL: define {{[^@]+}}@inner4
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+entry:
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+; TEST 5: This function has linkage `linkonce_odr` but is never called (num of use = 0), so there
+; is no need to internalize this
+;
+; DWRAPPER-NOT: inner5.internalized
+define linkonce_odr i32 @inner5(i32 %a, i32 %b) {
+; CHECK-LABEL: define {{[^@]+}}@inner5
+; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]]
+; CHECK-NEXT: ret i32 [[C]]
+;
+entry:
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+; Since the inner1 cannot be internalized, there should be no change to its callsite
+; Since the inner2 cannot be internalized, there should be no change to its callsite
+; Since the inner3 is internalized, the use of the original function should be replaced by the
+; copied one
+;
+; DWRAPPER-NOT: call i32 @inner1.internalized
+; DWRAPPER: call i32 @inner1
+; DWRAPPER-NOT: call i32 @inner2.internalized
+; DWRAPPER: call i32 @inner2
+; DWRAPPER-NOT: call i32 @inner3
+; DWRAPPER: call i32 @inner3.internalized
+; DWRAPPER-NOT: call i32 @inner4
+; DWRAPPER: call i32 @inner4.internalized
+define i32 @outer1() {
+; CHECK_DISABLED-LABEL: define {{[^@]+}}@outer1()
+; CHECK_DISABLED-NEXT: entry:
+; CHECK_DISABLED-NEXT: [[RET1:%.*]] = call i32 @inner1(i32 1, i32 2)
+; CHECK_DISABLED-NEXT: [[RET2:%.*]] = call i32 @inner2(i32 1, i32 2)
+; CHECK_DISABLED-NEXT: [[RET3:%.*]] = call i32 @inner3(i32 [[RET1]], i32 [[RET2]])
+; CHECK_DISABLED-NEXT: [[RET4:%.*]] = call i32 @inner4(i32 [[RET3]], i32 [[RET3]])
+; CHECK_DISABLED-NEXT: ret i32 [[RET4]]
+;
+; CHECK_ENABLED-LABEL: define {{[^@]+}}@outer1()
+; CHECK_ENABLED-NEXT: entry:
+; CHECK_ENABLED-NEXT: [[RET1:%.*]] = call i32 @inner1(i32 1, i32 2)
+; CHECK_ENABLED-NEXT: [[RET2:%.*]] = call i32 @inner2(i32 1, i32 2)
+; CHECK_ENABLED-NEXT: [[RET3:%.*]] = call i32 @inner3.internalized(i32 [[RET1]], i32 [[RET2]])
+; CHECK_ENABLED-NEXT: [[RET4:%.*]] = call i32 @inner4.internalized(i32 [[RET3]], i32 [[RET3]])
+; CHECK_ENABLED-NEXT: ret i32 [[RET4]]
+;
+entry:
+  %ret1 = call i32 @inner1(i32 1, i32 2)
+  %ret2 = call i32 @inner2(i32 1, i32 2)
+  %ret3 = call i32 @inner3(i32 %ret1, i32 %ret2)
+  %ret4 = call i32 @inner4(i32 %ret3, i32 %ret3)
+  ret i32 %ret4
+}