Index: llvm/lib/Transforms/IPO/Attributor.cpp
===================================================================
--- llvm/lib/Transforms/IPO/Attributor.cpp
+++ llvm/lib/Transforms/IPO/Attributor.cpp
@@ -57,6 +57,7 @@
           "Number of function with exact definitions");
 STATISTIC(NumFnWithoutExactDefinition,
           "Number of function without exact definitions");
+STATISTIC(NumFnShallowWrapperCreated, "Number of shallow wrappers created");
 STATISTIC(NumAttributesTimedOut,
           "Number of abstract attributes timed out before fixpoint");
 STATISTIC(NumAttributesValidFixpoint,
@@ -184,6 +185,12 @@
 static cl::opt<int> MaxHeapToStackSize("max-heap-to-stack-size", cl::init(128),
                                        cl::Hidden);
 
+static cl::opt<bool>
+    AllowShallowWrappers("attributor-allow-shallow-wrappers", cl::Hidden,
+                         cl::desc("Allow the Attributor to create shallow "
+                                  "wrappers for non-exact definitions."),
+                         cl::init(false));
+
 /// Logic operators for the change status enum class.
 ///
 ///{
@@ -724,7 +731,6 @@
   return HasAttr;
 }
 
-
 void IRPosition::verify() {
   switch (KindOrArgNo) {
   default:
@@ -5633,7 +5639,7 @@
       createReplacementValues(
           PrivatizableType.getValue(), ACS,
           ACS.getCallArgOperand(ARI.getReplacedArg().getArgNo()),
-                          NewArgOperands);
+          NewArgOperands);
     };
 
     // Collect the types that will replace the privatizable type in the function
@@ -8025,6 +8031,69 @@
   return ManifestChange;
 }
 
+/// Create a shallow wrapper for \p F such that \p F has internal linkage
+/// afterwards. The original \p F is also renamed so that it is anonymous.
+///
+/// A wrapper is a function with the same type (and attributes) as \p F
+/// that will only call \p F and return the result, if any.
+///
+/// Assuming the declaration of \p F looks like:
+///   rty F(aty0 arg0, ..., atyN argN);
+///
+/// The wrapper will then look as follows:
+///   rty wrapper(aty0 arg0, ..., atyN argN) {
+///     return F(arg0, ..., argN);
+///   }
+///
+static void createShallowWrapper(Function &F) {
+  assert(AllowShallowWrappers &&
+         "Cannot create a wrapper if it is not allowed!");
+  assert(!F.isDeclaration() && "Cannot create a wrapper around a declaration!");
+
+  Module &M = *F.getParent();
+  LLVMContext &Ctx = M.getContext();
+  FunctionType *FnTy = F.getFunctionType();
+
+  Function *Wrapper =
+      Function::Create(FnTy, F.getLinkage(), F.getAddressSpace(), F.getName());
+  F.setName(""); // set the inside function anonymous
+  M.getFunctionList().insert(F.getIterator(), Wrapper);
+
+  F.setLinkage(GlobalValue::InternalLinkage);
+
+  F.replaceAllUsesWith(Wrapper);
+  assert(F.use_empty() && "Uses remained after wrapper was created!");
+
+  // Move the COMDAT section to the wrapper.
+  // TODO: Check if we need to keep it for F as well.
+  Wrapper->setComdat(F.getComdat());
+  F.setComdat(nullptr);
+
+  // Copy all metadata and attributes but keep them on F as well.
+  SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+  F.getAllMetadata(MDs);
+  for (auto MDIt : MDs)
+    Wrapper->addMetadata(MDIt.first, *MDIt.second);
+  Wrapper->setAttributes(F.getAttributes());
+
+  // Create the call in the wrapper.
+  BasicBlock *EntryBB = BasicBlock::Create(Ctx, "entry", Wrapper);
+
+  SmallVector<Value *, 8> Args;
+  auto FArgIt = F.arg_begin();
+  for (Argument &Arg : Wrapper->args()) {
+    Args.push_back(&Arg);
+    Arg.setName((FArgIt++)->getName());
+  }
+
+  CallInst *CI = CallInst::Create(&F, Args, "", EntryBB);
+  CI->setTailCall(true);
+  CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoInline);
+  ReturnInst::Create(Ctx, CI->getType()->isVoidTy() ? nullptr : CI, EntryBB);
+
+  NumFnShallowWrapperCreated++;
+}
+
 bool Attributor::isValidFunctionSignatureRewrite(
     Argument &Arg, ArrayRef<Type *> ReplacementTypes) {
 
@@ -8650,6 +8719,12 @@
   for (Function *F : Functions)
     A.initializeInformationCache(*F);
 
+  // Create shallow wrappers for all functions that are not IPO amendable
+  if (AllowShallowWrappers)
+    for (Function *F : Functions)
+      if (!A.isFunctionIPOAmendable(*F))
+        createShallowWrapper(*F);
+
   for (Function *F : Functions) {
     if (F->hasExactDefinition())
       NumFnWithExactDefinition++;
Index: llvm/test/Transforms/Attributor/wrapper.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Attributor/wrapper.ll
@@ -0,0 +1,88 @@
+; RUN: opt -passes=attributor-cgscc -attributor-disable=false -attributor-annotate-decl-cs -attributor-allow-shallow-wrappers -S < %s | FileCheck %s --check-prefix=CHECK
+
+; TEST 1: simple test, without argument
+; A wrapper will be generated for this function; check the wrapper first
+; CHECK-NOT: Function Attrs:
+; CHECK: define linkonce i32 @inner1()
+; CHECK: tail call i32 @0()
+; CHECK: ret
+;
+; Check the original function, which is wrapped and becomes anonymous
+; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; CHECK: define internal i32 @0()
+; CHECK: %a = alloca i32
+; CHECK: store i32 1, i32* %a
+; CHECK: %b = load i32, i32* %a
+; CHECK: ret i32 %b
+define linkonce i32 @inner1() {
+entry:
+  %a = alloca i32
+  store i32 1, i32* %a
+  %b = load i32, i32* %a
+  ret i32 %b
+}
+
+; Check for call
+; CHECK: define i32 @outer1
+; CHECK: call i32 @inner1
+; CHECK: ret
+define i32 @outer1() {
+entry:
+  %ret = call i32 @inner1()
+  ret i32 %ret
+}
+
+; TEST 2: with argument
+; CHECK-NOT: Function Attrs
+; CHECK: define linkonce i32 @inner2(i32 %a, i32 %b)
+; CHECK: tail call i32 @1(i32 %a, i32 %b)
+; CHECK: ret
+;
+; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; CHECK: define internal i32 @1(i32 %a, i32 %b)
+; CHECK: %c = add i32 %a, %b
+; CHECK: ret i32 %c
+define linkonce i32 @inner2(i32 %a, i32 %b) {
+entry:
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+; CHECK: define i32 @outer2
+; CHECK: call i32 @inner2
+; CHECK: ret
+define i32 @outer2() {
+entry:
+  %ret = call i32 @inner2(i32 1, i32 2)
+  ret i32 %ret
+}
+
+; TEST 3: check norecurse
+; This function calls itself, so there will be no attributes
+; CHECK-NOT: Function Attrs
+; CHECK: define linkonce i32 @inner3(i32 %0)
+; CHECK: tail call i32 @2(i32 %0)
+; CHECK: ret
+;
+; CHECK-NOT: Function Attrs:
+; CHECK: define internal i32 @2(i32 %0)
+define linkonce i32 @inner3(i32) {
+entry:
+  %1 = alloca i32
+  store i32 %0, i32* %1
+  br label %2
+2:
+  %3 = load i32, i32* %1
+  %4 = icmp slt i32 %3, 4
+  br i1 %4, label %5, label %9
+5:
+  %6 = load i32, i32* %1
+  %7 = add nsw i32 %6, 1
+  %8 = call i32 @inner3(i32 %7)
+  store i32 %8, i32* %1
+  br label %2
+9:
+  %10 = load i32, i32* %1
+  ret i32 %10
+}
+