diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2270,6 +2270,80 @@
   return true;
 }
 
+/// Strip the preallocated argument convention from \p F and its call sites.
+///
+/// Calls RemoveAttribute(F, Attribute::Preallocated), then for every call or
+/// invoke among the users of \p F: recreates it without the "preallocated"
+/// operand bundle, replaces each @llvm.call.preallocated.arg() on the bundle's
+/// token with an alloca, and erases the @llvm.call.preallocated.setup() that
+/// produced the token. Users of \p F that are not call sites are skipped.
+static void RemovePreallocated(Function *F) {
+  RemoveAttribute(F, Attribute::Preallocated);
+
+  // Cannot modify users() while iterating over it, so make a copy.
+  SmallVector<User *, 4> PreallocatedCalls(F->users());
+  for (User *U : PreallocatedCalls) {
+    CallBase *CB = dyn_cast<CallBase>(U);
+    if (!CB)
+      continue;
+
+    // Create copy of call without "preallocated" operand bundle.
+    SmallVector<OperandBundleDef, 1> OpBundles;
+    CB->getOperandBundlesAsDefs(OpBundles);
+    Value *PreallocatedToken = nullptr;
+    for (auto *It = OpBundles.begin(); It != OpBundles.end(); ++It) {
+      if (It->getTag() == "preallocated") {
+        PreallocatedToken = *It->input_begin();
+        OpBundles.erase(It);
+        break;
+      }
+    }
+    assert(PreallocatedToken && "Did not find preallocated bundle");
+    CallBase *NewCB = nullptr;
+    if (InvokeInst *II = dyn_cast<InvokeInst>(CB)) {
+      NewCB = InvokeInst::Create(II, OpBundles, CB);
+    } else {
+      CallInst *CI = cast<CallInst>(CB);
+      NewCB = CallInst::Create(CI, OpBundles, CB);
+    }
+    // Carry the total profile weight, if any, over to the replacement call.
+    uint64_t W;
+    if (CB->extractProfTotalWeight(W))
+      NewCB->setProfWeight(W);
+    CB->replaceAllUsesWith(NewCB);
+    NewCB->takeName(CB);
+    CB->eraseFromParent();
+
+    // Replace @llvm.call.preallocated.arg() with alloca.
+    // Cannot modify users() while iterating over it, so make a copy.
+    SmallVector<User *, 2> PreallocatedArgs(PreallocatedToken->users());
+    for (auto *User : PreallocatedArgs) {
+      auto *UseCall = cast<CallBase>(User);
+      assert(UseCall->getCalledFunction()->getIntrinsicID() ==
+                 Intrinsic::call_preallocated_arg &&
+             "preallocated token use was not a llvm.call.preallocated.arg");
+      auto AddressSpace = UseCall->getType()->getPointerAddressSpace();
+      auto *ArgType = UseCall
+                          ->getAttribute(AttributeList::FunctionIndex,
+                                         Attribute::Preallocated)
+                          .getValueAsType();
+      // NOTE(review): the alloca is placed at the intrinsic call, not in the
+      // entry block, so a call site inside a loop allocates on every
+      // iteration; consider hoisting or using stacksave/stackrestore.
+      auto *Alloca =
+          new AllocaInst(ArgType, AddressSpace, UseCall->getName(), UseCall);
+      // Cast to the intrinsic's own result type so the replacement below is
+      // type-correct. CB was erased above and must not be dereferenced here.
+      auto *BitCast =
+          new BitCastInst(Alloca, UseCall->getType(), "pa_cast", UseCall);
+      UseCall->replaceAllUsesWith(BitCast);
+      UseCall->eraseFromParent();
+    }
+    // Remove @llvm.call.preallocated.setup().
+    cast<Instruction>(PreallocatedToken)->eraseFromParent();
+  }
+}
+
 static bool
 OptimizeFunctions(Module &M,
                   function_ref<TargetLibraryInfo &(Function &)> GetTLI,
@@ -2333,13 +2407,18 @@
     // wouldn't be safe in the presence of inalloca.
     // FIXME: We should also hoist alloca affected by this to the entry
     // block if possible.
-    // FIXME: handle preallocated
     if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
         !F->hasAddressTaken()) {
       RemoveAttribute(F, Attribute::InAlloca);
       Changed = true;
     }
 
+    if (F->getAttributes().hasAttrSomewhere(Attribute::Preallocated) &&
+        !F->hasAddressTaken()) {
+      RemovePreallocated(F);
+      Changed = true;
+    }
+
     if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
       NumInternalFunc++;
       TargetTransformInfo &TTI = GetTTI(*F);
diff --git a/llvm/test/Transforms/GlobalOpt/fastcc.ll b/llvm/test/Transforms/GlobalOpt/fastcc.ll
--- a/llvm/test/Transforms/GlobalOpt/fastcc.ll
+++ b/llvm/test/Transforms/GlobalOpt/fastcc.ll
@@ -36,8 +36,7 @@
 }
 
 define internal i32 @preallocated(i32* preallocated(i32) %p) {
-; TODO: handle preallocated:
-; CHECK-NOT-LABEL: define internal fastcc i32 @preallocated(i32* %p)
+; CHECK-LABEL: define internal fastcc i32 @preallocated(i32* %p)
   %rv = load i32, i32* %p
   ret i32 %rv
 }
@@ -50,11 +49,10 @@
   call i32 @j(i32* %m)
   %args = alloca inalloca i32
   call i32 @inalloca(i32* inalloca %args)
-  ; TODO: handle preallocated
-  ;%c = call token @llvm.call.preallocated.setup(i32 1)
-  ;%N = call i8* @llvm.call.preallocated.arg(token %c, i32 0) preallocated(i32)
-  ;%n = bitcast i8* %N to i32*
-  ;call i32 @preallocated(i32* preallocated(i32) %n) ["preallocated"(token %c)]
+  %c = call token @llvm.call.preallocated.setup(i32 1)
+  %N = call i8* @llvm.call.preallocated.arg(token %c, i32 0) preallocated(i32)
+  %n = bitcast i8* %N to i32*
+  call i32 @preallocated(i32* preallocated(i32) %n) ["preallocated"(token %c)]
   ret void
 }
 
@@ -68,3 +66,5 @@
 ; CHECK: call coldcc i32 @h
 ; CHECK: call i32 @j
 ; CHECK: call fastcc i32 @inalloca(i32* %args)
+; CHECK-NOT: llvm.call.preallocated
+; CHECK: call fastcc i32 @preallocated(i32* %n)