diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2270,6 +2270,94 @@
   return true;
 }
 
+/// Returns true if any call-site user of \p F is a musttail call.
+///
+/// Users that are not CallBase instances are skipped; in assertion-enabled
+/// builds each of them is required to be a BlockAddress.
+static bool hasMustTailCallers(Function *F) {
+  for (User *U : F->users()) {
+    CallBase *CB = dyn_cast<CallBase>(U);
+    if (!CB) {
+      assert(isa<BlockAddress>(U) &&
+             "Expected either CallBase or BlockAddress");
+      continue;
+    }
+    if (CB->isMustTailCall())
+      return true;
+  }
+  return false;
+}
+
+/// Drops Attribute::Preallocated from \p F (via RemoveAttribute) and rewrites
+/// every CallBase user of \p F to match.
+///
+/// Each call or invoke is recreated without its "preallocated" operand
+/// bundle. Every user of the bundle's token, asserted to be a call to
+/// @llvm.call.preallocated.arg(), is replaced by an alloca of the argument
+/// type bitcast to i8*. Finally the instruction defining the token is erased.
+static void RemovePreallocated(Function *F) {
+  RemoveAttribute(F, Attribute::Preallocated);
+
+  // Cannot modify users() while iterating over it, so make a copy.
+  SmallVector<User *, 4> PreallocatedCalls(F->users());
+  for (User *U : PreallocatedCalls) {
+    CallBase *CB = dyn_cast<CallBase>(U);
+    if (!CB)
+      continue;
+
+    // Create copy of call without "preallocated" operand bundle.
+    SmallVector<OperandBundleDef, 1> OpBundles;
+    CB->getOperandBundlesAsDefs(OpBundles);
+    Value *PreallocatedToken = nullptr;
+    for (auto *It = OpBundles.begin(); It != OpBundles.end(); ++It) {
+      if (It->getTag() == "preallocated") {
+        PreallocatedToken = *It->input_begin();
+        OpBundles.erase(It);
+        break;
+      }
+    }
+    assert(PreallocatedToken && "Did not find preallocated bundle");
+    CallBase *NewCB = nullptr;
+    if (InvokeInst *II = dyn_cast<InvokeInst>(CB)) {
+      NewCB = InvokeInst::Create(II, OpBundles, CB);
+    } else {
+      CallInst *CI = cast<CallInst>(CB);
+      NewCB = CallInst::Create(CI, OpBundles, CB);
+    }
+    // Carry the profile weight, if any, over to the replacement call.
+    uint64_t W = 0;
+    if (CB->extractProfTotalWeight(W))
+      NewCB->setProfWeight(W);
+    CB->replaceAllUsesWith(NewCB);
+    NewCB->takeName(CB);
+    // CB is deleted by this call; do not dereference it below.
+    CB->eraseFromParent();
+
+    // Replace @llvm.call.preallocated.arg() with alloca.
+    // Cannot modify users() while iterating over it, so make a copy.
+    SmallVector<User *, 2> PreallocatedArgs(PreallocatedToken->users());
+    for (User *TokenUser : PreallocatedArgs) {
+      auto *UseCall = cast<CallInst>(TokenUser);
+      assert(UseCall->getCalledFunction()->getIntrinsicID() ==
+                 Intrinsic::call_preallocated_arg &&
+             "preallocated token use was not a llvm.call.preallocated.arg");
+      auto AddressSpace = UseCall->getType()->getPointerAddressSpace();
+      auto *ArgType = UseCall
+                          ->getAttribute(AttributeList::FunctionIndex,
+                                         Attribute::Preallocated)
+                          .getValueAsType();
+      auto *Alloca =
+          new AllocaInst(ArgType, AddressSpace, UseCall->getName(), UseCall);
+      auto *BitCast = new BitCastInst(
+          Alloca, Type::getInt8PtrTy(F->getContext()), "pa_cast", UseCall);
+      UseCall->replaceAllUsesWith(BitCast);
+      UseCall->eraseFromParent();
+    }
+    // Remove @llvm.call.preallocated.setup().
+    cast<Instruction>(PreallocatedToken)->eraseFromParent();
+  }
+}
+
 static bool
 OptimizeFunctions(Module &M,
                   function_ref<TargetLibraryInfo &(Function &)> GetTLI,
@@ -2333,13 +2421,18 @@
     // wouldn't be safe in the presence of inalloca.
     // FIXME: We should also hoist alloca affected by this to the entry
     // block if possible.
-    // FIXME: handle preallocated
     if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
         !F->hasAddressTaken()) {
       RemoveAttribute(F, Attribute::InAlloca);
       Changed = true;
     }
 
+    if (F->getAttributes().hasAttrSomewhere(Attribute::Preallocated) &&
+        !F->hasAddressTaken() && !hasMustTailCallers(F)) {
+      RemovePreallocated(F);
+      Changed = true;
+    }
+
     if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
       NumInternalFunc++;
       TargetTransformInfo &TTI = GetTTI(*F);
diff --git a/llvm/test/Transforms/GlobalOpt/fastcc.ll b/llvm/test/Transforms/GlobalOpt/fastcc.ll
--- a/llvm/test/Transforms/GlobalOpt/fastcc.ll
+++ b/llvm/test/Transforms/GlobalOpt/fastcc.ll
@@ -36,8 +36,7 @@
 }
 
 define internal i32 @preallocated(i32* preallocated(i32) %p) {
-; TODO: handle preallocated:
-; CHECK-NOT-LABEL: define internal fastcc i32 @preallocated(i32* %p)
+; CHECK-LABEL: define internal fastcc i32 @preallocated(i32* %p)
   %rv = load i32, i32* %p
   ret i32 %rv
 }
@@ -50,21 +49,44 @@
   call i32 @j(i32* %m)
   %args = alloca inalloca i32
   call i32 @inalloca(i32* inalloca %args)
-  ; TODO: handle preallocated
-  ;%c = call token @llvm.call.preallocated.setup(i32 1)
-  ;%N = call i8* @llvm.call.preallocated.arg(token %c, i32 0) preallocated(i32)
-  ;%n = bitcast i8* %N to i32*
-  ;call i32 @preallocated(i32* preallocated(i32) %n) ["preallocated"(token %c)]
+  %c = call token @llvm.call.preallocated.setup(i32 1)
+  %N = call i8* @llvm.call.preallocated.arg(token %c, i32 0) preallocated(i32)
+  %n = bitcast i8* %N to i32*
+  call i32 @preallocated(i32* preallocated(i32) %n) ["preallocated"(token %c)]
   ret void
 }
-
-@llvm.used = appending global [1 x i8*] [
-   i8* bitcast (i32(i32*)* @j to i8*)
-], section "llvm.metadata"
-
 ; CHECK-LABEL: define void @call_things()
 ; CHECK: call fastcc i32 @f
 ; CHECK: call fastcc i32 @g
 ; CHECK: call coldcc i32 @h
 ; CHECK: call i32 @j
 ; CHECK: call fastcc i32 @inalloca(i32* %args)
+; CHECK-NOT: llvm.call.preallocated
+; CHECK: call fastcc i32 @preallocated(i32* %n)
+
+define internal i32 @preallocated_musttail(i32* preallocated(i32) %p) {
+; CHECK-LABEL: define internal i32 @preallocated_musttail(i32* preallocated(i32) %p)
+  %rv = load i32, i32* %p
+  ret i32 %rv
+}
+
+define i32 @call_preallocated_musttail(i32* preallocated(i32) %a) {
+  %r = musttail call i32 @preallocated_musttail(i32* preallocated(i32) %a)
+  ret i32 %r
+}
+; CHECK-LABEL: define i32 @call_preallocated_musttail(i32* preallocated(i32) %a)
+; CHECK: musttail call i32 @preallocated_musttail(i32* preallocated(i32) %a)
+
+define i32 @call_preallocated_musttail_without_musttail() {
+  %c = call token @llvm.call.preallocated.setup(i32 1)
+  %N = call i8* @llvm.call.preallocated.arg(token %c, i32 0) preallocated(i32)
+  %n = bitcast i8* %N to i32*
+  %r = call i32 @preallocated_musttail(i32* preallocated(i32) %n) ["preallocated"(token %c)]
+  ret i32 %r
+}
+; CHECK-LABEL: define i32 @call_preallocated_musttail_without_musttail()
+; CHECK: call i32 @preallocated_musttail(i32* preallocated(i32) %n)
+
+@llvm.used = appending global [1 x i8*] [
+   i8* bitcast (i32(i32*)* @j to i8*)
+], section "llvm.metadata"