Index: llvm/include/llvm/Transforms/Scalar/SROA.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar/SROA.h
+++ llvm/include/llvm/Transforms/Scalar/SROA.h
@@ -123,6 +123,7 @@
   PreservedAnalyses runImpl(Function &F, DominatorTree &RunDT,
                             AssumptionCache &RunAC);
 
+  bool isolateNonCapturingCalls(Function &F);
   bool presplitLoadsAndStores(AllocaInst &AI, sroa::AllocaSlices &AS);
   AllocaInst *rewritePartition(AllocaInst &AI, sroa::AllocaSlices &AS,
                                sroa::Partition &P);
Index: llvm/include/llvm/Transforms/Utils/PromoteMemToReg.h
===================================================================
--- llvm/include/llvm/Transforms/Utils/PromoteMemToReg.h
+++ llvm/include/llvm/Transforms/Utils/PromoteMemToReg.h
@@ -27,7 +27,13 @@
 /// (transitively) using this alloca. This also enforces that there is only
 /// ever one layer of bitcasts or GEPs between the alloca and the lifetime
 /// markers.
+///
+/// If \p AllowNonCapturingCalls is true, calls without operand bundles that
+/// pass the alloca only through nocapture arguments are tolerated as well.
+/// Callers that pass true are expected to rewrite such calls (as
+/// SROA::isolateNonCapturingCalls does) before promotion, because the default
+/// query still rejects any alloca that is used by a call.
-bool isAllocaPromotable(const AllocaInst *AI);
+bool isAllocaPromotable(const AllocaInst *AI,
+                        bool AllowNonCapturingCalls = false);
 
 /// Promote the specified list of alloca instructions into scalar
 /// registers, inserting PHI nodes as appropriate.
Index: llvm/lib/Transforms/Scalar/SROA.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/SROA.cpp
+++ llvm/lib/Transforms/Scalar/SROA.cpp
@@ -4702,6 +4702,85 @@
   return true;
 }
 
+/// Give non-intrinsic calls that receive an entry-block alloca through
+/// nocapture arguments a private stack slot to operate on.
+///
+/// For every alloca accepted by isAllocaPromotable(AI,
+/// /*AllowNonCapturingCalls=*/true) a new "sroa.call.isolate" alloca is
+/// created. Around each call the current value is copied into the new slot,
+/// the call's arguments are redirected to it, and the value is copied back
+/// afterwards, so the original alloca is no longer passed to those calls.
+///
+/// \returns true if any call was rewritten.
+bool SROA::isolateNonCapturingCalls(Function &F) {
+  BasicBlock &EntryBB = F.getEntryBlock();
+  bool Changed = false;
+
+  for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
+       I != E; ++I) {
+    AllocaInst *AI = dyn_cast<AllocaInst>(I);
+    if (!AI)
+      continue;
+
+    // The replacement slot created below holds a single element, so it cannot
+    // stand in for an array allocation.
+    if (AI->isArrayAllocation())
+      continue;
+
+    // Snapshot the non-intrinsic calls up front. Rewriting a call's operands
+    // and creating loads of AI both modify AI's use list, so it must not be
+    // walked while the IR is being changed. A call that passes AI more than
+    // once shows up once per use; record it only once.
+    SmallVector<CallInst *, 4> Calls;
+    for (User *U : AI->users()) {
+      auto *CI = dyn_cast<CallInst>(U);
+      if (CI && !isa<IntrinsicInst>(CI) && !is_contained(Calls, CI))
+        Calls.push_back(CI);
+    }
+    if (Calls.empty())
+      continue;
+
+    // Only proceed if the alloca would otherwise be promotable. This also
+    // rejects calls that carry operand bundles or that pass AI through an
+    // argument that is not marked nocapture.
+    if (!isAllocaPromotable(AI, /*AllowNonCapturingCalls=*/true))
+      continue;
+
+    Changed = true;
+    LLVM_DEBUG(dbgs() << "SROA: Isolating calls using alloca: " << *AI << "\n");
+
+    // Create the replacement slot next to the original, mirroring its type,
+    // address space and alignment.
+    IRBuilderTy Builder(AI);
+    Type *AllocatedTy = AI->getAllocatedType();
+    const Align Alignment = AI->getAlign();
+    AllocaInst *NewAI =
+        Builder.CreateAlloca(AllocatedTy, AI->getType()->getAddressSpace(),
+                             nullptr, "sroa.call.isolate");
+    NewAI->setAlignment(Alignment);
+    LLVM_DEBUG(dbgs() << "\tCreating new alloca: " << *NewAI << "\n");
+
+    for (CallInst *CI : Calls) {
+      LLVM_DEBUG(dbgs() << "\tIsolating call: " << *CI << "\n");
+
+      // Copy the current value into the isolated slot ahead of the call...
+      Builder.SetInsertPoint(CI);
+      LoadInst *Load = Builder.CreateAlignedLoad(AllocatedTy, AI, Alignment);
+      Builder.CreateAlignedStore(Load, NewAI, Alignment);
+
+      // ...and copy whatever the callee left there back afterwards.
+      Builder.SetInsertPoint(CI->getNextNonDebugInstruction());
+      Load = Builder.CreateAlignedLoad(AllocatedTy, NewAI, Alignment);
+      Builder.CreateAlignedStore(Load, AI, Alignment);
+
+      for (unsigned i = 0; i < CI->arg_size(); ++i)
+        if (CI->getArgOperand(i) == AI)
+          CI->setArgOperand(i, NewAI);
+    }
+  }
+
+  return Changed;
+}
+
 PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
                                 AssumptionCache &RunAC) {
   LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
@@ -4709,6 +4788,12 @@
   DT = &RunDT;
   AC = &RunAC;
 
+  // First look for allocas which are passed to calls without being captured
+  // and give those calls a separate alloca to work on, copying the value in
+  // and out around each call; this may enable additional optimizations for
+  // the original alloca.
+  bool Changed = isolateNonCapturingCalls(F);
+
   BasicBlock &EntryBB = F.getEntryBlock();
   for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
        I != E; ++I) {
@@ -4722,7 +4807,6 @@
     }
   }
 
-  bool Changed = false;
   // A set of deleted alloca instruction pointers which should be removed from
   // the list of promotable allocas.
   SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
Index: llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
===================================================================
--- llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -61,7 +61,8 @@
 STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
 STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
 
-bool llvm::isAllocaPromotable(const AllocaInst *AI) {
+bool llvm::isAllocaPromotable(const AllocaInst *AI,
+                              bool AllowNonCapturingCalls) {
   // Only allow direct and non-volatile loads and stores...
   for (const User *U : AI->users()) {
     if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
@@ -91,6 +92,22 @@
     } else if (const AddrSpaceCastInst *ASCI = dyn_cast<AddrSpaceCastInst>(U)) {
       if (!onlyUsedByLifetimeMarkers(ASCI))
         return false;
+    } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
+      if (!AllowNonCapturingCalls)
+        return false;
+
+      // Reject calls with operand bundles for now.
+      if (CI->hasOperandBundles())
+        return false;
+
+      // Only allow a call if all uses of the alloca do not capture the pointer.
+      bool NoCaptures = true;
+      for (unsigned i = 0; i < CI->arg_size(); ++i)
+        if (CI->getArgOperand(i) == AI)
+          NoCaptures &= CI->paramHasAttr(i, Attribute::NoCapture);
+
+      if (!NoCaptures)
+        return false;
     } else {
       return false;
     }
Index: llvm/test/Transforms/PhaseOrdering/lifetime-sanitizer.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/lifetime-sanitizer.ll
+++ llvm/test/Transforms/PhaseOrdering/lifetime-sanitizer.ll
@@ -9,7 +9,7 @@
 declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
 
-declare void @foo(i8* nocapture)
+declare void @foo(i8*)
 
 define void @asan() sanitize_address {
 entry:
Index: llvm/test/Transforms/SROA/non-capturing-call.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SROA/non-capturing-call.ll
@@ -0,0 +1,58 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+; RUN: opt < %s -passes=sroa -S | FileCheck %s
+
+; CHECK-LABEL: @alloca_used_in_call
+define i32 @alloca_used_in_call(i32* nocapture nonnull readonly %data, i32 %n) {
+; CHECK-NOT: %retval = alloca
+; CHECK: %sroa.call.isolate = alloca i32, align 4
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval, align 4
+  %limit = zext i32 %n to i64
+  br label %loop.ph
+
+loop.ph:
+  %iter.check = icmp ugt i64 %limit, 0
+  br i1 %iter.check, label %loop.body, label %loop.exit
+
+; CHECK-LABEL: loop.body:
+; CHECK: %retval.0 = phi i32 [ 0, %loop.ph ], [ %retval.1, %if.end ]
+loop.body:
+  %indvars.iv = phi i64 [ 0, %loop.ph ], [ %indvars.iv.next, %if.end ]
+  %arrayidx = getelementptr inbounds i32, i32* %data, i64 %indvars.iv
+  %pred = load i32, i32* %arrayidx, align 4
+  %tobool.not = icmp eq i32 %pred, 0
+  br i1 %tobool.not, label %if.end, label %if.then
+
+; CHECK-LABEL: if.then:
+; CHECK: %rdx.inc = add nsw i32 %retval.0, 1
+if.then:
+  %rdx = load i32, i32* %retval, align 4
+  %rdx.inc = add nsw i32 %rdx, 1
+  store i32 %rdx.inc, i32* %retval, align 4
+  br label %if.end
+
+; CHECK-LABEL: if.end:
+; CHECK: %retval.1 = phi i32 [ %retval.0, %loop.body ], [ %rdx.inc, %if.then ]
+if.end:
+  %indvars.iv.next = add nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, %limit
+  br i1 %exitcond, label %loop.body, label %loop.exit
+
+; CHECK-LABEL: loop.exit:
+; CHECK: %retval.2 = phi i32 [ %retval.1, %if.end ], [ 0, %loop.ph ]
+loop.exit:
+  br label %ext.user
+
+; CHECK-LABEL: ext.user:
+; CHECK: store i32 %retval.2, i32* %sroa.call.isolate, align 4
+; CHECK: %0 = call i32 @user_of_alloca(i32* nocapture nonnull %sroa.call.isolate)
+; CHECK: %1 = load i32, i32* %sroa.call.isolate, align 4
+; CHECK: ret i32 %1
+ext.user:
+  %0 = call i32 @user_of_alloca(i32* nocapture nonnull %retval)
+  %1 = load i32, i32* %retval, align 4
+  ret i32 %1
+}
+
+declare dso_local i32 @user_of_alloca(i32* nocapture nonnull)