diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3378,6 +3378,231 @@ return Changed; } +// See which (if any) arguments can of the callsite can inherit nocapture from +// caller arguments. This is useful if the caller function is inlined as +// inlining may lose the nocapture information. +static void getNoCapturePropagations(const CallBase *CB, + SmallVector *NoCaptureArgs) { + // TODO: Handle CallBr and Invoke. At the moment we only handle cases where + // the callsite dominates a return instruction. We could add more + // sophisticated logic to handle multiple target BBs as well, however. + if (!isa(CB)) + return; + + SmallPtrSet NoCaptureParentArguments; + // If this callsite is to a readonly function that doesn't throw then the only + // way to the pointer to be captured is through the return value. If the + // return type is void or the return value of this callsite is unused, then + // all the pointer parameters at this callsite must be nocapture. NB: This is + // a slight strengthening of the case done in the FunctionAttrs pass which has + // the same logic but only for void function. At specific callsites we can do + // non-void function if the return value is unused. + bool IsAlwaysNoCapture = CB->onlyReadsMemory() && CB->doesNotThrow() && + (CB->getType()->isVoidTy() || CB->use_empty()); + if (IsAlwaysNoCapture) { + unsigned N = 0; + for (Value *V : CB->args()) { + if (V->getType()->isPointerTy() && + !CB->paramHasAttr(N, Attribute::NoCapture)) + NoCaptureArgs->push_back(N); + ++N; + } + return; + } + + // If this is not trivially nocapture, then we propagate a nocapture argument + // if the callsite meets the following requirements: + // + // 1) The callsite is in a basic block that ends with a return statement. + // 2) Between the callsite the end of its basic block there are no + // may-write instructions. + // 3) The return value of the callsite is not used (directly or indirectly) + // as the address of a may-read instruction. + // 4) There are allocas or leaked (not freed or returned) mallocs reachable + // from the callsite. + // 5) The callsite/caller are nothrow OR there is no landing padd in the + // caller. + // + // These requirements are intentionally over conservative. We are only trying + // to catch relatively trivial cases. + // + // Requirements 1 & 2 are there to ensure that after the callsite has + // returned, the state of any captured in memory pointers cannot change. This + // implies that if the caller has any nocapture in memory gurantees, that + // state has been reached by the end of the callsite. + // + // Requirements 3 & 4 are to cover cases where pointers could escape the + // callsite (but not the caller) through non-dead code. Any return value thats + // loaded from (or used to create a pointer that is loaded from) could have + // derived from an argument. Finally, allocas/leaked mallocs in general are + // difficult (so we avoid them entirely). Callsites can arbitrarily store + // pointers in allocas for use later without violating a nocapture gurantee by + // the caller, as the allocas are torn down at caller return. Likewise a + // leaked malloc would not be accessible outside of the caller, but could + // still be accessible after the callsite. There are a variety of complex + // cases involving allocas/leaked mallocs. For simplicity, if we see either we + // simply fail. + // + // Requirement 5 is to cover the last way to escape to occur. If the + // callsite/caller is nothrow its a non-issue. If the callsite may throw, then + // a method of capture is through an exception. If the caller has no landing + // padd to catch this exception, then the exception state will be visible + // outside of the caller so any gurantees about nocapture made by the caller + // will apply to the callsites throw. If the caller has a landing padd, its + // possible for the callsite to capture a pointer in a throw that is later + // cleared by the caller. + const BasicBlock *BB = CB->getParent(); + if (!BB) + return; + + // Make sure this BB ends in a return (Requirement 1). + const Instruction *ITerm = BB->getTerminator(); + if (!ITerm || !isa(ITerm)) + return; + + // Get caller. + const Function *PF = BB->getParent(); + if (!PF) + return; + + // See if caller has any nocapture arguments we may be able to propagate + // attributes from. + for (unsigned I = 0; I < PF->arg_size(); ++I) + if (PF->getArg(I)->hasNoCaptureAttr()) + NoCaptureParentArguments.insert(PF->getArg(I)); + + unsigned N = 0; + for (Value *V : CB->args()) { + // See if this callsite argument is missing nocapture and its propagatable + // (nocapture in the caller). + if (!CB->paramHasAttr(N, Attribute::NoCapture) && + NoCaptureParentArguments.contains(V)) + NoCaptureArgs->push_back(N); + ++N; + } + + if (NoCaptureArgs->empty()) + return; + + // Limit maximum amount of instructions we will check. The primary benefit + // of this combine is for smaller functions that will be inlined + // (potentially losing nocapture information), so a relatively small + // threshhold should be sufficient. + const unsigned kMaxChecks = 40; + unsigned Cnt = 0; + + // Check requirements 2 & 3. We do so by scanning the basic block with the + // callsite from the callsite to the return. We keep track of all values + // derived from the return value of callsite. If we run into a may-read + // instruction that uses one of those derived value or any may-store + // instruction we fail. + SmallPtrSet DerivedFromReturn; + for (const Value *U : CB->uses()) + DerivedFromReturn.insert(U); + + const Instruction *Ins = CB; + for (Cnt = 0; Cnt < kMaxChecks; ++Cnt) { + Ins = Ins->getNextNode(); + if (Ins == nullptr || Ins == ITerm) + break; + + // We have a write operation after callsite so fail as it may be clearing + // captured memory. + if (Ins->mayWriteToMemory() || isa(Ins)) { + NoCaptureArgs->clear(); + return; + } + + for (const Value *U : Ins->operands()) { + if (DerivedFromReturn.contains(U)) { + DerivedFromReturn.insert(Ins); + break; + } + } + + // We have a load from the returned pointer (or a pointer derived with the + // return value) so fail as this may have been from a captured return. + if (Ins->mayReadFromMemory() && DerivedFromReturn.contains(Ins)) { + NoCaptureArgs->clear(); + return; + } + } + + if (Cnt == kMaxChecks) { + NoCaptureArgs->clear(); + return; + } + + // Check all predecessors (basic blocks from which an alloca or leaked malloc + // may be able to reach this callsite). We are being incredibly conservative + // here. We could likely skip the alloca/leaked malloc search in a few cases. + // 1) If the callsite is the last instruction before the return or if there + // are no may-read instructions between the callsite and the return. 2) If + // there are possible stores to the alloca/leaked malloc that may reach the + // callsite its probably also safe. And/Or 3) If the callsite is readonly it + // could never capture in memory so these are non factor concerns. For now + // stay conservative, but over time these optimizations can be added. + SmallPtrSet AllPreds{BB}; + SmallVector Preds = {BB}; + bool MayThrow = !(CB->doesNotThrow() || PF->doesNotThrow()); + while (!Preds.empty()) { + const BasicBlock *CurBB = Preds.back(); + Preds.pop_back(); + // Check all instructions in current BB for an alloca/leaked malloc. + for (const Value &V : *CurBB) { + if (&V == CB) + break; + + bool Fail = false; + + // If we reach max checks and can't rule out alloca/leaked malloc case + // fail. + if (Cnt++ >= kMaxChecks) + Fail = true; + // If we find an alloca fail. + if (isa(&V)) + Fail = true; + // If we find leaked malloc fail. + else if (auto *MCB = dyn_cast(&V)) + Fail = MCB->returnDoesNotAlias() && + MCB != cast(ITerm)->getReturnValue(); + // If we find a landing padd instruction fail. + else if (MayThrow && isa(&V)) + Fail = true; + + if (Fail) { + NoCaptureArgs->clear(); + return; + } + } + + for (const BasicBlock *Pred : predecessors(CurBB)) + if (AllPreds.insert(Pred).second) + Preds.push_back(Pred); + } + + // Finally, if the callsite may throw and there is a landing padd in the + // caller then fail as we only analyzed the path from callsite -> dominated + // return. If the callsite may throw but there is no landing padd in the + // caller, then the throw will exit the callers scope so any nocapture + // gurantees inplace by the caller will apply to the callsite as well. + if (MayThrow) { + for (const BasicBlock &CurBB : *PF) { + // Skip BBs we have already checked. + if (AllPreds.contains(&CurBB)) + continue; + for (const Instruction &CurIns : CurBB) { + if (isa(&CurIns) || Cnt++ >= kMaxChecks) { + NoCaptureArgs->clear(); + return; + } + } + } + } + + return; +} + /// Improvements for call, callbr and invoke instructions. Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) { bool Changed = annotateAnyAllocSite(Call, &TLI); @@ -3385,24 +3610,34 @@ // Mark any parameters that are known to be non-null with the nonnull // attribute. This is helpful for inlining calls to functions with null // checks on their arguments. - SmallVector ArgNos; + // Likewise try to mark parameters that are known not captured from parent + // attributes as nocapture. + SmallVector ArgNosNonNull, ArgNosNoCapture; unsigned ArgNo = 0; for (Value *V : Call.args()) { - if (V->getType()->isPointerTy() && - !Call.paramHasAttr(ArgNo, Attribute::NonNull) && - isKnownNonZero(V, DL, 0, &AC, &Call, &DT)) - ArgNos.push_back(ArgNo); + if (V->getType()->isPointerTy()) { + if (!Call.paramHasAttr(ArgNo, Attribute::NonNull) && + isKnownNonZero(V, DL, 0, &AC, &Call, &DT)) + ArgNosNonNull.push_back(ArgNo); + } + ArgNo++; } + getNoCapturePropagations(&Call, &ArgNosNoCapture); + assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly."); - if (!ArgNos.empty()) { + if (!ArgNosNonNull.empty() || !ArgNosNoCapture.empty()) { AttributeList AS = Call.getAttributes(); LLVMContext &Ctx = Call.getContext(); - AS = AS.addParamAttribute(Ctx, ArgNos, - Attribute::get(Ctx, Attribute::NonNull)); + if (!ArgNosNonNull.empty()) + AS = AS.addParamAttribute(Ctx, ArgNosNonNull, + Attribute::get(Ctx, Attribute::NonNull)); + if (!ArgNosNoCapture.empty()) + AS = AS.addParamAttribute(Ctx, ArgNosNoCapture, + Attribute::get(Ctx, Attribute::NoCapture)); Call.setAttributes(AS); Changed = true; } diff --git a/llvm/test/Transforms/InstCombine/nocapture-attribute.ll b/llvm/test/Transforms/InstCombine/nocapture-attribute.ll --- a/llvm/test/Transforms/InstCombine/nocapture-attribute.ll +++ b/llvm/test/Transforms/InstCombine/nocapture-attribute.ll @@ -21,7 +21,7 @@ define void @simple_propegated_a0_a1_nocapture_a2_maybe_capture(ptr nocapture %a0, ptr nocapture %a1, ptr %a2) local_unnamed_addr { ; CHECK-LABEL: define void @simple_propegated_a0_a1_nocapture_a2_maybe_capture ; CHECK-SAME: (ptr nocapture [[A0:%.*]], ptr nocapture [[A1:%.*]], ptr [[A2:%.*]]) local_unnamed_addr { -; CHECK-NEXT: tail call void @ptrs_maybe_capture(ptr [[A0]], ptr [[A1]], ptr [[A2]]) +; CHECK-NEXT: tail call void @ptrs_maybe_capture(ptr nocapture [[A0]], ptr nocapture [[A1]], ptr [[A2]]) ; CHECK-NEXT: ret void ; tail call void @ptrs_maybe_capture(ptr %a0, ptr %a1, ptr %a2) @@ -31,7 +31,7 @@ define void @simple_propegate_a2_nocapture2x_a1_maybe_capture(ptr nocapture %a0, ptr %a1, ptr nocapture %a2) { ; CHECK-LABEL: define void @simple_propegate_a2_nocapture2x_a1_maybe_capture ; CHECK-SAME: (ptr nocapture [[A0:%.*]], ptr [[A1:%.*]], ptr nocapture [[A2:%.*]]) { -; CHECK-NEXT: tail call void @ptrs_maybe_capture(ptr [[A2]], ptr [[A1]], ptr [[A2]]) +; CHECK-NEXT: tail call void @ptrs_maybe_capture(ptr nocapture [[A2]], ptr [[A1]], ptr nocapture [[A2]]) ; CHECK-NEXT: ret void ; tail call void @ptrs_maybe_capture(ptr %a2, ptr %a1, ptr %a2) @@ -41,7 +41,7 @@ define i64 @propegate_past_trivially_read_only(ptr nocapture %a0, i64 %r) local_unnamed_addr { ; CHECK-LABEL: define i64 @propegate_past_trivially_read_only ; CHECK-SAME: (ptr nocapture [[A0:%.*]], i64 [[R:%.*]]) local_unnamed_addr { -; CHECK-NEXT: call void @ptrs_maybe_capture(ptr [[A0]], ptr [[A0]], ptr [[A0]]) +; CHECK-NEXT: call void @ptrs_maybe_capture(ptr nocapture [[A0]], ptr nocapture [[A0]], ptr nocapture [[A0]]) ; CHECK-NEXT: [[R0:%.*]] = mul i64 [[R]], [[R]] ; CHECK-NEXT: [[R1:%.*]] = mul i64 [[R0]], [[R0]] ; CHECK-NEXT: [[R2:%.*]] = shl i64 [[R1]], 1 @@ -125,7 +125,7 @@ ; CHECK-NEXT: [[UNUSED:%.*]] = call i64 @barrier(i64 0) ; CHECK-NEXT: br i1 [[C2]], label [[TT:%.*]], label [[F]] ; CHECK: TT: -; CHECK-NEXT: call void @ptrs_maybe_capture(ptr [[A0]], ptr [[A0]], ptr [[A0]]) +; CHECK-NEXT: call void @ptrs_maybe_capture(ptr nocapture [[A0]], ptr nocapture [[A0]], ptr nocapture [[A0]]) ; CHECK-NEXT: ret i64 0 ; CHECK: F: ; CHECK-NEXT: [[PN:%.*]] = alloca i64, align 8 @@ -178,7 +178,7 @@ ; CHECK-NEXT: [[PN:%.*]] = call noalias ptr @malloc_like(i64 [[R]]) ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; CHECK: T: -; CHECK-NEXT: call void @ptrs_maybe_capture(ptr [[A0]], ptr [[A0]], ptr [[A0]]) +; CHECK-NEXT: call void @ptrs_maybe_capture(ptr nocapture [[A0]], ptr nocapture [[A0]], ptr nocapture [[A0]]) ; CHECK-NEXT: ret ptr [[PN]] ; CHECK: F: ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @ptr_maybe_capture(ptr [[PN]]) #[[ATTR0]] @@ -208,7 +208,7 @@ ; CHECK-NEXT: [[R5:%.*]] = add i64 [[R4]], [[R3]] ; CHECK-NEXT: [[R6:%.*]] = call i64 @barrier(i64 [[R5]]) #[[ATTR0]] ; CHECK-NEXT: store i64 [[R6]], ptr [[A0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @ptr_maybe_capture(ptr nonnull [[A0]]) #[[ATTR0]] +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @ptr_maybe_capture(ptr nocapture nonnull [[A0]]) #[[ATTR0]] ; CHECK-NEXT: ret i64 [[R6]] ; call void @ptrs_maybe_capture(ptr %a0, ptr %a0, ptr %a0) @@ -319,7 +319,7 @@ define i64 @propegate_return(ptr nocapture %a0) { ; CHECK-LABEL: define i64 @propegate_return ; CHECK-SAME: (ptr nocapture [[A0:%.*]]) { -; CHECK-NEXT: [[R:%.*]] = call i64 @ptr_maybe_capture.i64(ptr [[A0]]) +; CHECK-NEXT: [[R:%.*]] = call i64 @ptr_maybe_capture.i64(ptr nocapture [[A0]]) ; CHECK-NEXT: [[RR:%.*]] = mul i64 [[R]], [[R]] ; CHECK-NEXT: ret i64 [[RR]] ;