diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3378,6 +3378,97 @@ return Changed; } +// See which (if any) arguments can of the callsite can inherit nocapture from +// caller arguments. This is useful if the caller function is inlined as +// inlining may lose the nocapture information. +static void getNoCapturePropegations(const CallBase *CB, + SmallVector *NoCaptureArgs) { + SmallPtrSet NoCaptureParentArguments; + + // If this callsite is to a readonly function that doesn't throw then the only + // way to the pointer to be captured is through the return value. If the + // return type is void or the return value of this callsite is unused, then + // all the pointer parameters at this callsite must be nocapture. NB: This is + // a slight strengthening of the case done in the FunctionAttrs pass which has + // the same logic but only for void function. At specific callsites we can do + // non-void function if the return value is unused. + bool IsAlwaysNoCapture = CB->onlyReadsMemory() && CB->doesNotThrow() && + (CB->getType()->isVoidTy() || CB->use_empty()); + if (IsAlwaysNoCapture) { + unsigned N = 0; + for (Value *V : CB->args()) { + if (V->getType()->isPointerTy() && + !CB->paramHasAttr(N, Attribute::NoCapture)) + NoCaptureArgs->push_back(N); + ++N; + } + return; + } + + // If this is not trivially nocapture, then we propegate a nocapture + // argument if there are only readonly instructions between this callsite + // and a return from the caller. If the caller has no capture on a pointer, + // then at the return of the caller that pointer can not have been captured. + // This is the same as say by the last may-write instruction before a + // return, the pointer cannot have been captured. This is true because + // readonly instructions cannot change the capture state. So, if all the + // instructions following this callsite are readonly, then callsite could + // not have left the pointer captured. + const BasicBlock *BB = CB->getParent(); + if (!BB) + return; + + // Make sure this BB ends in a return. + const Instruction *ITerm = BB->getTerminator(); + if (!ITerm || !isa(ITerm)) + return; + + // Get caller. + const Function *PF = BB->getParent(); + if (!PF) + return; + + // See if caller has any nocapture arguments we may be able to propegate + // attributes from. + for (unsigned I = 0; I < PF->arg_size(); ++I) + if (PF->getArg(I)->hasNoCaptureAttr()) + NoCaptureParentArguments.insert(PF->getArg(I)); + + unsigned N = 0; + for (Value *V : CB->args()) { + // See if this callsite argument is missing nocapture and its propegatable + // (nocapture in the caller). + if (!CB->paramHasAttr(N, Attribute::NoCapture) && + NoCaptureParentArguments.contains(V)) + NoCaptureArgs->push_back(N); + ++N; + } + + if (NoCaptureArgs->empty()) + return; + + // Finally, if we actually have some arguments that we will be able to + // propegate nocapture too, see if this callsite is a candidate (only + // readonly instructions between it and return). + + // Limit maximum amount of instructions we will check. The primary benefit + // of this combine is for smaller functions that will be inlined + // (potentially losing nocapture information), so a relatively small + // threshhold should be sufficient. + const unsigned kMaxInstsChecks = 25; + for (unsigned Cnt = 0; Cnt < kMaxInstsChecks; ++Cnt) { + ITerm = ITerm->getPrevNode(); + if (ITerm == nullptr || ITerm == CB) + return; + + if (ITerm->mayWriteToMemory()) + break; + } + + NoCaptureArgs->clear(); + return; +} + /// Improvements for call, callbr and invoke instructions. Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) { bool Changed = annotateAnyAllocSite(Call, &TLI); @@ -3385,24 +3476,34 @@ // Mark any parameters that are known to be non-null with the nonnull // attribute. This is helpful for inlining calls to functions with null // checks on their arguments. - SmallVector ArgNos; + // Likewise try to mark parameters that are known not captured from parent + // attributes as nocapture. + SmallVector ArgNosNonNull, ArgNosNoCapture; unsigned ArgNo = 0; for (Value *V : Call.args()) { - if (V->getType()->isPointerTy() && - !Call.paramHasAttr(ArgNo, Attribute::NonNull) && - isKnownNonZero(V, DL, 0, &AC, &Call, &DT)) - ArgNos.push_back(ArgNo); + if (V->getType()->isPointerTy()) { + if (!Call.paramHasAttr(ArgNo, Attribute::NonNull) && + isKnownNonZero(V, DL, 0, &AC, &Call, &DT)) + ArgNosNonNull.push_back(ArgNo); + } + ArgNo++; } + getNoCapturePropegations(&Call, &ArgNosNoCapture); + assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly."); - if (!ArgNos.empty()) { + if (!ArgNosNonNull.empty() || !ArgNosNoCapture.empty()) { AttributeList AS = Call.getAttributes(); LLVMContext &Ctx = Call.getContext(); - AS = AS.addParamAttribute(Ctx, ArgNos, - Attribute::get(Ctx, Attribute::NonNull)); + if (!ArgNosNonNull.empty()) + AS = AS.addParamAttribute(Ctx, ArgNosNonNull, + Attribute::get(Ctx, Attribute::NonNull)); + if (!ArgNosNoCapture.empty()) + AS = AS.addParamAttribute(Ctx, ArgNosNoCapture, + Attribute::get(Ctx, Attribute::NoCapture)); Call.setAttributes(AS); Changed = true; } diff --git a/llvm/test/Transforms/InstCombine/nocapture-attribute.ll b/llvm/test/Transforms/InstCombine/nocapture-attribute.ll --- a/llvm/test/Transforms/InstCombine/nocapture-attribute.ll +++ b/llvm/test/Transforms/InstCombine/nocapture-attribute.ll @@ -19,7 +19,7 @@ define void @simple_propegated_a0_nocapture_a1_a2_maybe_capture(ptr nocapture %a0, ptr %a1, ptr %a2) local_unnamed_addr { ; CHECK-LABEL: define void @simple_propegated_a0_nocapture_a1_a2_maybe_capture ; CHECK-SAME: (ptr nocapture [[A0:%.*]], ptr [[A1:%.*]], ptr [[A2:%.*]]) local_unnamed_addr { -; CHECK-NEXT: tail call void @ptrs_maybe_capture(ptr [[A0]], ptr [[A1]], ptr [[A2]]) +; CHECK-NEXT: tail call void @ptrs_maybe_capture(ptr nocapture [[A0]], ptr [[A1]], ptr [[A2]]) ; CHECK-NEXT: ret void ; tail call void @ptrs_maybe_capture(ptr %a0, ptr %a1, ptr %a2) @@ -39,7 +39,7 @@ define i64 @propegate_past_trivially_read_only(ptr nocapture %a0, i64 %r) local_unnamed_addr { ; CHECK-LABEL: define i64 @propegate_past_trivially_read_only ; CHECK-SAME: (ptr nocapture [[A0:%.*]], i64 [[R:%.*]]) local_unnamed_addr { -; CHECK-NEXT: call void @ptrs_maybe_capture(ptr [[A0]], ptr [[A0]], ptr [[A0]]) +; CHECK-NEXT: call void @ptrs_maybe_capture(ptr nocapture [[A0]], ptr nocapture [[A0]], ptr nocapture [[A0]]) ; CHECK-NEXT: [[R0:%.*]] = mul i64 [[R]], [[R]] ; CHECK-NEXT: [[R1:%.*]] = mul i64 [[R0]], [[R0]] ; CHECK-NEXT: [[R2:%.*]] = shl i64 [[R1]], 1 @@ -86,7 +86,7 @@ ; CHECK-LABEL: define i64 @propegate_past_rd_function_alloca ; CHECK-SAME: (ptr nocapture [[A0:%.*]], i64 [[R:%.*]]) local_unnamed_addr { ; CHECK-NEXT: [[PN:%.*]] = alloca i64, align 8 -; CHECK-NEXT: call void @ptrs_maybe_capture(ptr [[A0]], ptr [[A0]], ptr [[A0]]) +; CHECK-NEXT: call void @ptrs_maybe_capture(ptr nocapture [[A0]], ptr nocapture [[A0]], ptr nocapture [[A0]]) ; CHECK-NEXT: [[R0:%.*]] = mul i64 [[R]], [[R]] ; CHECK-NEXT: [[R1:%.*]] = mul i64 [[R0]], [[R0]] ; CHECK-NEXT: [[R2:%.*]] = shl i64 [[R1]], 1 @@ -157,7 +157,7 @@ ; CHECK-SAME: (ptr nocapture [[A0:%.*]], i64 [[R:%.*]]) { ; CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8 ; CHECK-NEXT: call void @ptr_maybe_capture(ptr [[A0]]) -; CHECK-NEXT: call void @ptrs_maybe_capture(ptr [[A0]], ptr nonnull [[A]], ptr nonnull [[A]]) +; CHECK-NEXT: call void @ptrs_maybe_capture(ptr nocapture [[A0]], ptr nonnull [[A]], ptr nonnull [[A]]) ; CHECK-NEXT: ret void ; %a = alloca ptr @@ -170,8 +170,8 @@ ; CHECK-LABEL: define i32 @propegate_past_dead_alloca2 ; CHECK-SAME: (ptr nocapture [[A0:%.*]], i64 [[R:%.*]]) { ; CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8 -; CHECK-NEXT: call void @ptrs_maybe_capture(ptr [[A0]], ptr nonnull [[A]], ptr nonnull [[A]]) -; CHECK-NEXT: call void @ptr_maybe_capture(ptr [[A0]]) #[[ATTR0]] +; CHECK-NEXT: call void @ptrs_maybe_capture(ptr nocapture [[A0]], ptr nonnull [[A]], ptr nonnull [[A]]) +; CHECK-NEXT: call void @ptr_maybe_capture(ptr nocapture [[A0]]) #[[ATTR0]] ; CHECK-NEXT: ret i32 0 ; %a = alloca ptr @@ -185,7 +185,7 @@ ; CHECK-SAME: (ptr nocapture [[A0:%.*]], i64 [[R:%.*]]) { ; CHECK-NEXT: [[A:%.*]] = alloca ptr, align 8 ; CHECK-NEXT: call void @ptrs_maybe_capture(ptr [[A0]], ptr nonnull [[A]], ptr nonnull [[A]]) -; CHECK-NEXT: call void @ptrs_maybe_capture(ptr [[A0]], ptr nonnull [[A]], ptr nonnull [[A]]) +; CHECK-NEXT: call void @ptrs_maybe_capture(ptr nocapture [[A0]], ptr nonnull [[A]], ptr nonnull [[A]]) ; CHECK-NEXT: ret i32 0 ; %a = alloca ptr @@ -235,7 +235,7 @@ define i64 @propegate_return(ptr nocapture %a0) { ; CHECK-LABEL: define i64 @propegate_return ; CHECK-SAME: (ptr nocapture [[A0:%.*]]) { -; CHECK-NEXT: [[R:%.*]] = call i64 @ptr_maybe_capture.i64(ptr [[A0]]) +; CHECK-NEXT: [[R:%.*]] = call i64 @ptr_maybe_capture.i64(ptr nocapture [[A0]]) ; CHECK-NEXT: [[RR:%.*]] = mul i64 [[R]], [[R]] ; CHECK-NEXT: ret i64 [[RR]] ;