diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -85,6 +85,16 @@
     cl::init(true), cl::Hidden,
     cl::desc("Convert align attributes to assumptions during inlining."));
 
+static cl::opt<bool> UpdateReturnAttributes(
+    "update-return-attrs", cl::init(true), cl::Hidden,
+    cl::desc("Update return attributes on calls within inlined body"));
+
+static cl::opt<unsigned> InlinerAttributeWindow(
+    "max-inst-checked-for-throw-during-inlining", cl::Hidden,
+    cl::desc("the maximum number of instructions analyzed for may throw during "
+             "attribute inference in inlined body"),
+    cl::init(4));
+
 llvm::InlineResult llvm::InlineFunction(CallBase *CB, InlineFunctionInfo &IFI,
                                         AAResults *CalleeAAR,
                                         bool InsertLifetime) {
@@ -1136,6 +1146,97 @@
   }
 }
 
+static bool MayContainThrowingOrExitingCall(Instruction *Begin,
+                                            Instruction *End) {
+
+  assert(Begin->getParent() == End->getParent() &&
+         "Expected to be in same basic block!");
+  unsigned NumInstChecked = 0;
+  // Check that all instructions in the range [Begin, End) are guaranteed to
+  // transfer execution to successor.
+  for (auto &I : make_range(Begin->getIterator(), End->getIterator()))
+    if (NumInstChecked++ > InlinerAttributeWindow ||
+        !isGuaranteedToTransferExecutionToSuccessor(&I))
+      return true;
+  return false;
+}
+
+static AttrBuilder IdentifyValidAttributes(CallSite CS) {
+
+  AttrBuilder AB(CS.getAttributes(), AttributeList::ReturnIndex);
+  if (AB.empty())
+    return AB;
+  AttrBuilder Valid;
+  // Only allow these whitelisted attributes to be propagated back to the
+  // callee. This is because other attributes may only be valid on the call
+  // itself, i.e. attributes such as signext and zeroext.
+  if (auto DerefBytes = AB.getDereferenceableBytes())
+    Valid.addDereferenceableAttr(DerefBytes);
+  if (auto DerefOrNullBytes = AB.getDereferenceableOrNullBytes())
+    Valid.addDereferenceableOrNullAttr(DerefOrNullBytes);
+  if (AB.contains(Attribute::NoAlias))
+    Valid.addAttribute(Attribute::NoAlias);
+  if (AB.contains(Attribute::NonNull))
+    Valid.addAttribute(Attribute::NonNull);
+  return Valid;
+}
+
+static void AddReturnAttributes(CallSite CS, ValueToValueMapTy &VMap) {
+  if (!UpdateReturnAttributes)
+    return;
+
+  AttrBuilder Valid = IdentifyValidAttributes(CS);
+  if (Valid.empty())
+    return;
+  auto *CalledFunction = CS.getCalledFunction();
+  auto &Context = CalledFunction->getContext();
+
+  for (auto &BB : *CalledFunction) {
+    auto *RI = dyn_cast<ReturnInst>(BB.getTerminator());
+    if (!RI || !isa<CallBase>(RI->getOperand(0)))
+      continue;
+    auto *RetVal = cast<CallBase>(RI->getOperand(0));
+    // Sanity check that the cloned RetVal exists and is a call, otherwise we
+    // cannot add the attributes on the cloned RetVal.
+    // Simplification during inlining could have transformed the cloned
+    // instruction.
+    auto *NewRetVal = dyn_cast_or_null<CallBase>(VMap.lookup(RetVal));
+    if (!NewRetVal)
+      continue;
+    // Backward propagation of attributes to the returned value may be incorrect
+    // if it is control flow dependent.
+    // Consider:
+    // @callee {
+    //  %rv = call @foo()
+    //  %rv2 = call @bar()
+    //  if (%rv2 != null)
+    //    return %rv2
+    //  if (%rv == null)
+    //    exit()
+    //  return %rv
+    // }
+    // caller() {
+    //   %val = call nonnull @callee()
+    // }
+    // Here we cannot add the nonnull attribute on either foo or bar. So, we
+    // limit the check to cases where both RetVal and RI are in the same basic
+    // block and there are no throwing/exiting instructions between them.
+    if (RI->getParent() != RetVal->getParent() ||
+        MayContainThrowingOrExitingCall(RetVal, RI))
+      continue;
+    // Add to the existing attributes of NewRetVal, i.e. the cloned call
+    // instruction.
+    // NB! When we have the same attribute already existing on NewRetVal, but
+    // with a differing value, the AttributeList's merge API honours the already
+    // existing attribute value (i.e. attributes such as dereferenceable,
+    // dereferenceable_or_null etc). See AttrBuilder::merge for more details.
+    AttributeList AL = NewRetVal->getAttributes();
+    AttributeList NewAL =
+        AL.addAttributes(Context, AttributeList::ReturnIndex, Valid);
+    NewRetVal->setAttributes(NewAL);
+  }
+}
+
 /// If the inlined function has non-byval align arguments, then
 /// add @llvm.assume-based alignment assumptions to preserve this information.
 static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
@@ -1801,6 +1902,10 @@
   // Add noalias metadata if necessary.
   AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR);
 
+  // Clone return attributes on the callsite into the calls within the inlined
+  // function which feed into its return value.
+  AddReturnAttributes(CS, VMap);
+
   // Propagate llvm.mem.parallel_loop_access if necessary.
   PropagateParallelLoopAccessMetadata(CS, VMap);
 
diff --git a/llvm/test/Transforms/Inline/ret_attr_update.ll b/llvm/test/Transforms/Inline/ret_attr_update.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Inline/ret_attr_update.ll
@@ -0,0 +1,223 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -inline-threshold=0 -always-inline -S | FileCheck %s
+; RUN: opt < %s -passes=always-inline -S | FileCheck %s
+
+declare i8* @foo(i8*) argmemonly nounwind
+
+define i8* @callee(i8 *%p) alwaysinline {
+; CHECK-LABEL: @callee(
+; CHECK-NEXT:    [[R:%.*]] = call i8* @foo(i8* noalias [[P:%.*]])
+; CHECK-NEXT:    ret i8* [[R]]
+;
+  %r = call i8* @foo(i8* noalias %p)
+  ret i8* %r
+}
+
+define i8* @caller(i8* %ptr, i64 %x) {
+; CHECK-LABEL: @caller(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[X:%.*]]
+; CHECK-NEXT:    [[R_I:%.*]] = call nonnull i8* @foo(i8* noalias [[GEP]])
+; CHECK-NEXT:    ret i8* [[R_I]]
+;
+  %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+  %p = call nonnull i8* @callee(i8* %gep)
+  ret i8* %p
+}
+
+declare void @llvm.experimental.guard(i1,...)
+; Cannot add the nonnull attribute to foo,
+; because the guard is a throwing call.
+define internal i8* @callee_with_throwable(i8* %p) alwaysinline {
+  %r = call i8* @foo(i8* %p)
+  %cond = icmp ne i8* %r, null
+  call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+  ret i8* %r
+}
+
+declare i8* @bar(i8*) readonly nounwind
+; Here also we cannot add the nonnull attribute to the call to bar.
+define internal i8* @callee_with_explicit_control_flow(i8* %p) alwaysinline {
+  %r = call i8* @bar(i8* %p)
+  %cond = icmp ne i8* %r, null
+  br i1 %cond, label %ret, label %orig
+
+ret:
+  ret i8* %r
+
+orig:
+  ret i8* %p
+}
+
+define i8* @caller2(i8* %ptr, i64 %x, i1 %cond) {
+; CHECK-LABEL: @caller2(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[X:%.*]]
+; CHECK-NEXT:    [[R_I:%.*]] = call i8* @foo(i8* [[GEP]])
+; CHECK-NEXT:    [[COND_I:%.*]] = icmp ne i8* [[R_I]], null
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[COND_I]]) [ "deopt"() ]
+; CHECK-NEXT:    [[R_I1:%.*]] = call i8* @bar(i8* [[GEP]])
+; CHECK-NEXT:    [[COND_I2:%.*]] = icmp ne i8* [[R_I1]], null
+; CHECK-NEXT:    br i1 [[COND_I2]], label [[RET_I:%.*]], label [[ORIG_I:%.*]]
+; CHECK:       ret.i:
+; CHECK-NEXT:    br label [[CALLEE_WITH_EXPLICIT_CONTROL_FLOW_EXIT:%.*]]
+; CHECK:       orig.i:
+; CHECK-NEXT:    br label [[CALLEE_WITH_EXPLICIT_CONTROL_FLOW_EXIT]]
+; CHECK:       callee_with_explicit_control_flow.exit:
+; CHECK-NEXT:    [[Q3:%.*]] = phi i8* [ [[R_I1]], [[RET_I]] ], [ [[GEP]], [[ORIG_I]] ]
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[PRET:%.*]], label [[QRET:%.*]]
+; CHECK:       pret:
+; CHECK-NEXT:    ret i8* [[R_I]]
+; CHECK:       qret:
+; CHECK-NEXT:    ret i8* [[Q3]]
+;
+  %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+  %p = call nonnull i8* @callee_with_throwable(i8* %gep)
+  %q = call nonnull i8* @callee_with_explicit_control_flow(i8* %gep)
+  br i1 %cond, label %pret, label %qret
+
+pret:
+  ret i8* %p
+
+qret:
+  ret i8* %q
+}
+
+define internal i8* @callee3(i8 *%p) alwaysinline {
+  %r = call noalias i8* @foo(i8* %p)
+  ret i8* %r
+}
+
+; add the deref attribute to the existing attributes on foo.
+define i8* @caller3(i8* %ptr, i64 %x) {
+; CHECK-LABEL: @caller3(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[X:%.*]]
+; CHECK-NEXT:    [[R_I:%.*]] = call noalias dereferenceable_or_null(12) i8* @foo(i8* [[GEP]])
+; CHECK-NEXT:    ret i8* [[R_I]]
+;
+  %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+  %p = call dereferenceable_or_null(12) i8* @callee3(i8* %gep)
+  ret i8* %p
+}
+
+declare i8* @inf_loop_call(i8*) nounwind
+; We cannot propagate attributes to foo because we do not know whether inf_loop_call
+; will return execution.
+define internal i8* @callee_with_sideeffect_callsite(i8* %p) alwaysinline {
+  %r = call i8* @foo(i8* %p)
+  %v = call i8* @inf_loop_call(i8* %p)
+  ret i8* %r
+}
+
+; do not add deref attribute to foo
+define i8* @test4(i8* %ptr, i64 %x) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[X:%.*]]
+; CHECK-NEXT:    [[R_I:%.*]] = call i8* @foo(i8* [[GEP]])
+; CHECK-NEXT:    [[V_I:%.*]] = call i8* @inf_loop_call(i8* [[GEP]])
+; CHECK-NEXT:    ret i8* [[R_I]]
+;
+  %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+  %p = call dereferenceable_or_null(12) i8* @callee_with_sideeffect_callsite(i8* %gep)
+  ret i8* %p
+}
+
+declare i8* @baz(i8*) nounwind readonly
+define internal i8* @callee5(i8* %p) alwaysinline {
+  %r = call i8* @foo(i8* %p)
+  %v = call i8* @baz(i8* %p)
+  ret i8* %r
+}
+
+; add the deref attribute to foo.
+define i8* @test5(i8* %ptr, i64 %x) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[X:%.*]]
+; CHECK-NEXT:    [[R_I:%.*]] = call dereferenceable_or_null(12) i8* @foo(i8* [[GEP]])
+; CHECK-NEXT:    [[V_I:%.*]] = call i8* @baz(i8* [[GEP]])
+; CHECK-NEXT:    ret i8* [[R_I]]
+;
+  %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+  %s = call dereferenceable_or_null(12) i8* @callee5(i8* %gep)
+  ret i8* %s
+}
+
+; deref attributes have different values on the callee and the call feeding into
+; the return.
+; AttrBuilder chooses the already existing value and does not overwrite it.
+define internal i8* @callee6(i8* %p) alwaysinline {
+  %r = call dereferenceable_or_null(16) i8* @foo(i8* %p)
+  %v = call i8* @baz(i8* %p)
+  ret i8* %r
+}
+
+
+define i8* @test6(i8* %ptr, i64 %x) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[X:%.*]]
+; CHECK-NEXT:    [[R_I:%.*]] = call dereferenceable_or_null(16) i8* @foo(i8* [[GEP]])
+; CHECK-NEXT:    [[V_I:%.*]] = call i8* @baz(i8* [[GEP]])
+; CHECK-NEXT:    ret i8* [[R_I]]
+;
+  %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+  %s = call dereferenceable_or_null(12) i8* @callee6(i8* %gep)
+  ret i8* %s
+}
+
+; We add the attributes from the callee's callsite to both the calls below.
+define internal i8* @callee7(i8 *%ptr, i1 %cond) alwaysinline {
+  br i1 %cond, label %pass, label %fail
+
+pass:
+  %r = call i8* @foo(i8* noalias %ptr)
+  ret i8* %r
+
+fail:
+  %s = call i8* @baz(i8* %ptr)
+  ret i8* %s
+}
+
+define void @test7(i8* %ptr, i64 %x, i1 %cond) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[X:%.*]]
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[PASS_I:%.*]], label [[FAIL_I:%.*]]
+; CHECK:       pass.i:
+; CHECK-NEXT:    [[R_I:%.*]] = call nonnull i8* @foo(i8* noalias [[GEP]])
+; CHECK-NEXT:    br label [[CALLEE7_EXIT:%.*]]
+; CHECK:       fail.i:
+; CHECK-NEXT:    [[S_I:%.*]] = call nonnull i8* @baz(i8* [[GEP]])
+; CHECK-NEXT:    br label [[CALLEE7_EXIT]]
+; CHECK:       callee7.exit:
+; CHECK-NEXT:    [[T1:%.*]] = phi i8* [ [[R_I]], [[PASS_I]] ], [ [[S_I]], [[FAIL_I]] ]
+; CHECK-NEXT:    call void @snort(i8* [[T1]])
+; CHECK-NEXT:    ret void
+;
+
+  %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+  %t = call nonnull i8* @callee7(i8* %gep, i1 %cond)
+  call void @snort(i8* %t)
+  ret void
+}
+declare void @snort(i8*)
+
+declare i32 @intrinsic(i8*) nounwind argmemonly
+
+define internal i32 @callee8(i8* %ptr) alwaysinline {
+  %r = call i32 @intrinsic(i8* noalias %ptr)
+  ret i32 %r
+}
+
+
+; signext is an attribute specific to the target ABI and not the
+; callee/callsite.
+; We cannot propagate that attribute to another call since it can be invalid at
+; that call.
+define i32 @test8(i8* %ptr, i64 %x) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[X:%.*]]
+; CHECK-NEXT:    [[R_I:%.*]] = call i32 @intrinsic(i8* noalias [[GEP]])
+; CHECK-NEXT:    ret i32 [[R_I]]
+;
+
+  %gep = getelementptr inbounds i8, i8* %ptr, i64 %x
+  %t = call signext i32 @callee8(i8* %gep)
+  ret i32 %t
+}