diff --git a/llvm/include/llvm/IR/CallSite.h b/llvm/include/llvm/IR/CallSite.h --- a/llvm/include/llvm/IR/CallSite.h +++ b/llvm/include/llvm/IR/CallSite.h @@ -693,6 +693,18 @@ User::op_iterator getCallee() const; }; +/// Establish a view to a call site for examination. +class ImmutableCallSite : public CallSiteBase<> { +public: + ImmutableCallSite() = default; + ImmutableCallSite(const CallInst *CI) : CallSiteBase(CI) {} + ImmutableCallSite(const InvokeInst *II) : CallSiteBase(II) {} + ImmutableCallSite(const CallBrInst *CBI) : CallSiteBase(CBI) {} + explicit ImmutableCallSite(const Instruction *II) : CallSiteBase(II) {} + explicit ImmutableCallSite(const Value *V) : CallSiteBase(V) {} + ImmutableCallSite(CallSite CS) : CallSiteBase(CS.getInstruction()) {} +}; + /// AbstractCallSite /// /// An abstract call site is a wrapper that allows to treat direct, @@ -765,6 +777,13 @@ /// as well as the callee of the abstract call site. AbstractCallSite(const Use *U); + /// Add operand uses of \p ICS that represent callback uses into \p CBUses. + /// + /// All uses added to \p CBUses can be used to create abstract call sites for + /// which AbstractCallSite::isCallbackCall() will return true. + static void getCallbackUses(ImmutableCallSite ICS, + SmallVectorImpl &CBUses); + /// Conversion operator to conveniently check for a valid/initialized ACS. explicit operator bool() const { return (bool)CS; } @@ -902,18 +921,6 @@ } }; -/// Establish a view to a call site for examination. -class ImmutableCallSite : public CallSiteBase<> { -public: - ImmutableCallSite() = default; - ImmutableCallSite(const CallInst *CI) : CallSiteBase(CI) {} - ImmutableCallSite(const InvokeInst *II) : CallSiteBase(II) {} - ImmutableCallSite(const CallBrInst *CBI) : CallSiteBase(CBI) {} - explicit ImmutableCallSite(const Instruction *II) : CallSiteBase(II) {} - explicit ImmutableCallSite(const Value *V) : CallSiteBase(V) {} - ImmutableCallSite(CallSite CS) : CallSiteBase(CS.getInstruction()) {} -}; - } // end namespace llvm #endif // LLVM_IR_CALLSITE_H diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -288,17 +288,7 @@ /// Return the associated argument, if any. /// ///{ - Argument *getAssociatedArgument() { - if (auto *Arg = dyn_cast(&getAnchorValue())) - return Arg; - int ArgNo = getArgNo(); - if (ArgNo < 0) - return nullptr; - Function *AssociatedFn = getAssociatedFunction(); - if (!AssociatedFn || AssociatedFn->arg_size() <= unsigned(ArgNo)) - return nullptr; - return AssociatedFn->arg_begin() + ArgNo; - } + Argument *getAssociatedArgument(); const Argument *getAssociatedArgument() const { return const_cast(this)->getAssociatedArgument(); } diff --git a/llvm/lib/IR/AbstractCallSite.cpp b/llvm/lib/IR/AbstractCallSite.cpp --- a/llvm/lib/IR/AbstractCallSite.cpp +++ b/llvm/lib/IR/AbstractCallSite.cpp @@ -33,6 +33,25 @@ STATISTIC(NumInvalidAbstractCallSitesNoCallback, "Number of invalid abstract call sites created (no callback)"); +void AbstractCallSite::getCallbackUses(ImmutableCallSite ICS, + SmallVectorImpl &CBUses) { + const Function *Callee = ICS.getCalledFunction(); + if (!Callee) + return; + + MDNode *CallbackMD = Callee->getMetadata(LLVMContext::MD_callback); + if (!CallbackMD) + return; + + for (const MDOperand &Op : CallbackMD->operands()) { + MDNode *OpMD = cast(Op.get()); + auto *CBCalleeIdxAsCM = cast(OpMD->getOperand(0)); + uint64_t CBCalleeIdx = + cast(CBCalleeIdxAsCM->getValue())->getZExtValue(); + CBUses.push_back(ICS.arg_begin() + CBCalleeIdx); + } +} + /// Create an abstract call site from a use. AbstractCallSite::AbstractCallSite(const Use *U) : CS(U->getUser()) { diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -153,6 +153,51 @@ } ///} +Argument *IRPosition::getAssociatedArgument() { + if (getPositionKind() == IRP_ARGUMENT) + return cast(&getAnchorValue()); + + // Not an Argument and no argument number means this is not a call site + // argument, thus we cannot find a callback argument to return. + int ArgNo = getArgNo(); + if (ArgNo < 0) + return nullptr; + + // Use abstract call sites to make the connection between the call site + // values and the ones in callbacks. If a callback was found that makes use + // of the underlying call site operand, we want the corresponding callback + // callee argument and not the direct callee argument. + SmallVector CBUses; + ImmutableCallSite ICS(&getAnchorValue()); + AbstractCallSite::getCallbackUses(ICS, CBUses); + for (const Use *U : CBUses) { + AbstractCallSite ACS(U); + assert(ACS && ACS.isCallbackCall()); + if (!ACS.getCalledFunction()) + continue; + + for (unsigned u = 0, e = ACS.getNumArgOperands(); u < e; u++) { + + // Test if the underlying call site operand is argument number u of the + // callback callee. + if (ACS.getCallArgOperandNo(u) != ArgNo) + continue; + + assert(ACS.getCalledFunction()->arg_size() > u && + "ACS mapped into var-args arguments!"); + return ACS.getCalledFunction()->getArg(u); + } + } + + // If no callbacks were found, or none used the underlying call site operand + // exclusively, use the direct callee argument if available. + const Function *Callee = ICS.getCalledFunction(); + if (Callee && Callee->arg_size() > unsigned(ArgNo)) + return Callee->getArg(ArgNo); + + return nullptr; +} + /// Recursively visit all values that might become \p IRP at some point. This /// will be done by looking through cast instructions, selects, phis, and calls /// with the "returned" attribute. Once we cannot look through the value any @@ -1930,8 +1975,43 @@ /// NoAlias attribute for an argument. struct AANoAliasArgument final : AAArgumentFromCallSiteArguments { - AANoAliasArgument(const IRPosition &IRP) - : AAArgumentFromCallSiteArguments(IRP) {} + using Base = AAArgumentFromCallSiteArguments; + AANoAliasArgument(const IRPosition &IRP) : Base(IRP) {} + + /// See AbstractAttribute::update(...). + ChangeStatus updateImpl(Attributor &A) override { + // We have to make sure no-alias on the argument does not break + // synchronization when this is a callback argument, see also [1] below. + // If synchronization cannot be affected, we delegate to the base updateImpl + // function, otherwise we give up for now. + + // If the function is no-sync, no-alias cannot break synchronization. + const auto &NoSyncAA = A.getAAFor( + *this, IRPosition::function_scope(getIRPosition())); + if (NoSyncAA.isAssumedNoSync()) + return Base::updateImpl(A); + + // If the argument is read-only, no-alias cannot break synchronization. + const auto &MemBehaviorAA = + A.getAAFor(*this, getIRPosition()); + if (MemBehaviorAA.isAssumedReadOnly()) + return Base::updateImpl(A); + + // If the argument is never passed through callbacks, no-alias cannot break + // synchronization. + if (A.checkForAllCallSites( + [](AbstractCallSite ACS) { return !ACS.isCallbackCall(); }, *this, + true)) + return Base::updateImpl(A); + + // TODO: add no-alias but make sure it doesn't break synchronization by + // introducing fake uses. See: + // [1] Compiler Optimizations for OpenMP, J. Doerfert and H. Finkel, + // International Workshop on OpenMP 2018, + // http://compilers.cs.uni-saarland.de/people/doerfert/par_opt18.pdf + + return indicatePessimisticFixpoint(); + } /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(noalias) } @@ -1984,6 +2064,7 @@ // (iii) Check there is no other pointer argument which could alias with the // value. + // TODO: AbstractCallSite ImmutableCallSite ICS(&getAnchorValue()); for (unsigned i = 0; i < ICS.getNumArgOperands(); i++) { if (getArgNo() == (int)i) diff --git a/llvm/test/Transforms/FunctionAttrs/callbacks.ll b/llvm/test/Transforms/FunctionAttrs/callbacks.ll --- a/llvm/test/Transforms/FunctionAttrs/callbacks.ll +++ b/llvm/test/Transforms/FunctionAttrs/callbacks.ll @@ -1,7 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; FIXME: Add -attributor-max-iterations-verify -attributor-max-iterations below. -; This flag was removed because max iterations is 2 in most cases, but in windows it is 1. -; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false < %s | FileCheck %s +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s ; ModuleID = 'callback_simple.c' target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -13,10 +11,11 @@ ; each other but argument 3-5 of the transitive call site in the caller match ; arguments 2-4 of the callback callee. Here we should see information and value ; transfer in both directions. -; FIXME: The callee -> call site direction is not working yet. +; FIXME: %a should be align 256 in the callback and at the call site define void @t0_caller(i32* %a) { -; CHECK-LABEL: @t0_caller( +; CHECK-LABEL: define {{[^@]+}}@t0_caller +; CHECK-SAME: (i32* [[A:%.*]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 32 ; CHECK-NEXT: [[C:%.*]] = alloca i32*, align 64 @@ -24,7 +23,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; CHECK-NEXT: store i32 42, i32* [[B]], align 32 ; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64 -; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* [[A:%.*]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) +; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* [[A]], i64 99, i32** noalias nocapture nonnull align 64 dereferenceable(8) [[C]]) ; CHECK-NEXT: ret void ; entry: @@ -41,12 +40,13 @@ ; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below! ; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call. define internal void @t0_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) { -; CHECK-LABEL: @t0_callback_callee( +; CHECK-LABEL: define {{[^@]+}}@t0_callback_callee +; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly dereferenceable(4) [[PTR:%.*]], i32* [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR:%.*]], align 8 -; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C:%.*]], align 64 -; CHECK-NEXT: tail call void @t0_check(i32* align 256 [[A:%.*]], i64 99, i32* [[TMP0]]) +; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 +; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64 +; CHECK-NEXT: tail call void @t0_check(i32* align 256 [[A]], i64 99, i32* [[TMP0]]) ; CHECK-NEXT: ret void ; entry: @@ -61,5 +61,112 @@ declare !callback !0 void @t0_callback_broker(i32*, i32*, void (i32*, i32*, ...)*, ...) +; Test 1 +; +; Similar to test 0 but with some additional annotations (noalias/nocapute) to make sure +; we deduce and propagate noalias and others properly. + +define void @t1_caller(i32* noalias %a) { +; CHECK-LABEL: define {{[^@]+}}@t1_caller +; CHECK-SAME: (i32* noalias nocapture [[A:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 32 +; CHECK-NEXT: [[C:%.*]] = alloca i32*, align 64 +; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 128 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: store i32 42, i32* [[B]], align 32 +; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64 +; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture [[A]], i64 99, i32** noalias nocapture nonnull align 64 dereferenceable(8) [[C]]) +; CHECK-NEXT: ret void +; +entry: + %b = alloca i32, align 32 + %c = alloca i32*, align 64 + %ptr = alloca i32, align 128 + %0 = bitcast i32* %b to i8* + store i32 42, i32* %b, align 4 + store i32* %b, i32** %c, align 8 + call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* null, i32* %ptr, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* %a, i64 99, i32** %c) + ret void +} + +; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below! +; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call. +define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) { +; CHECK-LABEL: define {{[^@]+}}@t1_callback_callee +; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly dereferenceable(4) [[PTR:%.*]], i32* noalias nocapture [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 +; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64 +; CHECK-NEXT: tail call void @t1_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]]) +; CHECK-NEXT: ret void +; +entry: + %ptr_val = load i32, i32* %ptr, align 8 + store i32 %ptr_val, i32* %is_not_null + %0 = load i32*, i32** %c, align 8 + tail call void @t1_check(i32* %a, i64 %b, i32* %0) + ret void +} + +declare void @t1_check(i32* nocapture align 256, i64, i32* nocapture) nosync + +declare !callback !0 void @t1_callback_broker(i32* nocapture , i32* nocapture , void (i32*, i32*, ...)* nocapture, ...) + +; Test 2 +; +; Similar to test 1 but checking that the noalias is only placed if potential synchronization through @t2_check is preserved. + +define void @t2_caller(i32* noalias %a) { +; CHECK-LABEL: define {{[^@]+}}@t2_caller +; CHECK-SAME: (i32* noalias nocapture [[A:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 32 +; CHECK-NEXT: [[C:%.*]] = alloca i32*, align 64 +; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 128 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: store i32 42, i32* [[B]], align 32 +; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64 +; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture [[A]], i64 99, i32** noalias nocapture nonnull align 64 dereferenceable(8) [[C]]) +; CHECK-NEXT: ret void +; +entry: + %b = alloca i32, align 32 + %c = alloca i32*, align 64 + %ptr = alloca i32, align 128 + %0 = bitcast i32* %b to i8* + store i32 42, i32* %b, align 4 + store i32* %b, i32** %c, align 8 + call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* null, i32* %ptr, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* %a, i64 99, i32** %c) + ret void +} + +; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below! +; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call. +; +; FIXME: We should derive noalias for %a and add a "fake use" of %a in all potentially synchronizing calls. +define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) { +; CHECK-LABEL: define {{[^@]+}}@t2_callback_callee +; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly dereferenceable(4) [[PTR:%.*]], i32* nocapture [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 +; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64 +; CHECK-NEXT: tail call void @t2_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]]) +; CHECK-NEXT: ret void +; +entry: + %ptr_val = load i32, i32* %ptr, align 8 + store i32 %ptr_val, i32* %is_not_null + %0 = load i32*, i32** %c, align 8 + tail call void @t2_check(i32* %a, i64 %b, i32* %0) + ret void +} + +declare void @t2_check(i32* nocapture align 256, i64, i32* nocapture) + +declare !callback !0 void @t2_callback_broker(i32* nocapture , i32* nocapture , void (i32*, i32*, ...)* nocapture, ...) + !0 = !{!1} !1 = !{i64 2, i64 -1, i64 -1, i1 true}