diff --git a/llvm/include/llvm/IR/CallSite.h b/llvm/include/llvm/IR/CallSite.h --- a/llvm/include/llvm/IR/CallSite.h +++ b/llvm/include/llvm/IR/CallSite.h @@ -693,6 +693,18 @@ User::op_iterator getCallee() const; }; +/// Establish a view to a call site for examination. +class ImmutableCallSite : public CallSiteBase<> { +public: + ImmutableCallSite() = default; + ImmutableCallSite(const CallInst *CI) : CallSiteBase(CI) {} + ImmutableCallSite(const InvokeInst *II) : CallSiteBase(II) {} + ImmutableCallSite(const CallBrInst *CBI) : CallSiteBase(CBI) {} + explicit ImmutableCallSite(const Instruction *II) : CallSiteBase(II) {} + explicit ImmutableCallSite(const Value *V) : CallSiteBase(V) {} + ImmutableCallSite(CallSite CS) : CallSiteBase(CS.getInstruction()) {} +}; + /// AbstractCallSite /// /// An abstract call site is a wrapper that allows to treat direct, @@ -765,6 +777,13 @@ /// as well as the callee of the abstract call site. AbstractCallSite(const Use *U); + /// Add operand uses of \p ICS that represent callback uses into \p CBUses. + /// + /// All uses added to \p CBUses can be used to create abstract call sites for + /// which AbstractCallSite::isCallbackCall() will return true. + static void getCallbackUses(ImmutableCallSite ICS, + SmallVectorImpl &CBUses); + /// Conversion operator to conveniently check for a valid/initialized ACS. explicit operator bool() const { return (bool)CS; } @@ -902,18 +921,6 @@ } }; -/// Establish a view to a call site for examination. -class ImmutableCallSite : public CallSiteBase<> { -public: - ImmutableCallSite() = default; - ImmutableCallSite(const CallInst *CI) : CallSiteBase(CI) {} - ImmutableCallSite(const InvokeInst *II) : CallSiteBase(II) {} - ImmutableCallSite(const CallBrInst *CBI) : CallSiteBase(CBI) {} - explicit ImmutableCallSite(const Instruction *II) : CallSiteBase(II) {} - explicit ImmutableCallSite(const Value *V) : CallSiteBase(V) {} - ImmutableCallSite(CallSite CS) : CallSiteBase(CS.getInstruction()) {} -}; - } // end namespace llvm #endif // LLVM_IR_CALLSITE_H diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -280,17 +280,7 @@ } /// Return the associated argument, if any. - Argument *getAssociatedArgument() const { - if (auto *Arg = dyn_cast(&getAnchorValue())) - return Arg; - int ArgNo = getArgNo(); - if (ArgNo < 0) - return nullptr; - Function *AssociatedFn = getAssociatedFunction(); - if (!AssociatedFn || AssociatedFn->arg_size() <= unsigned(ArgNo)) - return nullptr; - return AssociatedFn->arg_begin() + ArgNo; - } + Argument *getAssociatedArgument() const; /// Return true if the position refers to a function interface, that is the /// function scope, the function return, or an argumnt. diff --git a/llvm/lib/IR/AbstractCallSite.cpp b/llvm/lib/IR/AbstractCallSite.cpp --- a/llvm/lib/IR/AbstractCallSite.cpp +++ b/llvm/lib/IR/AbstractCallSite.cpp @@ -33,6 +33,25 @@ STATISTIC(NumInvalidAbstractCallSitesNoCallback, "Number of invalid abstract call sites created (no callback)"); +void AbstractCallSite::getCallbackUses(ImmutableCallSite ICS, + SmallVectorImpl &CBUses) { + const Function *Callee = ICS.getCalledFunction(); + if (!Callee) + return; + + MDNode *CallbackMD = Callee->getMetadata(LLVMContext::MD_callback); + if (!CallbackMD) + return; + + for (const MDOperand &Op : CallbackMD->operands()) { + MDNode *OpMD = cast(Op.get()); + auto *CBCalleeIdxAsCM = cast(OpMD->getOperand(0)); + uint64_t CBCalleeIdx = + cast(CBCalleeIdxAsCM->getValue())->getZExtValue(); + CBUses.push_back(ICS.arg_begin() + CBCalleeIdx); + } +} + /// Create an abstract call site from a use. AbstractCallSite::AbstractCallSite(const Use *U) : CS(U->getUser()) { diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -183,6 +183,60 @@ } ///} +Argument *IRPosition::getAssociatedArgument() const { + if (getPositionKind() == IRP_ARGUMENT) + return cast(&getAnchorValue()); + + // Not an Argument and no argument number means this is not a call site + // argument, thus we cannot find a callback argument to return. + int ArgNo = getArgNo(); + if (ArgNo < 0) + return nullptr; + + // Use abstract call sites to make the connection between the call site + // values and the ones in callbacks. If a callback was found that makes use + // of the underlying call site operand, we want the corresponding callback + // callee argument and not the direct callee argument. + Optional CBCandidateArg; + SmallVector CBUses; + ImmutableCallSite ICS(&getAnchorValue()); + AbstractCallSite::getCallbackUses(ICS, CBUses); + for (const Use *U : CBUses) { + AbstractCallSite ACS(U); + assert(ACS && ACS.isCallbackCall()); + if (!ACS.getCalledFunction()) + continue; + + for (unsigned u = 0, e = ACS.getNumArgOperands(); u < e; u++) { + + // Test if the underlying call site operand is argument number u of the + // callback callee. + if (ACS.getCallArgOperandNo(u) != ArgNo) + continue; + + assert(ACS.getCalledFunction()->arg_size() > u && + "ACS mapped into var-args arguments!"); + if (CBCandidateArg.hasValue()) { + CBCandidateArg = nullptr; + break; + } + CBCandidateArg = ACS.getCalledFunction()->getArg(u); + } + } + + // If we found a unique callback candidate argument, return it. + if (CBCandidateArg.hasValue() && CBCandidateArg.getValue()) + return CBCandidateArg.getValue(); + + // If no callbacks were found, or none used the underlying call site operand + // exclusively, use the direct callee argument if available. + const Function *Callee = ICS.getCalledFunction(); + if (Callee && Callee->arg_size() > unsigned(ArgNo)) + return Callee->getArg(ArgNo); + + return nullptr; +} + /// For calls (and invokes) we will only replace instruction uses to not disturb /// the old style call graph. /// TODO: Remove this once we get rid of the old PM. @@ -2339,8 +2393,43 @@ /// NoAlias attribute for an argument. struct AANoAliasArgument final : AAArgumentFromCallSiteArguments { - AANoAliasArgument(const IRPosition &IRP) - : AAArgumentFromCallSiteArguments(IRP) {} + using Base = AAArgumentFromCallSiteArguments; + AANoAliasArgument(const IRPosition &IRP) : Base(IRP) {} + + /// See AbstractAttribute::update(...). + ChangeStatus updateImpl(Attributor &A) override { + // We have to make sure no-alias on the argument does not break + // synchronization when this is a callback argument, see also [1] below. + // If synchronization cannot be affected, we delegate to the base updateImpl + // function, otherwise we give up for now. + + // If the function is no-sync, no-alias cannot break synchronization. + const auto &NoSyncAA = A.getAAFor( + *this, IRPosition::function_scope(getIRPosition())); + if (NoSyncAA.isAssumedNoSync()) + return Base::updateImpl(A); + + // If the argument is read-only, no-alias cannot break synchronization. + const auto &MemBehaviorAA = + A.getAAFor(*this, getIRPosition()); + if (MemBehaviorAA.isAssumedReadOnly()) + return Base::updateImpl(A); + + // If the argument is never passed through callbacks, no-alias cannot break + // synchronization. + if (A.checkForAllCallSites( + [](AbstractCallSite ACS) { return !ACS.isCallbackCall(); }, *this, + true)) + return Base::updateImpl(A); + + // TODO: add no-alias but make sure it doesn't break synchronization by + // introducing fake uses. See: + // [1] Compiler Optimizations for OpenMP, J. Doerfert and H. Finkel, + // International Workshop on OpenMP 2018, + // http://compilers.cs.uni-saarland.de/people/doerfert/par_opt18.pdf + + return indicatePessimisticFixpoint(); + } /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(noalias) } @@ -2395,6 +2484,7 @@ // (iii) Check there is no other pointer argument which could alias with the // value. + // TODO: AbstractCallSite ImmutableCallSite ICS(&getAnchorValue()); for (unsigned i = 0; i < ICS.getNumArgOperands(); i++) { if (getArgNo() == (int)i) diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll b/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll --- a/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll @@ -14,7 +14,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* undef, align 4 ; CHECK-NEXT: [[CALL:%.*]] = call i32 @fn1(i32 [[TMP0]]) ; CHECK-NEXT: store i32 [[CALL]], i32* [[P]] -; CHECK-NEXT: br label %for.cond1 +; CHECK-NEXT: br label [[FOR_COND1:%.*]] ; entry: br label %if.end @@ -55,7 +55,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* undef, align 4 ; CHECK-NEXT: [[CALL:%.*]] = call i32 @fn0(i32 [[TMP0]]) ; CHECK-NEXT: store i32 [[CALL]], i32* [[P]] -; CHECK-NEXT: br label %for.cond1 +; CHECK-NEXT: br label [[FOR_COND1:%.*]] ; entry: br label %if.end diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll --- a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll @@ -33,7 +33,7 @@ ; CHECK-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 ; CHECK-NEXT: store float 3.000000e+00, float* [[P]], align 4 ; CHECK-NEXT: store i32 7, i32* [[N_ADDR]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) @1, i32 3, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nonnull align 4 dereferenceable(4) [[N_ADDR]], float* nonnull align 4 dereferenceable(4) [[P]], i64 4617315517961601024) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) @1, i32 3, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[P]], i64 4617315517961601024) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll --- a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll @@ -33,10 +33,10 @@ ; CHECK-NEXT: [[ALLOC1:%.*]] = alloca i8, align 8 ; CHECK-NEXT: [[ALLOC2:%.*]] = alloca i8, align 8 ; CHECK-NEXT: [[THREAD:%.*]] = alloca i64, align 8 -; CHECK-NEXT: [[CALL:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias null, i8* (i8*)* nonnull @foo, i8* noalias null) -; CHECK-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias null, i8* (i8*)* nonnull @bar, i8* nonnull align 8 dereferenceable(8) bitcast (i8** @GlobalVPtr to i8*)) -; CHECK-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias null, i8* (i8*)* nonnull @baz, i8* noalias nocapture nonnull align 8 dereferenceable(1) [[ALLOC1]]) -; CHECK-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias null, i8* (i8*)* nonnull @buz, i8* nonnull align 8 dereferenceable(1) [[ALLOC2]]) +; CHECK-NEXT: [[CALL:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias null, i8* (i8*)* nonnull @foo, i8* noalias nofree readnone null) +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias null, i8* (i8*)* nonnull @bar, i8* nofree nonnull readnone align 8 dereferenceable(8) bitcast (i8** @GlobalVPtr to i8*)) +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias null, i8* (i8*)* nonnull @baz, i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(1) [[ALLOC1]]) +; CHECK-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias null, i8* (i8*)* nonnull @buz, i8* nofree nonnull readnone align 8 dereferenceable(1) [[ALLOC2]]) ; CHECK-NEXT: ret i32 0 ; entry: @@ -76,7 +76,7 @@ ; CHECK-LABEL: define {{[^@]+}}@baz ; CHECK-SAME: (i8* noalias nofree nonnull readnone returned align 8 dereferenceable(1) [[ARG:%.*]]) ; CHECK-NEXT: entry: -; CHECK-NEXT: ret i8* [[ARG:%.*]] +; CHECK-NEXT: ret i8* [[ARG]] ; entry: ret i8* %arg @@ -86,7 +86,7 @@ ; CHECK-LABEL: define {{[^@]+}}@buz ; CHECK-SAME: (i8* nofree nonnull readnone returned align 8 dereferenceable(1) [[ARG:%.*]]) ; CHECK-NEXT: entry: -; CHECK-NEXT: ret i8* [[ARG:%.*]] +; CHECK-NEXT: ret i8* [[ARG]] ; entry: ret i8* %arg diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll b/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll --- a/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll @@ -39,7 +39,7 @@ define dso_local void @caller() { ; CHECK-LABEL: define {{[^@]+}}@caller() ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @broker(i32* nonnull align 4 dereferenceable(4) @gtl, i32 (i32*, i32*)* nonnull @callee, i32* nonnull align 4 dereferenceable(4) @gsh) +; CHECK-NEXT: call void @broker(i32* nofree nonnull readonly align 4 dereferenceable(4) @gtl, i32 (i32*, i32*)* nonnull @callee, i32* nofree nonnull readonly align 4 dereferenceable(4) @gsh) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/Attributor/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll --- a/llvm/test/Transforms/Attributor/callbacks.ll +++ b/llvm/test/Transforms/Attributor/callbacks.ll @@ -13,7 +13,7 @@ ; each other but argument 3-5 of the transitive call site in the caller match ; arguments 2-4 of the callback callee. Here we should see information and value ; transfer in both directions. -; FIXME: The callee -> call site direction is not working yet. +; FIXME: %a should be align 256 at the call site define void @t0_caller(i32* %a) { ; CHECK-LABEL: @t0_caller( @@ -24,7 +24,8 @@ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; CHECK-NEXT: store i32 42, i32* [[B]], align 32 ; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64 -; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* [[A:%.*]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) +; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* [[A:%.*]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) + ; CHECK-NEXT: ret void ; entry: @@ -41,7 +42,8 @@ ; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below! ; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call. define internal void @t0_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) { -; CHECK-LABEL: @t0_callback_callee( +; CHECK-LABEL: define {{[^@]+}}@t0_callback_callee +; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR:%.*]], align 8 ; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL:%.*]]