diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -2685,6 +2685,24 @@ /// Return the locations encoded by \p MLK as a readable string. static std::string getMemoryLocationsAsStr(MemoryLocationsKind MLK); + /// Simple enum to distinguish read/write/read-write accesses. + enum AccessKind { + NONE = 0, + READ = 1 << 0, + WRITE = 1 << 1, + READ_WRITE = READ | WRITE, + }; + + /// Check \p Pred on all accesses to the memory kinds specified by \p MLK. + /// + /// This method will evaluate \p Pred on all accesses (access instruction + + /// underlying accessed memory pointer) and it will return true if \p Pred + /// holds every time. + virtual bool checkForAllAccessesToMemoryKind( + const function_ref &Pred, + MemoryLocationsKind MLK) const = 0; + /// Create an abstract attribute view for the position \p IRP. static AAMemoryLocation &createForPosition(const IRPosition &IRP, Attributor &A); diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -6201,17 +6201,101 @@ return IRAttribute::manifest(A); } + /// See AAMemoryLocation::checkForAllAccessesToMemoryKind(...). + bool checkForAllAccessesToMemoryKind( + const function_ref &Pred, + MemoryLocationsKind RequestedMLK) const override { + if (!isValidState()) + return false; + + MemoryLocationsKind AssumedMLK = getAssumedNotAccessedLocation(); + if (AssumedMLK == NO_LOCATIONS) + return true; + + for (MemoryLocationsKind CurMLK = 1; CurMLK < NO_LOCATIONS; CurMLK *= 2) { + if (CurMLK & RequestedMLK) + continue; + + const auto &Accesses = AccessKindAccessesMap.lookup(CurMLK); + for (const AccessInfo &AI : Accesses) + if (!Pred(*AI.I, AI.Ptr, AI.Kind, CurMLK)) + return false; + } + + return true; + } + + ChangeStatus indicatePessimisticFixpoint() override { + // If we give up and indicate a pessimistic fixpoint this instruction will + // become an access for all potential access kinds: + // TODO: Add pointers for argmemonly and globals to improve the results of + // checkForAllAccessesToMemoryKind. + bool Changed = false; + MemoryLocationsKind KnownMLK = getKnown(); + Instruction *I = dyn_cast(&getAssociatedValue()); + for (MemoryLocationsKind CurMLK = 1; CurMLK < NO_LOCATIONS; CurMLK *= 2) + if (!(CurMLK & KnownMLK)) + updateStateAndAccessesMap(getState(), AccessKindAccessesMap, CurMLK, I, + nullptr, Changed); + return AAMemoryLocation::indicatePessimisticFixpoint(); + } + protected: + /// Helper struct to tie together an instruction that has a read or write + /// effect with the pointer it accesses (if any). + struct AccessInfo { + + /// The instruction that caused the access. + const Instruction *I; + + /// The base pointer that is accessed, or null if unknown. + const Value *Ptr; + + /// The kind of access (read/write/read+write). + AccessKind Kind; + + bool operator==(const AccessInfo &RHS) const { + return I == RHS.I & Ptr == RHS.Ptr & Kind == RHS.Kind; + } + bool operator()(const AccessInfo &LHS, const AccessInfo &RHS) const { + if (LHS.I != RHS.I) + return LHS.I < RHS.I; + if (LHS.Ptr != RHS.Ptr) + return LHS.Ptr < RHS.Ptr; + if (LHS.Kind != RHS.Kind) + return LHS.Kind < RHS.Kind; + return false; + } + }; + + /// Mapping from *single* memory location kinds, e.g., LOCAL_MEM with the + /// value of NO_LOCAL_MEM, to the accesses encountered for this memory kind. + using AccessKindAccessesMapTy = + DenseMap>; + AccessKindAccessesMapTy AccessKindAccessesMap; + /// Return the kind(s) of location that may be accessed by \p V. AAMemoryLocation::MemoryLocationsKind categorizeAccessedLocations(Attributor &A, Instruction &I, bool &Changed); - /// Update the state \p State given that \p I is an access to a \p MLK memory - /// location with the access pointer \p Ptr. - static void updateState(AAMemoryLocation::StateType &State, - MemoryLocationsKind MLK, const Instruction &I, - const Value *Ptr, bool &Changed) { + /// Update the state \p State and the AccessKindAccessesMap given that \p I is + /// an access to a \p MLK memory location with the access pointer \p Ptr. + static void updateStateAndAccessesMap(AAMemoryLocation::StateType &State, + AccessKindAccessesMapTy &AccessMap, + MemoryLocationsKind MLK, + const Instruction *I, const Value *Ptr, + bool &Changed) { + // TODO: The kind should be determined at the call sites based on the + // information we have there. + AccessKind Kind = READ_WRITE; + if (I) { + Kind = I->mayReadFromMemory() ? READ : NONE; + Kind = AccessKind(Kind | (I->mayWriteToMemory() ? WRITE : NONE)); + } + assert(isPowerOf2_32(MLK) && "Expected a single location set!"); + Changed |= AccessMap[MLK].insert(AccessInfo{I, Ptr, Kind}).second; State.removeAssumedBits(MLK); } @@ -6251,32 +6335,39 @@ return true; if (auto *Arg = dyn_cast(&V)) { if (Arg->hasByValAttr()) - updateState(T, NO_LOCAL_MEM, I, &V, Changed); + updateStateAndAccessesMap(T, AccessKindAccessesMap, NO_LOCAL_MEM, &I, + &V, Changed); else - updateState(T, NO_ARGUMENT_MEM, I, &V, Changed); + updateStateAndAccessesMap(T, AccessKindAccessesMap, NO_ARGUMENT_MEM, &I, + &V, Changed); return true; } if (auto *GV = dyn_cast(&V)) { if (GV->hasLocalLinkage()) - updateState(T, NO_GLOBAL_INTERNAL_MEM, I, &V, Changed); + updateStateAndAccessesMap(T, AccessKindAccessesMap, + NO_GLOBAL_INTERNAL_MEM, &I, &V, Changed); else - updateState(T, NO_GLOBAL_EXTERNAL_MEM, I, &V, Changed); + updateStateAndAccessesMap(T, AccessKindAccessesMap, + NO_GLOBAL_EXTERNAL_MEM, &I, &V, Changed); return true; } if (isa(V)) { - updateState(T, NO_LOCAL_MEM, I, &V, Changed); + updateStateAndAccessesMap(T, AccessKindAccessesMap, NO_LOCAL_MEM, &I, &V, + Changed); return true; } if (ImmutableCallSite ICS = ImmutableCallSite(&V)) { const auto &NoAliasAA = A.getAAFor(*this, IRPosition::callsite_returned(ICS)); if (NoAliasAA.isAssumedNoAlias()) { - updateState(T, NO_MALLOCED_MEM, I, &V, Changed); + updateStateAndAccessesMap(T, AccessKindAccessesMap, NO_MALLOCED_MEM, &I, + &V, Changed); return true; } } - updateState(T, NO_UNKOWN_MEM, I, &V, Changed); + updateStateAndAccessesMap(T, AccessKindAccessesMap, NO_UNKOWN_MEM, &I, &V, + Changed); LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Ptr value cannot be categorized: " << V << " -> " << getMemoryLocationsAsStr(T.getAssumed()) << "\n"); @@ -6288,7 +6379,8 @@ /* MaxValues */ 32, StripGEPCB)) { LLVM_DEBUG( dbgs() << "[AAMemoryLocation] Pointer locations not categorized\n"); - updateState(State, NO_UNKOWN_MEM, I, nullptr, Changed); + updateStateAndAccessesMap(State, AccessKindAccessesMap, NO_UNKOWN_MEM, &I, + nullptr, Changed); } else { LLVM_DEBUG( dbgs() @@ -6318,21 +6410,37 @@ return NO_LOCATIONS; if (ICSMemLocationAA.isAssumedInaccessibleMemOnly()) { - updateState(AccessedLocs, NO_INACCESSIBLE_MEM, I, nullptr, Changed); + updateStateAndAccessesMap(AccessedLocs, AccessKindAccessesMap, + NO_INACCESSIBLE_MEM, &I, nullptr, Changed); return AccessedLocs.getAssumed(); } uint32_t ICSAssumedNotAccessedLocs = ICSMemLocationAA.getAssumedNotAccessedLocation(); - // Set the argmemonly bit as we handle them separately below. + // Set the argmemonly and global bit as we handle them separately below. uint32_t ICSAssumedNotAccessedLocsNoArgMem = - ICSAssumedNotAccessedLocs | NO_ARGUMENT_MEM; + ICSAssumedNotAccessedLocs | NO_ARGUMENT_MEM | NO_GLOBAL_MEM; for (MemoryLocationsKind CurMLK = 1; CurMLK < NO_LOCATIONS; CurMLK *= 2) { if (ICSAssumedNotAccessedLocsNoArgMem & CurMLK) continue; - updateState(AccessedLocs, CurMLK, I, nullptr, Changed); + updateStateAndAccessesMap(AccessedLocs, AccessKindAccessesMap, CurMLK, &I, + nullptr, Changed); + } + + // Now handle global memory if it might be accessed. + bool HasGlobalAccesses = !(ICSAssumedNotAccessedLocs & NO_GLOBAL_MEM); + if (HasGlobalAccesses) { + auto AccessPred = [&](const Instruction &, const Value *Ptr, + AccessKind Kind, MemoryLocationsKind MLK) { + updateStateAndAccessesMap(AccessedLocs, AccessKindAccessesMap, MLK, &I, + Ptr, Changed); + return true; + }; + if (!ICSMemLocationAA.checkForAllAccessesToMemoryKind( + AccessPred, inverseLocation(NO_GLOBAL_MEM, false, false))) + return AccessedLocs.getWorstState(); } LLVM_DEBUG( @@ -6381,7 +6489,8 @@ LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Failed to categorize instruction: " << I << "\n"); - updateState(AccessedLocs, NO_UNKOWN_MEM, I, nullptr, Changed); + updateStateAndAccessesMap(AccessedLocs, AccessKindAccessesMap, NO_UNKOWN_MEM, + &I, nullptr, Changed); return AccessedLocs.getAssumed(); } @@ -6456,9 +6565,16 @@ Function *F = getAssociatedFunction(); const IRPosition &FnPos = IRPosition::function(*F); auto &FnAA = A.getAAFor(*this, FnPos); - return clampStateAndIndicateChange( - getState(), - static_cast(FnAA.getState())); + bool Changed = false; + auto AccessPred = [&](const Instruction &I, const Value *Ptr, + AccessKind Kind, MemoryLocationsKind MLK) { + updateStateAndAccessesMap(getState(), AccessKindAccessesMap, MLK, &I, Ptr, + Changed); + return true; + }; + if (!FnAA.checkForAllAccessesToMemoryKind(AccessPred, ALL_LOCATIONS)) + return indicatePessimisticFixpoint(); + return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; } /// See AbstractAttribute::trackStatistics() diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes -; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=3 < %s | FileCheck %s +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=4 < %s | FileCheck %s %struct.ss = type { i32, i64 } diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll --- a/llvm/test/Transforms/Attributor/liveness.ll +++ b/llvm/test/Transforms/Attributor/liveness.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes -; RUN: opt -attributor --attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,MODULE,ALL_BUT_OLD_CGSCCC -; RUN: opt -attributor-cgscc --attributor-disable=false -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC -; RUN: opt -passes=attributor --attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,MODULE,ALL_BUT_OLD_CGSCCC -; RUN: opt -passes='attributor-cgscc' --attributor-disable=false -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC,ALL_BUT_OLD_CGSCCC +; RUN: opt -attributor --attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,MODULE,ALL_BUT_OLD_CGSCCC +; RUN: opt -attributor-cgscc --attributor-disable=false -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC +; RUN: opt -passes=attributor --attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,MODULE,ALL_BUT_OLD_CGSCCC +; RUN: opt -passes='attributor-cgscc' --attributor-disable=false -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC,ALL_BUT_OLD_CGSCCC ; UTC_ARGS: --disable ; ALL_BUT_OLD_CGSCCC: @dead_with_blockaddress_users.l = constant [2 x i8*] [i8* inttoptr (i32 1 to i8*), i8* inttoptr (i32 1 to i8*)] diff --git a/llvm/test/Transforms/Attributor/memory_locations.ll b/llvm/test/Transforms/Attributor/memory_locations.ll --- a/llvm/test/Transforms/Attributor/memory_locations.ll +++ b/llvm/test/Transforms/Attributor/memory_locations.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes -; RUN: opt -attributor --attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,MODULE,OLD_MODULE -; RUN: opt -attributor-cgscc --attributor-disable=false -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC,OLD_CGSCC -; RUN: opt -passes=attributor --attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,MODULE,NEW_MODULE -; RUN: opt -passes='attributor-cgscc' --attributor-disable=false -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC,NEW_CGSCC +; RUN: opt -attributor --attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,MODULE,OLD_MODULE +; RUN: opt -attributor-cgscc --attributor-disable=false -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC,OLD_CGSCC +; RUN: opt -passes=attributor --attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,MODULE,NEW_MODULE +; RUN: opt -passes='attributor-cgscc' --attributor-disable=false -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC,NEW_CGSCC target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: Function Attrs: inaccessiblememonly @@ -116,6 +116,63 @@ ret i8* %retval.0 } +define dso_local i8* @internal_only_rec_static_helper_malloc_noescape(i32 %arg) { +; FIXME: This is actually inaccessiblememonly because the malloced memory does not escape +; CHECK-NOT: inaccessiblememonly +; CHECK-LABEL: define {{[^@]+}}@internal_only_rec_static_helper_malloc_noescape +; CHECK-SAME: (i32 [[ARG:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call noalias i8* @internal_only_rec_static_malloc_noescape(i32 [[ARG]]) +; CHECK-NEXT: ret i8* [[CALL]] +; +entry: + %call = call i8* @internal_only_rec_static_malloc_noescape(i32 %arg) + ret i8* %call +} + +define internal i8* @internal_only_rec_static_malloc_noescape(i32 %arg) { +; FIXME: This is actually inaccessiblememonly because the malloced memory does not escape +; CHECK-NOT: inaccessiblememonly +; CHECK-LABEL: define {{[^@]+}}@internal_only_rec_static_malloc_noescape +; CHECK-SAME: (i32 [[ARG:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[REM:%.*]] = srem i32 [[ARG]], 2 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[REM]], 1 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[ARG]], 2 +; CHECK-NEXT: [[CALL:%.*]] = call noalias i8* @internal_only_rec(i32 [[DIV]]) +; CHECK-NEXT: br label [[RETURN:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[ARG]] to i64 +; CHECK-NEXT: [[CALL1:%.*]] = call noalias i8* @malloc(i64 [[CONV]]) +; CHECK-NEXT: store i8 0, i8* [[CALL1]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i8* [ [[CALL]], [[IF_THEN]] ], [ null, [[IF_END]] ] +; CHECK-NEXT: ret i8* [[RETVAL_0]] +; +entry: + %rem = srem i32 %arg, 2 + %cmp = icmp eq i32 %rem, 1 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %div = sdiv i32 %arg, 2 + %call = call i8* @internal_only_rec(i32 %div) + br label %return + +if.end: ; preds = %entry + %conv = sext i32 %arg to i64 + %call1 = call i8* @malloc(i64 %conv) + store i8 0, i8* %call1 + br label %return + +return: ; preds = %if.end, %if.then + %retval.0 = phi i8* [ %call, %if.then ], [ null, %if.end ] + ret i8* %retval.0 +} + define dso_local i8* @internal_argmem_only_read(i32* %arg) { ; CHECK: Function Attrs: inaccessiblemem_or_argmemonly ; CHECK-LABEL: define {{[^@]+}}@internal_argmem_only_read diff --git a/llvm/test/Transforms/Attributor/nofree.ll b/llvm/test/Transforms/Attributor/nofree.ll --- a/llvm/test/Transforms/Attributor/nofree.ll +++ b/llvm/test/Transforms/Attributor/nofree.ll @@ -1,4 +1,4 @@ -; RUN: opt -attributor --attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR +; RUN: opt -attributor --attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR ; Copied from Transforms/FunctoinAttrs/nofree-attributor.ll target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/Attributor/noreturn_async.ll b/llvm/test/Transforms/Attributor/noreturn_async.ll --- a/llvm/test/Transforms/Attributor/noreturn_async.ll +++ b/llvm/test/Transforms/Attributor/noreturn_async.ll @@ -1,4 +1,4 @@ -; RUN: opt -attributor -attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s +; RUN: opt -attributor -attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s ; ; This file is the same as noreturn_sync.ll but with a personality which ; indicates that the exception handler *can* catch asynchronous exceptions. As diff --git a/llvm/test/Transforms/Attributor/noreturn_sync.ll b/llvm/test/Transforms/Attributor/noreturn_sync.ll --- a/llvm/test/Transforms/Attributor/noreturn_sync.ll +++ b/llvm/test/Transforms/Attributor/noreturn_sync.ll @@ -1,4 +1,4 @@ -; RUN: opt -attributor -attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s +; RUN: opt -attributor -attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s ; ; This file is the same as noreturn_async.ll but with a personality which ; indicates that the exception handler *cannot* catch asynchronous exceptions.