diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -6223,16 +6223,33 @@ static void getKnownStateFromValue(Attributor &A, const IRPosition &IRP, BitIntegerState &State, bool IgnoreSubsumingPositions = false) { - // For internal functions we ignore `argmemonly` and + // For internal functions and call sites we ignore `argmemonly` and // `inaccessiblememorargmemonly` as we might break it via interprocedural // constant propagation. It is unclear if this is the best way but it is // unlikely this will cause real performance problems. If we are deriving - // attributes for the anchor function we even remove the attribute in - // addition to ignoring it. - bool UseArgMemOnly = true; + // attributes for the internal anchor function we even remove the attribute + // in addition to ignoring it. + // + // The reason we ignore argmemonly on call sites is that we want + // AAMemoryLocation to describe the accessed location within the caller + // scope. If we set the know argmemonly bit it behaves differently than + // the argmemonly bit on a store next. There are two downsides, one + // theoretical probably the other worth revisiting later: + // 1) The call site could be annotated argmemonly but the callee not. This + // is unlikely to happen except if 2) is addressed and then we don't + // need to look at the IR but can simply look at the result of 2). + // 2) We use call site information to augment liveness and that makes the + // callee argmemonly (wrt. the callee) but only for this call site. + // Since we actually ask the function for the accesses this should not + // actually work fine but we don't have the infrastructure yet anyway. + bool UseArgMemOnly = !IRP.isAnyCallSitePosition(); + bool RemoveArgMemOnly = false; Function *AnchorFn = IRP.getAnchorScope(); if (AnchorFn && A.isRunOn(*AnchorFn)) - UseArgMemOnly = !AnchorFn->hasLocalLinkage(); + if (AnchorFn->hasLocalLinkage()) { + UseArgMemOnly = false; + RemoveArgMemOnly = true; + } SmallVector Attrs; IRP.getAttrs(AttrKinds, Attrs, IgnoreSubsumingPositions); @@ -6247,14 +6264,14 @@ case Attribute::ArgMemOnly: if (UseArgMemOnly) State.addKnownBits(inverseLocation(NO_ARGUMENT_MEM, true, true)); - else + else if (RemoveArgMemOnly) IRP.removeAttrs({Attribute::ArgMemOnly}); break; case Attribute::InaccessibleMemOrArgMemOnly: if (UseArgMemOnly) State.addKnownBits(inverseLocation( NO_INACCESSIBLE_MEM | NO_ARGUMENT_MEM, true, true)); - else + else if (RemoveArgMemOnly) IRP.removeAttrs({Attribute::InaccessibleMemOrArgMemOnly}); break; default: @@ -6677,11 +6694,15 @@ /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { - AAMemoryLocationImpl::initialize(A); + // We do not want to give up if the callee is not IPO amendable but has an + // argmemonly or inaccessiblememorargmemonly attribute. Function *F = getAssociatedFunction(); - if (!F || !A.isFunctionIPOAmendable(*F)) { - indicatePessimisticFixpoint(); - return; + if (F && (F->hasFnAttribute(Attribute::ArgMemOnly) || + F->hasFnAttribute(Attribute::InaccessibleMemOrArgMemOnly))) { + intersectAssumedBits(BEST_STATE); + getKnownStateFromValue(A, getIRPosition(), getState()); + } else { + AAMemoryLocationImpl::initialize(A); } } @@ -6691,21 +6712,74 @@ // call site specific liveness liveness information and then it makes // sense to specialize attributes for call sites arguments instead of // redirecting requests to the callee argument. + CallBase &CB = cast(getAnchorValue()); Function *F = getAssociatedFunction(); + if (!F) + return indicatePessimisticFixpoint(); const IRPosition &FnPos = IRPosition::function(*F); auto &FnAA = A.getAAFor(*this, FnPos); bool Changed = false; - auto AccessPred = [&](const Instruction *I, const Value *Ptr, - AccessKind Kind, MemoryLocationsKind MLK) { - updateStateAndAccessesMap(getState(), MLK, I, Ptr, Changed, - getAccessKindFromInst(I)); + + // First, check "argmem" accesses and register an access to the + // corresponding call site operand, if known, or all pointer call site + // operands, otherwise. + auto ArgAccessPred = [&](const Instruction *I, const Value *Ptr, + AccessKind Kind, MemoryLocationsKind) { + if (auto *Arg = cast_or_null(Ptr)) { + Ptr = CB.getArgOperand(Arg->getArgNo()); + categorizePtrValue(A, CB, *Ptr, getState(), Changed); + } else { + categorizeArgumentPointerLocations(A, CB, getState(), Changed); + } return true; }; - if (!FnAA.checkForAllAccessesToMemoryKind(AccessPred, ALL_LOCATIONS)) + if (!FnAA.checkForAllAccessesToMemoryKind( + ArgAccessPred, inverseLocation(NO_ARGUMENT_MEM, false, false))) return indicatePessimisticFixpoint(); + + // Next, check accesses to globals as we can represent them in the call site + // scope. + auto GlobalAccessPred = [&](const Instruction *I, const Value *Ptr, + AccessKind Kind, MemoryLocationsKind MLK) { + updateStateAndAccessesMap(getState(), MLK, &CB, Ptr, Changed, Kind); + return true; + }; + if (!FnAA.checkForAllAccessesToMemoryKind( + GlobalAccessPred, inverseLocation(NO_GLOBAL_MEM, false, false))) + return indicatePessimisticFixpoint(); + + // Finally, check the remaining kinds and register the call without a + // pointer. + MemoryLocationsKind FnMLK = FnAA.getAssumedNotAccessedLocation(); + for (MemoryLocationsKind CurMLK = 1; CurMLK < NO_LOCATIONS; CurMLK *= 2) + if (!(CurMLK & FnMLK) && CurMLK != NO_ARGUMENT_MEM && + CurMLK != NO_GLOBAL_MEM) + updateStateAndAccessesMap(getState(), CurMLK, &CB, nullptr, Changed, + getAccessKindFromInst(&CB)); + return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; } + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + // While we could annotate the call site, it is tricky. Take + // + // void foo(int *a) { *a = 1; } + // + // void bar() { + // int x; + // foo(&x); + // } + // + // While foo is argmemonly, we want the call site to be local (=stack) only + // while we summarize the effects of bar. At the same time we cannot use + // readnone for the call because it isn't. Since the call annotation would + // not be better than the callee annotation. It is also unclear if + // argmemonly on a call site means it is argmemonly wrt. the callee + // arguments or the caller arguments. + return ChangeStatus::UNCHANGED; + } + /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { if (isAssumedReadNone()) diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/align.ll b/llvm/test/Transforms/Attributor/align.ll --- a/llvm/test/Transforms/Attributor/align.ll +++ b/llvm/test/Transforms/Attributor/align.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=10 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=10 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/depgraph.ll b/llvm/test/Transforms/Attributor/depgraph.ll --- a/llvm/test/Transforms/Attributor/depgraph.ll +++ b/llvm/test/Transforms/Attributor/depgraph.ll @@ -88,7 +88,6 @@ ; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned ; GRAPH: [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument ; GRAPH: updates [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument -; GRAPH: updates [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument ; GRAPH: [AAAlign] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state align<0-16> ; GRAPH: updates [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16> ; GRAPH: updates [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16> @@ -118,22 +117,8 @@ ; GRAPH: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument ; GRAPH: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument ; GRAPH: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument -; GRAPH: [AANonNull] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state nonnull -; GRAPH: updates [AANonNull] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state nonnull -; GRAPH: updates [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state nonnull -; GRAPH: updates [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull -; GRAPH: updates [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull -; GRAPH: [AANoSync] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nosync -; GRAPH: updates [AANoSync] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nosync -; GRAPH: [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nofree -; GRAPH: updates [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nofree -; GRAPH: [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument -; GRAPH: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument -; GRAPH: [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16> -; GRAPH: updates [AAAlign] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state align<0-16> -; GRAPH: [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state nonnull ; GRAPH: updates [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull -; GRAPH-NOT: update +; GRAPH-NOT: updates ; ; Check for .dot file diff --git a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll --- a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll +++ b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=16 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=16 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=15 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=15 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM