diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -1472,6 +1472,9 @@ This function attribute indicates that the function does not call itself either directly or indirectly down any possible call path. This produces undefined behavior at runtime if the function ever does recurse. +``nosync`` + This function attribute indicates that the function does not communicate + (synchronize) with another thread. ``nounwind`` This function attribute indicates that the function never raises an exception. If the function does raise an exception, its runtime diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -106,6 +106,9 @@ /// Mark the function as not returning. def NoReturn : EnumAttr<"noreturn">; +/// Function does not synchronize. +def NoSync : StrBoolAttr<"nosync">; + /// Disable Indirect Branch Tracking. def NoCfCheck : EnumAttr<"nocf_check">; diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -367,6 +367,90 @@ virtual void indicatePessimisticFixpoint() = 0; }; +/// Simple state with integers encoding. +/// +/// The interface ensures that the assumed bits are always a subset of the known +/// bits. Users can only add known bits and, except through adding known bits, +/// they can only remove assumed bits. This should guarantee monotoniticy and +/// thereby the existence of a fixpoint (if used corretly). The fixpoint is +/// reached when the assumed and known state/bits are equal. Users can +/// force/inidicate a fixpoint. If an optimistic one is indicated, the known +/// state will catch up with the assumed one, for a pessimistic fixpoint it is +/// the other way around. +struct IntegerState : public AbstractState { + /// Undrlying integer type, we assume 32 bits to be enough. + using base_t = uint32_t; + + /// Initialize the (best) state. + IntegerState(base_t BestState = ~0) : Assumed(BestState) {} + + /// Return the worst possible representable state. + static constexpr base_t getWorstState() { return 0; } + + /// See AbstractState::isValidState() + /// NOTE: For now we simply pretend that the worst possible state is invalid. + bool isValidState() const override { return Assumed != getWorstState(); } + + /// See AbstractState::isAtFixpoint() + bool isAtFixpoint() const override { return Assumed == Known; } + + /// See AbstractState::indicateOptimisticFixpoint(...) + void indicateOptimisticFixpoint() override { Known = Assumed; } + + /// See AbstractState::indicatePessimisticFixpoint(...) + void indicatePessimisticFixpoint() override { Assumed = Known; } + + /// Return the known state encoding + base_t getKnown() const { return Known; } + + /// Return the assumed state encoding. + base_t getAssumed() const { return Assumed; } + + /// Return true if the bits set in \p BitsEncoding are "known bits". + bool isKnown(base_t BitsEncoding) const { + return (Known & BitsEncoding) == BitsEncoding; + } + + /// Return true if the bits set in \p BitsEncoding are "assumed bits". + bool isAssumed(base_t BitsEncoding) const { + return (Assumed & BitsEncoding) == BitsEncoding; + } + + /// Add the bits in \p BitsEncoding to the "known bits". + IntegerState &addKnownBits(base_t Bits) { + // Make sure we never miss any "known bits". + Assumed |= Bits; + Known |= Bits; + return *this; + } + + /// Remove the bits in \p BitsEncoding from the "assumed bits" if not known. + IntegerState &removeAssumedBits(base_t BitsEncoding) { + // Make sure we never loose any "known bits". + Assumed = (Assumed & ~BitsEncoding) | Known; + return *this; + } + + /// Keep only "assumed bits" also set in \p BitsEncoding but all known ones. + IntegerState &intersectAssumedBits(base_t BitsEncoding) { + // Make sure we never loose any "known bits". + Assumed = (Assumed & BitsEncoding) | Known; + return *this; + } + +private: + /// The known state encoding in an integer of type base_t. + base_t Known = getWorstState(); + + /// The assumed state encoding in an integer of type base_t. + base_t Assumed; +}; + +/// Simple wrapper for a single bit (boolean) state. +struct BooleanState : public IntegerState { + BooleanState() : IntegerState(1){}; +}; + /// Base struct for all "concrete attribute" deductions. /// /// The abstract attribute is a minimal interface that allows the Attributor to @@ -560,6 +644,28 @@ /// Abstract Attribute Classes /// ---------------------------------------------------------------------------- +struct AANoSync : public AbstractAttribute, BooleanState { + /// An abstract interface for all nosync attributes. + AANoSync(Value &V, InformationCache &InfoCache) + : AbstractAttribute(V, InfoCache) {} + + /// See AbstractAttribute::getAsStr(). + virtual const std::string getAsStr() const override { + return getAssumed() ? "nosync" : "may-sync"; + } + + /// See AbstractAttribute::getAttrKind(). + virtual Attribute::AttrKind getAttrKind() const override { + return Attribute::None; + } + + /// Returns true if "nosync" is assumed. + bool isAssumedNoSync() const { return getAssumed(); } + + /// Returns true if "nosync" is known. + bool isKnownNoSync() const { return getKnown(); } +}; // namespace llvm + } // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -42,6 +42,7 @@ "Number of abstract attributes in a valid fixpoint state"); STATISTIC(NumAttributesManifested, "Number of abstract attributes manifested in IR"); +STATISTIC(NumFnNoSync, "Number of functions marked nosync"); // TODO: Determine a good default value. // @@ -78,12 +79,21 @@ } ///} + + /// Helper to adjust the statistics. static void bookkeeping(AbstractAttribute::ManifestPosition MP, const Attribute &Attr) { if (!AreStatisticsEnabled()) return; + if (Attr.isStringAttribute()) { + StringRef StringAttr = Attr.getKindAsString(); + if (StringAttr == "nosync") + NumFnNoSync++; + return; + } + if (!Attr.isEnumAttribute()) return; switch (Attr.getKindAsEnum()) { @@ -240,6 +250,129 @@ return const_cast(this)->getAnchorScope(); } +/// ------------------------ NoSync Function Attribute ------------------------- + +struct AANoSyncFunction : AANoSync { + + AANoSyncFunction(Function &F, InformationCache &InfoCache) + : AANoSync(F, InfoCache) {} + + /// See AbstractAttribute::getState() + /// { + AbstractState &getState() override { return *this; } + const AbstractState &getState() const override { return *this; } + /// } + + /// See AbstractAttribute::getManifestPosition(). + virtual ManifestPosition getManifestPosition() const override { + return MP_FUNCTION; + } + + /// See AbstractAttribute::updateImpl(...). + virtual ChangeStatus updateImpl(Attributor &A) override; + + /// Return deduced attributes in \p Attrs. + virtual void + getDeducedAttributes(SmallVectorImpl &Attrs) const override { + LLVMContext &Ctx = AnchoredVal.getContext(); + Attrs.emplace_back(Attribute::get(Ctx, "nosync")); + } + + static constexpr Attribute::AttrKind ID = + Attribute::AttrKind(Attribute::None + 1); + + /// Helper function used to determine whether an instruction is non-relaxed + /// atomic. In other words, if an atomic instruction does not have unordered + /// or monotonic ordering + static bool isNonRelaxedAtomic(Instruction *I); + + /// Helper function used to determine whether an instruction is volatile. + static bool isVolatile(Instruction *I); +}; + +bool AANoSyncFunction::isNonRelaxedAtomic(Instruction *I) { + if (!I->isAtomic()) + return false; + + AtomicOrdering Ordering; + switch (I->getOpcode()) { + case Instruction::AtomicRMW: + Ordering = cast(I)->getOrdering(); + break; + case Instruction::Store: + Ordering = cast(I)->getOrdering(); + break; + case Instruction::Load: + Ordering = cast(I)->getOrdering(); + break; + case Instruction::Fence: + Ordering = cast(I)->getOrdering(); + break; + case Instruction::AtomicCmpXchg: { + AtomicOrdering Success = cast(I)->getSuccessOrdering(); + AtomicOrdering Failure = cast(I)->getFailureOrdering(); + // Only if both are relaxed, than it can be treated as relaxed. + // Otherwise it is non-relaxed. + if (Success == AtomicOrdering::Unordered || + Success == AtomicOrdering::Monotonic) + return false; + if (Failure == AtomicOrdering::Unordered || + Failure == AtomicOrdering::Monotonic) + return false; + return true; + } + default: + // Unknown atomic, assume non-relaxed. + return true; + } + + // Relaxed. + if (Ordering == AtomicOrdering::Unordered || + Ordering == AtomicOrdering::Monotonic) + return false; + return true; +} + +bool AANoSyncFunction::isVolatile(Instruction *I) { + switch (I->getOpcode()) { + case Instruction::AtomicRMW: + return cast(I)->isVolatile(); + case Instruction::Store: + return cast(I)->isVolatile(); + case Instruction::Load: + return cast(I)->isVolatile(); + case Instruction::AtomicCmpXchg: + return cast(I)->isVolatile(); + default: + return false; + } +} + +ChangeStatus AANoSyncFunction::updateImpl(Attributor &A) { + Function &F = getAnchorScope(); + + /// We are looking for volatile instructions or Non-Relaxed atomics. + /// FIXME: We should ipmrove the handling of intrinsics. + for (Instruction *I : InfoCache.getReadOrWriteInstsForFunction(F)) { + ImmutableCallSite ICS(I); + auto *NoSyncAA = A.getAAFor(*this, *I); + + if (!ICS && (!NoSyncAA || !NoSyncAA->isAssumedNoSync()) && + !ICS.hasFnAttr("nosync")) { + indicatePessimisticFixpoint(); + return ChangeStatus::CHANGED; + } + + if (!isVolatile(I) && !isNonRelaxedAtomic(I)) + continue; + + indicatePessimisticFixpoint(); + return ChangeStatus::CHANGED; + } + + return ChangeStatus::UNCHANGED; +} + /// ---------------------------------------------------------------------------- /// Attributor /// ---------------------------------------------------------------------------- @@ -382,6 +515,9 @@ Function &F, InformationCache &InfoCache, DenseSet *Whitelist) { + // Every function might be marked "nosync" + registerAA(*new AANoSyncFunction(F, InfoCache)); + // Walk all instructions to find more attribute opportunities and also // interesting instructions that might be queried by abstract attributes // during their initialization or update. @@ -526,4 +662,3 @@ "Deduce and propagate attributes", false, false) INITIALIZE_PASS_END(AttributorLegacyPass, "attributor", "Deduce and propagate attributes", false, false) - diff --git a/llvm/test/Transforms/FunctionAttrs/nosync.ll b/llvm/test/Transforms/FunctionAttrs/nosync.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/FunctionAttrs/nosync.ll @@ -0,0 +1,239 @@ +; RUN: opt -functionattrs -S < %s | FileCheck %s --check-prefix=FNATTR +; RUN: opt -attributor -S < %s | FileCheck %s --check-prefix=ATTRIBUTOR +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Test cases designet for the "nosync" function attribute. +; FIXME's are used to indicate problems and missing attributes. + +; struct RT { +; char A; +; int B[10][20]; +; char C; +; }; +; struct ST { +; int X; +; double Y; +; struct RT Z; +; }; +; +; int *foo(struct ST *s) { +; return &s[1].Z.B[5][13]; +; } + +; TEST 1 +; attribute readnone implies "nosync" +%struct.RT = type { i8, [10 x [20 x i32]], i8 } +%struct.ST = type { i32, double, %struct.RT } + +; FNATTR: Function Attrs: nounwind uwtable readnone optsize ssp +; FNATTR-NEXT: define i32 *@foo(%struct.ST* %s) +; ATTRIBUTOR: Function Attrs: nounwind uwtable readnone optsize ssp "nosync" +; ATTRIBUTOR-NEXT: define i32 *@foo(%struct.ST* %s) +define i32* @foo(%struct.ST* %s) nounwind uwtable readnone optsize ssp { +entry: + %arrayidx = getelementptr inbounds %struct.ST, %struct.ST* %s, i64 1, i32 2, i32 1, i64 5, i64 13 + ret i32* %arrayidx +} + +; TEST 2 +; atomic load with monotonic ordering +; int load_monotonic(_Atomic int *num) { +; int n = atomic_load_explicit(num, memory_order_relaxed); +; return n; +; } + +; FNATTR: Function Attrs: norecurse nounwind uwtable +; FNATTR-NEXT: define i32 @load_monotonic(i32* nocapture readonly) +; ATTRIBUTOR: Function Attrs: norecurse nounwind uwtable "nosync" +; ATTRIBUTOR-NEXT: define i32 @load_monotonic(i32* nocapture readonly) +define i32 @load_monotonic(i32* nocapture readonly) norecurse nounwind uwtable { + %2 = load atomic i32, i32* %0 monotonic, align 4 + ret i32 %2 +} + + +; TEST 3 +; atomic store with monotonic ordering. +; void store_monotonic(_Atomic int *num) { +; atomic_load_explicit(num, memory_order_relaxed); +; } + +; FNATTR: Function Attrs: norecurse nounwind uwtable +; FNATTR-NEXT: define void @store_monotonic(i32* nocapture) +; ATTRIBUTOR: Function Attrs: norecurse nounwind uwtable "nosync" +; ATTRIBUTOR-NEXT: define void @store_monotonic(i32* nocapture) +define void @store_monotonic(i32* nocapture) norecurse nounwind uwtable { + store atomic i32 10, i32* %0 monotonic, align 4 + ret void +} + +; TEST 4 - negative, should not deduce "nosync" +; atomic load with acquire ordering. +; int load_acquire(_Atomic int *num) { +; int n = atomic_load_explicit(num, memory_order_acquire); +; return n; +; } + +; FNATTR: Function Attrs: norecurse nounwind uwtable +; FNATTR-NEXT: define i32 @load_acquire(i32* nocapture readonly) +; ATTRIBUTOR: Function Attrs: norecurse nounwind uwtable +; ATTRIBUTOR-NOT: "nosync" +; ATTRIBUTOR-NEXT: define i32 @load_acquire(i32* nocapture readonly) +define i32 @load_acquire(i32* nocapture readonly) norecurse nounwind uwtable { + %2 = load atomic i32, i32* %0 acquire, align 4 + ret i32 %2 +} + +; TEST 5 - negative, should not deduce "nosync" +; atomic load with release ordering +; void load_release(_Atomic int *num) { +; atomic_store_explicit(num, 10, memory_order_release); +; } + +; FNATTR: Function Attrs: norecurse nounwind uwtable +; FNATTR-NEXT: define void @load_release(i32* nocapture) +; ATTRIBUTOR: Function Attrs: norecurse nounwind uwtable +; ATTRIBUTOR-NOT: "nosync" +; ATTRIBUTOR-NEXT: define void @load_release(i32* nocapture) +define void @load_release(i32* nocapture) norecurse nounwind uwtable { + store atomic i32 10, i32* %0 release, align 4 + ret void +} + +; TEST 6 - negative, should not deduce "nosync" +; volatile store. +; void volatile_store(volatile int *num) { +; *num = 14; +; } + +; FNATTR: Function Attrs: norecurse nounwind uwtable +; FNATTR-NEXT: define void @volatile_store(i32*) +; ATTRIBUTOR: Function Attrs: norecurse nounwind uwtable +; ATTRIBUTOR-NOT: "nosync" +; ATTRIBUTOR-NEXT: define void @volatile_store(i32*) +define void @volatile_store(i32*) norecurse nounwind uwtable { + ;store volatile i32 14, i32* %0, align 4, !tbaa !2 + store volatile i32 14, i32* %0, align 4 + ret void +} + +; TEST 7 - negative, should not deduce "nosync" +; volatile load. +; int volatile_load(volatile int *num) { +; int n = *num; +; return n; +; } + +; FNATTR: Function Attrs: norecurse nounwind uwtable +; FNATTR-NEXT: define i32 @volatile_load(i32*) +; ATTRIBUTOR: Function Attrs: norecurse nounwind uwtable +; ATTRIBUTOR-NOT: "nosync" +; ATTRIBUTOR-NEXT: define i32 @volatile_load(i32*) +define i32 @volatile_load(i32*) norecurse nounwind uwtable { + ;%2 = load volatile i32, i32* %0, align 4, !tbaa !2 + %2 = load volatile i32, i32* %0, align 4 + ret i32 %2 +} + +; TEST 8 +declare void @nosync_function() noinline nounwind uwtable + +; FNATTR: Function Attrs: noinline nounwind uwtable +; FNATTR: define void @call_nosync_function() +; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable "nosync" +; ATTRIBUTOR: define void @call_nosync_function() +define void @call_nosync_function() nounwind uwtable noinline { + tail call void @nosync_function() noinline nounwind uwtable + ret void +} + +; TEST 9 - negative, should not deduce "nosync" +declare void @might_sync() noinline nounwind uwtable + +; FNATTR: Function Attrs: noinline nounwind uwtable +; FNATTR: define void @call_might_sync() +; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable +; ATTRIBUTOR-NOT: "nosync" +; ATTRIBUTOR: define void @call_might_sync() +define void @call_might_sync() nounwind uwtable noinline { + tail call void @might_sync() noinline nounwind uwtable + ret void +} + +; TEST 10 - negative, should not deduce "nosync" +; volatile operation in same scc. Call volatile_load defined in TEST 7. + +; FNATTR: Function Attrs: noinline nounwind uwtable +; FNATTR-NEXT: define i32 @scc1(i32*) +; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable +; ATTRIBUTOR-NOT: "nosync" +; ATTRIBUTOR-NEXT: define i32 @scc1(i32*) +define i32 @scc1(i32*) noinline nounwind uwtable { + tail call void @scc2(i32* %0); + %val = tail call i32 @volatile_load(i32* %0); + ret i32 %val; +} + +; FNATTR: Function Attrs: noinline nounwind uwtable +; FNATTR-NEXT: define void @scc2(i32*) +; ATTRIBUTOR: Function Attrs: noinline nounwind uwtable +; ATTRIBUTOR-NOT: "nosync" +; ATTRIBUTOR-NEXT: define void @scc2(i32*) +define void @scc2(i32*) noinline nounwind uwtable { + tail call i32 @scc1(i32* %0); + ret void; +} + +; TEST 11 - fences, negative +; std::atomic flag(false); +; int a; +; +; void func1(){ + ; a = 100; +; atomic_thread_fence(std::memory_order_release); +; flag.store(true, std::memory_order_relaxed); +; } +; +; void foo(){ +; while(!flag.load(std::memory_order_relaxed)) +; ; +; +; atomic_thread_fence(std::memory_order_acquire); +; int b = a; +; } + +%"struct.std::atomic" = type { %"struct.std::__atomic_base" } +%"struct.std::__atomic_base" = type { i8 } + +; FNATTR: Function Attrs: norecurse nounwind uwtable +; FNATTR-NEXT: define void @foo1() +; ATTRIBUTOR: Function Attrs: norecurse nounwind uwtable +; ATTRIBUTOR-NOT: "nosync" +; ATTRIBUTOR-NEXT: define void @foo1() +define void @foo1(i32 *, %"struct.std::atomic"*) { + store i32 100, i32* %0, align 4 + fence release + %3 = getelementptr inbounds %"struct.std::atomic", %"struct.std::atomic"* %1, i64 0, i32 0, i32 0 + store atomic i8 1, i8* %3 monotonic, align 1 + ret void +} + +; FNATTR: Function Attrs: norecurse nounwind uwtable +; FNATTR-NEXT: define void @bar() +; ATTRIBUTOR: Function Attrs: norecurse nounwind uwtable +; ATTRIBUTOR-NOT: "nosync" +; ATTRIBUTOR-NEXT: define void @bar() +define void @bar(i32 *, %"struct.std::atomic"*) { + %3 = getelementptr inbounds %"struct.std::atomic", %"struct.std::atomic"* %1, i64 0, i32 0, i32 0 + br label %4 + +4: ; preds = %4, %2 + %5 = load atomic i8, i8* %3 monotonic, align 1 + %6 = and i8 %5, 1 + %7 = icmp eq i8 %6, 0 + br i1 %7, label %4, label %8 + +8: ; preds = %4 + fence acquire + ret void +}