Index: include/llvm/Analysis/AliasSetTracker.h
===================================================================
--- include/llvm/Analysis/AliasSetTracker.h
+++ include/llvm/Analysis/AliasSetTracker.h
@@ -52,9 +52,13 @@
     PointerRec **PrevInList = nullptr;
     PointerRec *NextInList = nullptr;
     AliasSet *AS = nullptr;
-    LocationSize Size = 0;
+    LocationSize Size = LocationSize::mapEmpty();
     AAMDNodes AAInfo;
 
+    // Whether the size for this record has been set at all. This makes no
+    // guarantees about the size being known.
+    bool isSizeSet() const { return Size != LocationSize::mapEmpty(); }
+
   public:
     PointerRec(Value *V)
       : Val(V), AAInfo(DenseMapInfo<AAMDNodes>::getEmptyKey()) {}
@@ -71,9 +75,10 @@
 
     bool updateSizeAndAAInfo(LocationSize NewSize, const AAMDNodes &NewAAInfo) {
       bool SizeChanged = false;
-      if (NewSize > Size) {
-        Size = NewSize;
-        SizeChanged = true;
+      if (NewSize != Size) {
+        LocationSize OldSize = Size;
+        Size = isSizeSet() ? Size.combineWith(NewSize) : NewSize;
+        SizeChanged = OldSize != Size;
       }
 
       if (AAInfo == DenseMapInfo<AAMDNodes>::getEmptyKey())
@@ -91,7 +96,10 @@
       return SizeChanged;
     }
 
-    LocationSize getSize() const { return Size; }
+    LocationSize getSize() const {
+      assert(isSizeSet() && "Getting an unset size!");
+      return Size;
+    }
 
     /// Return the AAInfo, or null if there is no information or conflicting
     /// information.
Index: include/llvm/Analysis/MemoryLocation.h
===================================================================
--- include/llvm/Analysis/MemoryLocation.h
+++ include/llvm/Analysis/MemoryLocation.h
@@ -34,8 +34,137 @@
 class TargetLibraryInfo;
 
 // Represents the size of a MemoryLocation. Logically, it's an
-// Optional<uint64_t>, with a special UnknownSize value from `MemoryLocation`.
-using LocationSize = uint64_t;
+// Optional<uint64_t> that also carries a bit to represent whether the integer
+// it contains, N, is 'precise'. Precise, in this context, means that we know
+// that the MemoryLocation we're referencing has to be at least N bytes large.
+//
+// An example of a precise MemoryLocation is (%p, 4) in
+//   store i32 0, i32* %p
+//
+// Since we know that %p must be at least 4 bytes large at this point.
+// Otherwise, we have UB. An example of an imprecise MemoryLocation is (%p, 4)
+// at the memcpy in
+//
+//   %n = select i1 %foo, i64 1, i64 4
+//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %baz, i64 %n, i32 1,
+//                                        i1 false)
+//
+// ...Since we'll copy *up to* 4 bytes into %p, but we can't guarantee that
+// we'll ever actually do so.
+//
+// If asked to represent a pathologically large value, this will degrade to
+// None.
+class LocationSize {
+  enum : uint64_t {
+    Unknown = ~uint64_t(0),
+    ImpreciseBit = uint64_t(1) << 63,
+    MapEmpty = Unknown - 1,
+    MapTombstone = Unknown - 2,
+
+    // The maximum value we can represent without falling back to 'unknown'.
+    MaxValue = (MapTombstone - 1) & ~ImpreciseBit,
+  };
+
+  uint64_t Value;
+
+  // Hack to support implicit construction. This should disappear when the
+  // public LocationSize ctor goes away.
+  enum DirectConstruction { Direct };
+
+  constexpr LocationSize(uint64_t Raw, DirectConstruction) : Value(Raw) {}
+
+  static_assert(Unknown & ImpreciseBit, "Unknown is imprecise by definition.");
+
+public:
+  // FIXME: Migrate all users to construct via either `precise` or `upperBound`,
+  // to make it more obvious at the callsite the kind of size that they're
+  // providing.
+  //
+  // Since the overwhelming majority of users of this provide precise values,
+  // this assumes the provided value is precise.
+  constexpr LocationSize(uint64_t Raw)
+      : Value(Raw > MaxValue ? Unknown : Raw) {}
+
+  static LocationSize precise(uint64_t Value) {
+    if (LLVM_UNLIKELY(Value > MaxValue))
+      return unknown();
+    return LocationSize(Value, Direct);
+  }
+
+  static LocationSize upperBound(uint64_t Value) {
+    // You can't go lower than 0, so give a precise result.
+    if (LLVM_UNLIKELY(Value == 0))
+      return precise(0);
+    if (LLVM_UNLIKELY(Value > MaxValue))
+      return unknown();
+    return LocationSize(Value | ImpreciseBit, Direct);
+  }
+
+  constexpr static LocationSize unknown() {
+    return LocationSize(Unknown, Direct);
+  }
+
+  // Sentinel values, generally used for maps.
+  constexpr static LocationSize mapTombstone() {
+    return LocationSize(MapTombstone, Direct);
+  }
+  constexpr static LocationSize mapEmpty() {
+    return LocationSize(MapEmpty, Direct);
+  }
+
+  // Returns a LocationSize that can sanely represent either `*this` or `Other`.
+  LocationSize combineWith(LocationSize Other) const {
+    if (Other == *this)
+      return *this;
+
+    if (!hasValue() || !Other.hasValue())
+      return unknown();
+
+    return upperBound(std::max(getValue(), Other.getValue()));
+  }
+
+  bool hasValue() const { return Value != Unknown; }
+  uint64_t getValue() const {
+    assert(hasValue() && "Getting value from an unknown LocationSize!");
+    return Value & ~ImpreciseBit;
+  }
+
+  // Returns whether or not this value is precise. Note that if a value is
+  // precise, it's guaranteed to not be `unknown()`.
+  bool isPrecise() const {
+    return (Value & ImpreciseBit) == 0;
+  }
+
+  bool isZero() const { return hasValue() && getValue() == 0; }
+
+  bool operator==(const LocationSize &Other) const {
+    return Value == Other.Value;
+  }
+
+  bool operator!=(const LocationSize &Other) const {
+    return !operator==(Other);
+  }
+
+  bool equalsIgnoringPrecision(const LocationSize &Other) const {
+    // Special values (unknown + map values) are distinct even without the
+    // ImpreciseBit set.
+    return (Value & ~ImpreciseBit) == (Other.Value & ~ImpreciseBit);
+  }
+
+  void print(raw_ostream &OS) const;
+
+  // Ordering operators are not provided, since it's unclear if there's only one
+  // reasonable way to compare:
+  // - values that don't exist against values that do, and
+  // - precise values to imprecise values
+
+  // Returns an opaque value that represents this LocationSize.
+  uint64_t toRaw() const { return Value; }
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, LocationSize Size) {
+  Size.print(OS);
+  return OS;
+}
 
 /// Representation for a specific memory location.
 ///
@@ -139,13 +268,30 @@
   }
 };
 
-// Specialize DenseMapInfo for MemoryLocation.
+// Specialize DenseMapInfo.
+template <> struct DenseMapInfo<LocationSize> {
+  static inline LocationSize getEmptyKey() {
+    return LocationSize::mapEmpty();
+  }
+  static inline LocationSize getTombstoneKey() {
+    return LocationSize::mapTombstone();
+  }
+  static unsigned getHashValue(const LocationSize &Val) {
+    return DenseMapInfo<uint64_t>::getHashValue(Val.toRaw());
+  }
+  static bool isEqual(const LocationSize &LHS, const LocationSize &RHS) {
+    return LHS == RHS;
+  }
+};
+
 template <> struct DenseMapInfo<MemoryLocation> {
   static inline MemoryLocation getEmptyKey() {
-    return MemoryLocation(DenseMapInfo<const Value *>::getEmptyKey(), 0);
+    return MemoryLocation(DenseMapInfo<const Value *>::getEmptyKey(),
+                          DenseMapInfo<LocationSize>::getEmptyKey());
   }
   static inline MemoryLocation getTombstoneKey() {
-    return MemoryLocation(DenseMapInfo<const Value *>::getTombstoneKey(), 0);
+    return MemoryLocation(DenseMapInfo<const Value *>::getTombstoneKey(),
+                          DenseMapInfo<LocationSize>::getTombstoneKey());
   }
   static unsigned getHashValue(const MemoryLocation &Val) {
     return DenseMapInfo<const Value *>::getHashValue(Val.Ptr) ^
Index: lib/Analysis/BasicAliasAnalysis.cpp
===================================================================
--- lib/Analysis/BasicAliasAnalysis.cpp
+++ lib/Analysis/BasicAliasAnalysis.cpp
@@ -992,9 +992,9 @@
 /// Provide ad-hoc rules to disambiguate accesses through two GEP operators,
 /// both having the exact same pointer operand.
 static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
-                                            LocationSize V1Size,
+                                            LocationSize MaybeV1Size,
                                             const GEPOperator *GEP2,
-                                            LocationSize V2Size,
+                                            LocationSize MaybeV2Size,
                                             const DataLayout &DL) {
   assert(GEP1->getPointerOperand()->stripPointerCastsAndInvariantGroups() ==
              GEP2->getPointerOperand()->stripPointerCastsAndInvariantGroups() &&
@@ -1010,10 +1010,13 @@
 
   // If we don't know the size of the accesses through both GEPs, we can't
   // determine whether the struct fields accessed can't alias.
-  if (V1Size == MemoryLocation::UnknownSize ||
-      V2Size == MemoryLocation::UnknownSize)
+  if (MaybeV1Size == MemoryLocation::UnknownSize ||
+      MaybeV2Size == MemoryLocation::UnknownSize)
     return MayAlias;
 
+  const uint64_t V1Size = MaybeV1Size.getValue();
+  const uint64_t V2Size = MaybeV2Size.getValue();
+
   ConstantInt *C1 =
       dyn_cast<ConstantInt>(GEP1->getOperand(GEP1->getNumOperands() - 1));
   ConstantInt *C2 =
@@ -1170,11 +1173,14 @@
 // than (%alloca - 1), and so is not inbounds, a contradiction.
 bool BasicAAResult::isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
       const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject,
-      LocationSize ObjectAccessSize) {
+      LocationSize MaybeObjectAccessSize) {
   // If the object access size is unknown, or the GEP isn't inbounds, bail.
-  if (ObjectAccessSize == MemoryLocation::UnknownSize || !GEPOp->isInBounds())
+  if (MaybeObjectAccessSize == MemoryLocation::UnknownSize ||
+      !GEPOp->isInBounds())
     return false;
 
+  const auto ObjectAccessSize = int64_t(MaybeObjectAccessSize.getValue());
+
   // We need the object to be an alloca or a globalvariable, and want to know
   // the offset of the pointer from the object precisely, so no variable
   // indices are allowed.
@@ -1195,7 +1201,7 @@
   int64_t GEPBaseOffset = DecompGEP.StructOffset;
   GEPBaseOffset += DecompGEP.OtherOffset;
 
-  return (GEPBaseOffset >= ObjectBaseOffset + (int64_t)ObjectAccessSize);
+  return GEPBaseOffset >= ObjectBaseOffset + ObjectAccessSize;
 }
 
 /// Provides a bunch of ad-hoc rules to disambiguate a GEP instruction against
@@ -1337,7 +1343,7 @@
   if (GEP1BaseOffset != 0 && DecompGEP1.VarIndices.empty()) {
     if (GEP1BaseOffset >= 0) {
       if (V2Size != MemoryLocation::UnknownSize) {
-        if ((uint64_t)GEP1BaseOffset < V2Size)
+        if ((uint64_t)GEP1BaseOffset < V2Size.getValue())
           return PartialAlias;
         return NoAlias;
       }
@@ -1352,7 +1358,7 @@
       // stripped a gep with negative index ('gep <ptr>, -1, ...).
       if (V1Size != MemoryLocation::UnknownSize &&
          V2Size != MemoryLocation::UnknownSize) {
-        if (-(uint64_t)GEP1BaseOffset < V1Size)
+        if (-(uint64_t)GEP1BaseOffset < V1Size.getValue())
          return PartialAlias;
        return NoAlias;
      }
@@ -1402,14 +1408,16 @@
     // two locations do not alias.
     uint64_t ModOffset = (uint64_t)GEP1BaseOffset & (Modulo - 1);
     if (V1Size != MemoryLocation::UnknownSize &&
-        V2Size != MemoryLocation::UnknownSize && ModOffset >= V2Size &&
-        V1Size <= Modulo - ModOffset)
+        V2Size != MemoryLocation::UnknownSize &&
+        ModOffset >= V2Size.getValue() &&
+        V1Size.getValue() <= Modulo - ModOffset)
       return NoAlias;
 
     // If we know all the variables are positive, then GEP1 >= GEP1BasePtr.
     // If GEP1BasePtr > V2 (GEP1BaseOffset > 0) then we know the pointers
     // don't alias if V2Size can fit in the gap between V2 and GEP1BasePtr.
-    if (AllPositive && GEP1BaseOffset > 0 && V2Size <= (uint64_t)GEP1BaseOffset)
+    if (AllPositive && GEP1BaseOffset > 0 && V2Size.hasValue() &&
+        V2Size.getValue() <= (uint64_t)GEP1BaseOffset)
       return NoAlias;
 
     if (constantOffsetHeuristic(DecompGEP1.VarIndices, V1Size, V2Size,
@@ -1660,10 +1668,10 @@
 
   // If the size of one access is larger than the entire object on the other
   // side, then we know such behavior is undefined and can assume no alias.
-  if ((V1Size != MemoryLocation::UnknownSize &&
-       isObjectSmallerThan(O2, V1Size, DL, TLI)) ||
-      (V2Size != MemoryLocation::UnknownSize &&
-       isObjectSmallerThan(O1, V2Size, DL, TLI)))
+  if ((V1Size.isPrecise() &&
+       isObjectSmallerThan(O2, V1Size.getValue(), DL, TLI)) ||
+      (V2Size.isPrecise() &&
+       isObjectSmallerThan(O1, V2Size.getValue(), DL, TLI)))
     return NoAlias;
 
   // Check the cache before climbing up use-def chains. This also terminates
@@ -1721,10 +1729,9 @@
   // If both pointers are pointing into the same object and one of them
   // accesses the entire object, then the accesses must overlap in some way.
   if (O1 == O2)
-    if (V1Size != MemoryLocation::UnknownSize &&
-        V2Size != MemoryLocation::UnknownSize &&
-        (isObjectSize(O1, V1Size, DL, TLI) ||
-         isObjectSize(O2, V2Size, DL, TLI)))
+    if (V1Size.isPrecise() && V2Size.isPrecise() &&
+        (isObjectSize(O1, V1Size.getValue(), DL, TLI) ||
+         isObjectSize(O2, V2Size.getValue(), DL, TLI)))
       return AliasCache[Locs] = PartialAlias;
 
   // Recurse back into the best AA results we have, potentially with refined
@@ -1807,13 +1814,16 @@
 }
 
 bool BasicAAResult::constantOffsetHeuristic(
-    const SmallVectorImpl<VariableGEPIndex> &VarIndices, LocationSize V1Size,
-    LocationSize V2Size, int64_t BaseOffset, AssumptionCache *AC,
-    DominatorTree *DT) {
-  if (VarIndices.size() != 2 || V1Size == MemoryLocation::UnknownSize ||
-      V2Size == MemoryLocation::UnknownSize)
+    const SmallVectorImpl<VariableGEPIndex> &VarIndices,
+    LocationSize MaybeV1Size, LocationSize MaybeV2Size, int64_t BaseOffset,
+    AssumptionCache *AC, DominatorTree *DT) {
+  if (VarIndices.size() != 2 || MaybeV1Size == MemoryLocation::UnknownSize ||
+      MaybeV2Size == MemoryLocation::UnknownSize)
     return false;
 
+  const uint64_t V1Size = MaybeV1Size.getValue();
+  const uint64_t V2Size = MaybeV2Size.getValue();
+
   const VariableGEPIndex &Var0 = VarIndices[0], &Var1 = VarIndices[1];
 
   if (Var0.ZExtBits != Var1.ZExtBits || Var0.SExtBits != Var1.SExtBits ||
Index: lib/Analysis/CFLAndersAliasAnalysis.cpp
===================================================================
--- lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -562,6 +562,10 @@
       RHSSize == MemoryLocation::UnknownSize)
     return true;
 
+  assert(LHSSize.getValue() <= INT64_MAX &&
+         RHSSize.getValue() <= INT64_MAX &&
+         "INT64_MAX is less than a uint63_t's max?");
+
   for (const auto &OVal : make_range(RangePair)) {
     // Be conservative about UnknownOffset
     if (OVal.Offset == UnknownOffset)
@@ -572,15 +576,11 @@
     // range-overlap queries over two ranges [OVal.Offset, OVal.Offset +
     // LHSSize) and [0, RHSSize).
 
-    // Try to be conservative on super large offsets
-    if (LLVM_UNLIKELY(LHSSize > INT64_MAX || RHSSize > INT64_MAX))
-      return true;
-
-    auto LHSStart = OVal.Offset;
+    int64_t LHSStart = OVal.Offset;
     // FIXME: Do we need to guard against integer overflow?
-    auto LHSEnd = OVal.Offset + static_cast<int64_t>(LHSSize);
-    auto RHSStart = 0;
-    auto RHSEnd = static_cast<int64_t>(RHSSize);
+    int64_t LHSEnd = OVal.Offset + static_cast<int64_t>(LHSSize.getValue());
+    int64_t RHSStart = 0;
+    auto RHSEnd = static_cast<int64_t>(RHSSize.getValue());
     if (LHSEnd > RHSStart && LHSStart < RHSEnd)
       return true;
   }
Index: lib/Analysis/MemoryDependenceAnalysis.cpp
===================================================================
--- lib/Analysis/MemoryDependenceAnalysis.cpp
+++ lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -1112,21 +1112,33 @@
   // If we already have a cache entry for this CacheKey, we may need to do some
   // work to reconcile the cache entry and the current query.
   if (!Pair.second) {
-    if (CacheInfo->Size < Loc.Size) {
-      // The query's Size is greater than the cached one. Throw out the
-      // cached data and proceed with the query at the greater size.
-      CacheInfo->Pair = BBSkipFirstBlockPair();
-      CacheInfo->Size = Loc.Size;
-      for (auto &Entry : CacheInfo->NonLocalDeps)
-        if (Instruction *Inst = Entry.getResult().getInst())
-          RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
-      CacheInfo->NonLocalDeps.clear();
-    } else if (CacheInfo->Size > Loc.Size) {
-      // This query's Size is less than the cached one. Conservatively restart
-      // the query using the greater size.
-      return getNonLocalPointerDepFromBB(
-          QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
-          StartBB, Result, Visited, SkipFirstBlock);
+    if (!CacheInfo->Size.equalsIgnoringPrecision(Loc.Size)) {
+      // For our purposes, unknown size > all others.
+      bool CachedSizeIsGreater;
+      if (CacheInfo->Size.hasValue() != Loc.Size.hasValue()) {
+        CachedSizeIsGreater = Loc.Size.hasValue();
+      } else {
+        assert(CacheInfo->Size.hasValue() && Loc.Size.hasValue() &&
+               "Unequal sizes, but both are None?");
+        CachedSizeIsGreater = CacheInfo->Size.getValue() > Loc.Size.getValue();
+      }
+
+      if (!CachedSizeIsGreater) {
+        // The query's Size is greater than the cached one. Throw out the
+        // cached data and proceed with the query at the greater size.
+        CacheInfo->Pair = BBSkipFirstBlockPair();
+        CacheInfo->Size = Loc.Size;
+        for (auto &Entry : CacheInfo->NonLocalDeps)
+          if (Instruction *Inst = Entry.getResult().getInst())
+            RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
+        CacheInfo->NonLocalDeps.clear();
+      } else {
+        // This query's Size is less than the cached one. Conservatively restart
+        // the query using the greater size.
+        return getNonLocalPointerDepFromBB(
+            QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
+            StartBB, Result, Visited, SkipFirstBlock);
+      }
     }
 
     // If the query's AATags are inconsistent with the cached one,
Index: lib/Analysis/MemoryLocation.cpp
===================================================================
--- lib/Analysis/MemoryLocation.cpp
+++ lib/Analysis/MemoryLocation.cpp
@@ -18,6 +18,20 @@
 #include "llvm/IR/Type.h"
 using namespace llvm;
 
+void LocationSize::print(raw_ostream &OS) const {
+  OS << "LocationSize::";
+  if (*this == unknown())
+    OS << "unknown";
+  else if (*this == mapEmpty())
+    OS << "mapEmpty";
+  else if (*this == mapTombstone())
+    OS << "mapTombstone";
+  else if (isPrecise())
+    OS << "precise(" << getValue() << ')';
+  else
+    OS << "upperBound(" << getValue() << ')';
+}
+
 MemoryLocation MemoryLocation::get(const LoadInst *LI) {
   AAMDNodes AATags;
   LI->getAAMetadata(AATags);
Index: lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
===================================================================
--- lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -27,7 +27,7 @@
   // If either of the memory references is empty, it doesn't matter what the
   // pointer values are. This allows the code below to ignore this special
   // case.
-  if (LocA.Size == 0 || LocB.Size == 0)
+  if (LocA.Size.isZero() || LocB.Size.isZero())
     return NoAlias;
 
   // This is SCEVAAResult. Get the SCEVs!
@@ -40,11 +40,12 @@
 
   // If something is known about the difference between the two addresses,
   // see if it's enough to prove a NoAlias.
-  if (SE.getEffectiveSCEVType(AS->getType()) ==
-      SE.getEffectiveSCEVType(BS->getType())) {
+  if (LocA.Size.hasValue() && LocB.Size.hasValue() &&
+      SE.getEffectiveSCEVType(AS->getType()) ==
+          SE.getEffectiveSCEVType(BS->getType())) {
     unsigned BitWidth = SE.getTypeSizeInBits(AS->getType());
-    APInt ASizeInt(BitWidth, LocA.Size);
-    APInt BSizeInt(BitWidth, LocB.Size);
+    APInt ASizeInt(BitWidth, LocA.Size.getValue());
+    APInt BSizeInt(BitWidth, LocB.Size.getValue());
 
     // Compute the difference between the two pointers.
     const SCEV *BA = SE.getMinusSCEV(BS, AS);
Index: lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -344,11 +344,14 @@
                                    Instruction *DepWrite,
                                    InstOverlapIntervalsTy &IOL,
                                    AliasAnalysis &AA) {
-  // If we don't know the sizes of either access, then we can't do a comparison.
-  if (Later.Size == MemoryLocation::UnknownSize ||
-      Earlier.Size == MemoryLocation::UnknownSize)
+  // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
+  // get imprecise values here, though (except for unknown sizes).
+  if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise())
    return OW_Unknown;
 
+  uint64_t LaterSize = Later.Size.getValue();
+  uint64_t EarlierSize = Earlier.Size.getValue();
+
   const Value *P1 = Earlier.Ptr->stripPointerCasts();
   const Value *P2 = Later.Ptr->stripPointerCasts();
 
@@ -356,7 +359,7 @@
   // the later store was larger than the earlier store.
   if (P1 == P2 || AA.isMustAlias(P1, P2)) {
     // Make sure that the Later size is >= the Earlier size.
-    if (Later.Size >= Earlier.Size)
+    if (LaterSize >= EarlierSize)
       return OW_Complete;
   }
 
@@ -374,7 +377,7 @@
   // If the "Later" store is to a recognizable object, get its size.
   uint64_t ObjectSize = getPointerSize(UO2, DL, TLI);
   if (ObjectSize != MemoryLocation::UnknownSize)
-    if (ObjectSize == Later.Size && ObjectSize >= Earlier.Size)
+    if (ObjectSize == LaterSize && ObjectSize >= EarlierSize)
       return OW_Complete;
 
   // Okay, we have stores to two completely different pointers. Try to
@@ -405,8 +408,8 @@
   //
   // We have to be careful here as *Off is signed while *.Size is unsigned.
   if (EarlierOff >= LaterOff &&
-      Later.Size >= Earlier.Size &&
-      uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
+      LaterSize >= EarlierSize &&
+      uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
     return OW_Complete;
 
   // We may now overlap, although the overlap is not complete. There might also
@@ -415,21 +418,21 @@
   // Note: The correctness of this logic depends on the fact that this function
   // is not even called providing DepWrite when there are any intervening reads.
   if (EnablePartialOverwriteTracking &&
-      LaterOff < int64_t(EarlierOff + Earlier.Size) &&
-      int64_t(LaterOff + Later.Size) >= EarlierOff) {
+      LaterOff < int64_t(EarlierOff + EarlierSize) &&
+      int64_t(LaterOff + LaterSize) >= EarlierOff) {
 
     // Insert our part of the overlap into the map.
     auto &IM = IOL[DepWrite];
     LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: Earlier [" << EarlierOff
-                      << ", " << int64_t(EarlierOff + Earlier.Size)
+                      << ", " << int64_t(EarlierOff + EarlierSize)
                       << ") Later [" << LaterOff << ", "
-                      << int64_t(LaterOff + Later.Size) << ")\n");
+                      << int64_t(LaterOff + LaterSize) << ")\n");
 
     // Make sure that we only insert non-overlapping intervals and combine
     // adjacent intervals. The intervals are stored in the map with the ending
     // offset as the key (in the half-open sense) and the starting offset as
     // the value.
-    int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + Later.Size;
+    int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + LaterSize;
 
     // Find any intervals ending at, or after, LaterIntStart which start
     // before LaterIntEnd.
@@ -459,10 +462,10 @@
 
     ILI = IM.begin();
     if (ILI->second <= EarlierOff &&
-        ILI->first >= int64_t(EarlierOff + Earlier.Size)) {
+        ILI->first >= int64_t(EarlierOff + EarlierSize)) {
       LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: Earlier ["
                         << EarlierOff << ", "
-                        << int64_t(EarlierOff + Earlier.Size)
+                        << int64_t(EarlierOff + EarlierSize)
                         << ") Composite Later [" << ILI->second << ", "
                         << ILI->first << ")\n");
       ++NumCompletePartials;
@@ -473,13 +476,13 @@
   // Check for an earlier store which writes to all the memory locations that
   // the later store writes to.
   if (EnablePartialStoreMerging && LaterOff >= EarlierOff &&
-      int64_t(EarlierOff + Earlier.Size) > LaterOff &&
-      uint64_t(LaterOff - EarlierOff) + Later.Size <= Earlier.Size) {
+      int64_t(EarlierOff + EarlierSize) > LaterOff &&
+      uint64_t(LaterOff - EarlierOff) + LaterSize <= EarlierSize) {
     LLVM_DEBUG(dbgs() << "DSE: Partial overwrite an earlier load ["
                       << EarlierOff << ", "
-                      << int64_t(EarlierOff + Earlier.Size)
+                      << int64_t(EarlierOff + EarlierSize)
                       << ") by a later store [" << LaterOff << ", "
-                      << int64_t(LaterOff + Later.Size) << ")\n");
+                      << int64_t(LaterOff + LaterSize) << ")\n");
     // TODO: Maybe come up with a better name?
     return OW_PartialEarlierWithFullLater;
   }
@@ -493,8 +496,8 @@
   // In this case we may want to trim the size of earlier to avoid generating
   // writes to addresses which will definitely be overwritten later
   if (!EnablePartialOverwriteTracking &&
-      (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + Earlier.Size) &&
-       int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size)))
+      (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + EarlierSize) &&
+       int64_t(LaterOff + LaterSize) >= int64_t(EarlierOff + EarlierSize)))
     return OW_End;
 
   // Finally, we also need to check if the later store overwrites the beginning
@@ -507,9 +510,8 @@
   // of earlier to avoid generating writes to addresses which will definitely
   // be overwritten later.
   if (!EnablePartialOverwriteTracking &&
-      (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff)) {
-    assert(int64_t(LaterOff + Later.Size) <
-               int64_t(EarlierOff + Earlier.Size) &&
+      (LaterOff <= EarlierOff && int64_t(LaterOff + LaterSize) > EarlierOff)) {
+    assert(int64_t(LaterOff + LaterSize) < int64_t(EarlierOff + EarlierSize) &&
            "Expect to be handled as OW_Complete");
     return OW_Begin;
   }
@@ -995,11 +997,10 @@
     Instruction *EarlierWrite = OI.first;
     MemoryLocation Loc = getLocForWrite(EarlierWrite);
     assert(isRemovable(EarlierWrite) && "Expect only removable instruction");
-    assert(Loc.Size != MemoryLocation::UnknownSize && "Unexpected mem loc");
+    auto EarlierSize = int64_t(Loc.Size.getValue());
 
     const Value *Ptr = Loc.Ptr->stripPointerCasts();
     int64_t EarlierStart = 0;
-    int64_t EarlierSize = int64_t(Loc.Size);
     GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL);
     OverlapIntervalsTy &IntervalMap = OI.second;
     Changed |=
@@ -1195,8 +1196,9 @@
         assert(!EnablePartialOverwriteTracking && "Do not expect to perform "
                                                   "when partial-overwrite "
                                                   "tracking is enabled");
-        int64_t EarlierSize = DepLoc.Size;
-        int64_t LaterSize = Loc.Size;
+        // The overwrite result is known, so these must be known, too.
+        int64_t EarlierSize = DepLoc.Size.getValue();
+        int64_t LaterSize = Loc.Size.getValue();
         bool IsOverwriteEnd = (OR == OW_End);
         MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize,
                                    InstWriteOffset, LaterSize, IsOverwriteEnd);
Index: test/Analysis/AliasSet/intrinsics.ll
===================================================================
--- test/Analysis/AliasSet/intrinsics.ll
+++ test/Analysis/AliasSet/intrinsics.ll
@@ -2,9 +2,9 @@
 
 ; CHECK: Alias sets for function 'test1':
 ; CHECK: Alias Set Tracker: 2 alias sets for 2 pointer values.
-; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %a, 1)
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %a, LocationSize::precise(1))
 ; CHECK-NOT: 1 Unknown instruction
-; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %b, 1)
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %b, LocationSize::precise(1))
 define void @test1(i32 %c) {
 entry:
   %a = alloca i8, align 1
Index: test/Analysis/AliasSet/memtransfer.ll
===================================================================
--- test/Analysis/AliasSet/memtransfer.ll
+++ test/Analysis/AliasSet/memtransfer.ll
@@ -5,10 +5,10 @@
 
 ; CHECK: Alias sets for function 'test1':
 ; CHECK: Alias Set Tracker: 3 alias sets for 4 pointer values.
-; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %a, 1)
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %a, LocationSize::precise(1))
 ; CHECK-NOT: 1 Unknown instructions
-; CHECK: AliasSet[0x{{[0-9a-f]+}}, 2] may alias, Mod/Ref Pointers: (i8* %s, 1), (i8* %d, 1)
-; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %b, 1)
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 2] may alias, Mod/Ref Pointers: (i8* %s, LocationSize::precise(1)), (i8* %d, LocationSize::precise(1))
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %b, LocationSize::precise(1))
 define void @test1(i8* %s, i8* %d) {
 entry:
   %a = alloca i8, align 1
@@ -21,10 +21,10 @@
 
 ; CHECK: Alias sets for function 'test2':
 ; CHECK: Alias Set Tracker: 3 alias sets for 4 pointer values.
-; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %a, 1)
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %a, LocationSize::precise(1))
 ; CHECK-NOT: 1 Unknown instructions
-; CHECK: AliasSet[0x{{[0-9a-f]+}}, 2] may alias, Mod/Ref [volatile] Pointers: (i8* %s, 1), (i8* %d, 1)
-; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %b, 1)
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 2] may alias, Mod/Ref [volatile] Pointers: (i8* %s, LocationSize::precise(1)), (i8* %d, LocationSize::precise(1))
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %b, LocationSize::precise(1))
 define void @test2(i8* %s, i8* %d) {
 entry:
   %a = alloca i8, align 1
@@ -37,10 +37,10 @@
 
 ; CHECK: Alias sets for function 'test3':
 ; CHECK: Alias Set Tracker: 3 alias sets for 4 pointer values.
-; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %a, 1)
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %a, LocationSize::precise(1))
 ; CHECK-NOT: 1 Unknown instructions
-; CHECK: AliasSet[0x{{[0-9a-f]+}}, 2] may alias, Mod/Ref Pointers: (i8* %s, 1), (i8* %d, 1)
-; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %b, 1)
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 2] may alias, Mod/Ref Pointers: (i8* %s, LocationSize::precise(1)), (i8* %d, LocationSize::precise(1))
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %b, LocationSize::precise(1))
 define void @test3(i8* %s, i8* %d) {
 entry:
   %a = alloca i8, align 1
@@ -53,10 +53,10 @@
 
 ; CHECK: Alias sets for function 'test4':
 ; CHECK: Alias Set Tracker: 3 alias sets for 4 pointer values.
-; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %a, 1)
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %a, LocationSize::precise(1))
 ; CHECK-NOT: 1 Unknown instructions
-; CHECK: AliasSet[0x{{[0-9a-f]+}}, 2] may alias, Mod/Ref [volatile] Pointers: (i8* %s, 1), (i8* %d, 1)
-; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %b, 1)
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 2] may alias, Mod/Ref [volatile] Pointers: (i8* %s, LocationSize::precise(1)), (i8* %d, LocationSize::precise(1))
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %b, LocationSize::precise(1))
 define void @test4(i8* %s, i8* %d) {
 entry:
   %a = alloca i8, align 1
@@ -69,8 +69,8 @@
 
 ; CHECK: Alias sets for function 'test5':
 ; CHECK: Alias Set Tracker: 2 alias sets for 2 pointer values.
-; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod/Ref Pointers: (i8* %a, 1)
-; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %b, 1)
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod/Ref Pointers: (i8* %a, LocationSize::precise(1))
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %b, LocationSize::precise(1))
 define void @test5() {
 entry:
   %a = alloca i8, align 1
@@ -83,8 +83,8 @@
 
 ; CHECK: Alias sets for function 'test6':
 ; CHECK: Alias Set Tracker: 2 alias sets for 2 pointer values.
-; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod/Ref Pointers: (i8* %a, 1)
-; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %b, 1)
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod/Ref Pointers: (i8* %a, LocationSize::precise(1))
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod Pointers: (i8* %b, LocationSize::precise(1))
 define void @test6() {
 entry:
   %a = alloca i8, align 1
@@ -97,8 +97,8 @@
 
 ; CHECK: Alias sets for function 'test7':
 ; CHECK: Alias Set Tracker: 2 alias sets for 2 pointer values.
-; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod/Ref Pointers: (i8* %a, 1)
-; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod/Ref Pointers: (i8* %b, 1)
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod/Ref Pointers: (i8* %a, LocationSize::precise(1))
+; CHECK: AliasSet[0x{{[0-9a-f]+}}, 1] must alias, Mod/Ref Pointers: (i8* %b, LocationSize::precise(1))
 define void @test7() {
 entry:
   %a = alloca i8, align 1
Index: test/Analysis/AliasSet/saturation.ll
===================================================================
--- test/Analysis/AliasSet/saturation.ll
+++ test/Analysis/AliasSet/saturation.ll
@@ -2,10 +2,10 @@
 ; RUN: opt -basicaa -print-alias-sets -alias-set-saturation-threshold=1 -S -o - < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=SAT
 
 ; CHECK-LABEL: 'allmust'
-; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %a, 4)
-; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %b, 4)
-; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %c, 4)
-; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %d, 4)
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %a, LocationSize::precise(4))
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %b, LocationSize::precise(4))
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %c, LocationSize::precise(4))
+; CHECK: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %d, LocationSize::precise(4))
 define void @allmust() {
   %a = alloca i32
   %b = alloca i32
@@ -19,11 +19,11 @@
 }
 
 ; CHECK-LABEL :'mergemay'
-; NOSAT: AliasSet[{{.*}}, 2] may alias, Mod Pointers: (i32* %a, 4), (i32* %a1, 4)
-; NOSAT: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %b, 4)
+; NOSAT: AliasSet[{{.*}}, 2] may alias, Mod Pointers: (i32* %a, LocationSize::precise(4)), (i32* %a1, LocationSize::precise(4))
+; NOSAT: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %b, LocationSize::precise(4))
 ; SAT: AliasSet[{{.*}}, 2] may alias, Mod forwarding to 0x[[FWD:[0-9a-f]*]]
 ; SAT: AliasSet[{{.*}}, 1] must alias, Mod forwarding to 0x[[FWD]]
-; SAT: AliasSet[0x[[FWD]], 2] may alias, Mod/Ref Pointers: (i32* %a, 4), (i32* %a1, 4), (i32* %b, 4)
+; SAT: AliasSet[0x[[FWD]], 2] may alias, Mod/Ref Pointers: (i32* %a, LocationSize::precise(4)), (i32* %a1, LocationSize::precise(4)), (i32* %b, LocationSize::precise(4))
 define void @mergemay(i32 %k) {
   %a = alloca i32
   %b = alloca i32
@@ -35,13 +35,13 @@
 }
 
 ; CHECK-LABEL: 'mergemust'
-; NOSAT: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %a, 4)
-; NOSAT: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %b, 4)
-; NOSAT: AliasSet[{{.*}}, 2] may alias, Mod Pointers: (i32* %c, 4), (i32* %d, 4)
+; NOSAT: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %a, LocationSize::precise(4))
+; NOSAT: AliasSet[{{.*}}, 1] must alias, Mod Pointers: (i32* %b, LocationSize::precise(4))
+; NOSAT: AliasSet[{{.*}}, 2] may alias, Mod Pointers: (i32* %c, LocationSize::precise(4)), (i32* %d, LocationSize::precise(4))
 ; SAT: AliasSet[{{.*}}, 1] must alias, Mod forwarding to 0x[[FWD:[0-9a-f]*]]
 ; SAT: AliasSet[{{.*}}, 1] must alias, Mod forwarding to 0x[[FWD]]
 ; SAT: AliasSet[{{.*}}, 2] may alias, Mod forwarding to 0x[[FWD]]
-; SAT: AliasSet[0x[[FWD]], 3] may alias, Mod/Ref Pointers: (i32* %a, 4), (i32* %b, 4), (i32* %c, 4), (i32* %d, 4)
+; SAT: AliasSet[0x[[FWD]], 3] may alias, Mod/Ref Pointers: (i32* %a, LocationSize::precise(4)), (i32* %b, LocationSize::precise(4)), (i32* %c, LocationSize::precise(4)), (i32* %d, LocationSize::precise(4))
 define void @mergemust(i32* %c, i32* %d) {
   %a = alloca i32
   %b = alloca i32
Index: test/Transforms/LICM/pr36228.ll
===================================================================
--- /dev/null
+++ test/Transforms/LICM/pr36228.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S -licm -o - %s | FileCheck %s
+;
+; Be sure that we don't hoist loads incorrectly if a loop has conditional UB.
+; See PR36228.
+
+declare void @check(i8)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
+
+; CHECK-LABEL: define void @buggy
+define void @buggy(i8* %src, i1* %kOne) {
+entry:
+  %dst = alloca [1 x i8], align 1
+  %0 = getelementptr inbounds [1 x i8], [1 x i8]* %dst, i64 0, i64 0
+  store i8 42, i8* %0, align 1
+  %src16 = bitcast i8* %src to i16*
+  %srcval = load i16, i16* %src16
+  br label %while.cond
+
+while.cond:                                       ; preds = %if.end, %entry
+  %dp.0 = phi i8* [ %0, %entry ], [ %dp.1, %if.end ]
+  %1 = load volatile i1, i1* %kOne, align 4
+  br i1 %1, label %if.else, label %if.then
+
+if.then:                                          ; preds = %while.cond
+  store i8 9, i8* %dp.0, align 1
+  br label %if.end
+
+if.else:                                          ; preds = %while.cond
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dp.0, i8* %src, i64 2, i32 1, i1 false)
+  %dp.new = getelementptr inbounds i8, i8* %dp.0, i64 1
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %dp.1 = phi i8* [ %dp.0, %if.then ], [ %dp.new, %if.else ]
+  ; CHECK: %2 = load i8, i8* %0
+  %2 = load i8, i8* %0, align 1
+  ; CHECK-NEXT: call void @check(i8 %2)
+  call void @check(i8 %2)
+  br label %while.cond
+}
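
Note (not part of the patch): the sketch below shows how the LocationSize factories and queries introduced in the MemoryLocation.h hunk are intended to be used, assuming the patch is applied to the tree. The main() driver is hypothetical; the printed strings follow LocationSize::print() from the MemoryLocation.cpp hunk, and are the same strings the updated AliasSet test CHECK lines now match.

// Illustrative only: exercises the LocationSize API added in
// include/llvm/Analysis/MemoryLocation.h above. Assumes the patch is applied.
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using llvm::LocationSize;

int main() {
  // A `store i32 0, i32* %p` guarantees at least 4 dereferenceable bytes,
  // so its location size is precise.
  LocationSize StoreSize = LocationSize::precise(4);

  // A memcpy whose length is `select i1 %foo, i64 1, i64 4` writes *up to*
  // 4 bytes, so only an upper bound is known.
  LocationSize MemcpySize = LocationSize::upperBound(4);

  assert(StoreSize.isPrecise() && !MemcpySize.isPrecise());
  assert(StoreSize.hasValue() && StoreSize.getValue() == 4);
  assert(!LocationSize::unknown().hasValue());

  // These are the strings the updated AliasSet tests expect, e.g.
  // "(i8* %a, LocationSize::precise(1))".
  llvm::outs() << StoreSize << "\n";  // LocationSize::precise(4)
  llvm::outs() << MemcpySize << "\n"; // LocationSize::upperBound(4)
  return 0;
}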
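A second sketch (again not part of the patch) of the size-merging rule that PointerRec::updateSizeAndAAInfo in the AliasSetTracker.h hunk now relies on: once two different sizes have been seen for a pointer, combineWith() only promises an upper bound. The main() driver is hypothetical.

// Illustrative only: the merge behaviour behind updateSizeAndAAInfo above.
// Assumes the patch is applied; uses only the public LocationSize API.
#include "llvm/Analysis/MemoryLocation.h"
#include <cassert>

using llvm::LocationSize;

int main() {
  // First access recorded for a pointer: the record simply adopts the size.
  LocationSize Recorded = LocationSize::precise(4);

  // Folding in a second, larger access yields an upper bound: the alias set
  // has seen 4- and 8-byte accesses, so all it can promise is "at most 8".
  LocationSize Merged = Recorded.combineWith(LocationSize::precise(8));
  assert(Merged == LocationSize::upperBound(8));
  assert(!Merged.isPrecise());

  // Re-merging an already-covered size is a no-op, which keeps the
  // SizeChanged flag in updateSizeAndAAInfo accurate.
  assert(Merged.combineWith(LocationSize::precise(8)) == Merged);

  // Anything combined with unknown() degrades to unknown().
  assert(!Merged.combineWith(LocationSize::unknown()).hasValue());
  return 0;
}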
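Finally, a sketch of why the patch adds a DenseMapInfo<LocationSize> specialization: MemoryLocation's DenseMap sentinels are now built from LocationSize::mapEmpty()/mapTombstone() rather than the literal 0, so maps keyed on MemoryLocation (such as BasicAA's AliasCache, visible in the hunks above) keep working. The helper below is hypothetical and only illustrative.

// Illustrative only: a DenseMap keyed on MemoryLocation, which relies on the
// DenseMapInfo<LocationSize> and DenseMapInfo<MemoryLocation> specializations
// from the MemoryLocation.h hunk. countDistinctLocations is a hypothetical
// helper, not an LLVM API.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/MemoryLocation.h"

using namespace llvm;

static unsigned countDistinctLocations(ArrayRef<MemoryLocation> Locs) {
  // mapEmpty()/mapTombstone() are reserved for the map itself; real queries
  // only ever carry precise, upperBound, or unknown sizes.
  DenseMap<MemoryLocation, unsigned> Seen;
  for (const MemoryLocation &L : Locs)
    ++Seen[L];
  return Seen.size();
}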