Index: include/llvm/Analysis/BasicAliasAnalysis.h
===================================================================
--- include/llvm/Analysis/BasicAliasAnalysis.h
+++ include/llvm/Analysis/BasicAliasAnalysis.h
@@ -110,7 +110,7 @@
     unsigned ZExtBits;
     unsigned SExtBits;

-    int64_t Scale;
+    APInt Scale;

     bool operator==(const VariableGEPIndex &Other) const {
       return V == Other.V && ZExtBits == Other.ZExtBits &&
@@ -128,10 +128,10 @@
     // Base pointer of the GEP
     const Value *Base;
     // Total constant offset w.r.t the base from indexing into structs
-    int64_t StructOffset;
+    APInt StructOffset;
     // Total constant offset w.r.t the base from indexing through
     // pointers/arrays/vectors
-    int64_t OtherOffset;
+    APInt OtherOffset;
     // Scaled variable (non-constant) indices.
     SmallVector<VariableGEPIndex, 4> VarIndices;
   };
@@ -183,7 +183,7 @@
   /// the addition overflows.
   bool
   constantOffsetHeuristic(const SmallVectorImpl<VariableGEPIndex> &VarIndices,
-                          uint64_t V1Size, uint64_t V2Size, int64_t BaseOffset,
+                          uint64_t V1Size, uint64_t V2Size, APInt BaseOffset,
                           AssumptionCache *AC, DominatorTree *DT);

   bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2);
Index: include/llvm/IR/DataLayout.h
===================================================================
--- include/llvm/IR/DataLayout.h
+++ include/llvm/IR/DataLayout.h
@@ -323,6 +323,9 @@
   /// the backends/clients are updated.
   unsigned getPointerSize(unsigned AS = 0) const;

+  /// Returns the maximum pointer size over all address spaces.
+  unsigned getMaxPointerSize() const;
+
   /// Return the address spaces containing non-integral pointers. Pointers in
   /// this address space don't have a well-defined bitwise representation.
   ArrayRef<unsigned> getNonIntegralAddressSpaces() const {
@@ -347,6 +350,11 @@
     return getPointerSize(AS) * 8;
   }

+  /// Returns the maximum pointer size over all address spaces.
+  unsigned getMaxPointerSizeInBits() const {
+    return getMaxPointerSize() * 8;
+  }
+
   /// Layout pointer size, in bits, based on the type. If this function is
   /// called with a pointer type, then the type size of the pointer is returned.
   /// If this function is called with a vector of pointers, then the type size
Index: lib/Analysis/BasicAliasAnalysis.cpp
===================================================================
--- lib/Analysis/BasicAliasAnalysis.cpp
+++ lib/Analysis/BasicAliasAnalysis.cpp
@@ -67,6 +67,16 @@
 /// Enable analysis of recursive PHI nodes.
 static cl::opt<bool> EnableRecPhiAnalysis("basicaa-recphi", cl::Hidden,
                                           cl::init(false));
+
+/// By default, even on 32-bit architectures we use 64-bit integers for
+/// calculations. This will allow us to more aggressively decompose indexing
+/// expressions calculated using i64 values (e.g., long long in C) which is
+/// common enough to worry about.
+static cl::opt<bool> ForceAtLeast64Bits("basicaa-force-at-least-64b",
+                                        cl::Hidden, cl::init(true));
+static cl::opt<bool> DoubleCalcBits("basicaa-double-calc-bits",
+                                    cl::Hidden, cl::init(false));
+
 /// SearchLimitReached / SearchTimes shows how often the limit of
 /// to decompose GEPs is reached. It will affect the precision
 /// of basic alias analysis.
@@ -359,13 +369,22 @@
 }

 /// To ensure a pointer offset fits in an integer of size PointerSize
-/// (in bits) when that size is smaller than 64. This is an issue in
-/// particular for 32b programs with negative indices that rely on two's
-/// complement wrap-arounds for precise alias information.
-static int64_t adjustToPointerSize(int64_t Offset, unsigned PointerSize) {
-  assert(PointerSize <= 64 && "Invalid PointerSize!");
-  unsigned ShiftBits = 64 - PointerSize;
-  return (int64_t)((uint64_t)Offset << ShiftBits) >> ShiftBits;
+/// (in bits) when that size is smaller than the maximum pointer size. This is
+/// an issue, in particular, for 32b pointers with negative indices that rely
+/// on two's complement wrap-arounds for precise alias information where the
+/// maximum pointer size is 64b.
+static APInt adjustToPointerSize(APInt Offset, unsigned PointerSize) {
+  assert(PointerSize <= Offset.getBitWidth() && "Invalid PointerSize!");
+  unsigned ShiftBits = Offset.getBitWidth() - PointerSize;
+  return (Offset << ShiftBits).ashr(ShiftBits);
+}
+
+static unsigned getMaxPointerSize(const DataLayout &DL) {
+  unsigned MaxPointerSize = DL.getMaxPointerSizeInBits();
+  if (MaxPointerSize < 64 && ForceAtLeast64Bits) MaxPointerSize = 64;
+  if (DoubleCalcBits) MaxPointerSize *= 2;
+
+  return MaxPointerSize;
 }

 /// If V is a symbolic pointer expression, decompose it into a base pointer
@@ -388,8 +407,7 @@
   unsigned MaxLookup = MaxLookupSearchDepth;
   SearchTimes++;

-  Decomposed.StructOffset = 0;
-  Decomposed.OtherOffset = 0;
+  unsigned MaxPointerSize = getMaxPointerSize(DL);
   Decomposed.VarIndices.clear();
   do {
     // See if this is a bitcast or GEP.
@@ -469,13 +487,15 @@
         if (CIdx->isZero())
           continue;
         Decomposed.OtherOffset +=
-          DL.getTypeAllocSize(GTI.getIndexedType()) * CIdx->getSExtValue();
+            (DL.getTypeAllocSize(GTI.getIndexedType()) *
+             CIdx->getValue().sextOrSelf(MaxPointerSize))
+                .sextOrTrunc(MaxPointerSize);
         continue;
       }

       GepHasConstantOffset = false;

-      uint64_t Scale = DL.getTypeAllocSize(GTI.getIndexedType());
+      APInt Scale(MaxPointerSize, DL.getTypeAllocSize(GTI.getIndexedType()));
       unsigned ZExtBits = 0, SExtBits = 0;

       // If the integer type is smaller than the pointer size, it is implicitly
@@ -487,13 +507,30 @@
       // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
       APInt IndexScale(Width, 0), IndexOffset(Width, 0);
       bool NSW = true, NUW = true;
+      const Value *OrigIndex = Index;
       Index = GetLinearExpression(Index, IndexScale, IndexOffset, ZExtBits,
                                   SExtBits, DL, 0, AC, DT, NSW, NUW);

       // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
       // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
-      Decomposed.OtherOffset += IndexOffset.getSExtValue() * Scale;
-      Scale *= IndexScale.getSExtValue();
+
+      // It can be the case that, even though C1*V+C2 does not overflow for
+      // relevant values of V, (C2*Scale) can overflow. In that case, we cannot
+      // decompose the expression in this way.
+      APInt WideScaledOffset = IndexOffset.sextOrTrunc(MaxPointerSize*2) *
+                                 Scale.sext(MaxPointerSize*2);
+      if (WideScaledOffset.getMinSignedBits() > MaxPointerSize) {
+        Index = OrigIndex;
+        IndexScale = 1;
+        IndexOffset = 0;
+
+        ZExtBits = SExtBits = 0;
+        if (PointerSize > Width)
+          SExtBits += PointerSize - Width;
+      } else {
+        Decomposed.OtherOffset += IndexOffset.sextOrTrunc(MaxPointerSize) * Scale;
+        Scale *= IndexScale.sextOrTrunc(MaxPointerSize);
+      }

       // If we already had an occurrence of this index variable, merge this
       // scale into it. For example, we want to handle:
@@ -513,9 +550,8 @@
       // pointer size.
       Scale = adjustToPointerSize(Scale, PointerSize);

-      if (Scale) {
-        VariableGEPIndex Entry = {Index, ZExtBits, SExtBits,
-                                  static_cast<int64_t>(Scale)};
+      if (!!Scale) {
+        VariableGEPIndex Entry = {Index, ZExtBits, SExtBits, Scale};
         Decomposed.VarIndices.push_back(Entry);
       }
     }
@@ -970,8 +1006,12 @@
   // If the last (struct) indices are constants and are equal, the other indices
   // might be also be dynamically equal, so the GEPs can alias.
-  if (C1 && C2 && C1->getSExtValue() == C2->getSExtValue())
-    return MayAlias;
+  if (C1 && C2) {
+    unsigned BitWidth = std::max(C1->getBitWidth(), C2->getBitWidth());
+    if (C1->getValue().sextOrSelf(BitWidth) ==
+        C2->getValue().sextOrSelf(BitWidth))
+      return MayAlias;
+  }

   // Find the last-indexed type of the GEP, i.e., the type you'd get if
   // you stripped the last index.
@@ -1054,6 +1094,9 @@
       return MayAlias;
   }

+  if (C1->getBitWidth() > 64 || C2->getBitWidth() > 64)
+    return MayAlias;
+
   // We know that:
   // - both GEPs begin indexing from the exact same pointer;
   // - the last indices in both GEPs are constants, indexing into a struct;
@@ -1132,19 +1175,19 @@
       !DecompObject.VarIndices.empty())
     return false;

-  int64_t ObjectBaseOffset = DecompObject.StructOffset +
-                             DecompObject.OtherOffset;
+  APInt ObjectBaseOffset = DecompObject.StructOffset +
+                           DecompObject.OtherOffset;

   // If the GEP has no variable indices, we know the precise offset
   // from the base, then use it. If the GEP has variable indices, we're in
   // a bit more trouble: we can't count on the constant offsets that come
   // from non-struct sources, since these can be "rewound" by a negative
   // variable offset. So use only offsets that came from structs.
-  int64_t GEPBaseOffset = DecompGEP.StructOffset;
+  APInt GEPBaseOffset = DecompGEP.StructOffset;
   if (DecompGEP.VarIndices.empty())
     GEPBaseOffset += DecompGEP.OtherOffset;

-  return (GEPBaseOffset >= ObjectBaseOffset + (int64_t)ObjectAccessSize);
+  return GEPBaseOffset.sge(ObjectBaseOffset + (int64_t)ObjectAccessSize);
 }

 /// Provides a bunch of ad-hoc rules to disambiguate a GEP instruction against
@@ -1159,13 +1202,17 @@
                                     const Value *UnderlyingV1,
                                     const Value *UnderlyingV2) {
   DecomposedGEP DecompGEP1, DecompGEP2;
+  unsigned MaxPointerSize = getMaxPointerSize(DL);
+  DecompGEP1.StructOffset = DecompGEP1.OtherOffset = APInt(MaxPointerSize, 0);
+  DecompGEP2.StructOffset = DecompGEP2.OtherOffset = APInt(MaxPointerSize, 0);
+
   bool GEP1MaxLookupReached =
     DecomposeGEPExpression(GEP1, DecompGEP1, DL, &AC, DT);
   bool GEP2MaxLookupReached =
     DecomposeGEPExpression(V2, DecompGEP2, DL, &AC, DT);

-  int64_t GEP1BaseOffset = DecompGEP1.StructOffset + DecompGEP1.OtherOffset;
-  int64_t GEP2BaseOffset = DecompGEP2.StructOffset + DecompGEP2.OtherOffset;
+  APInt GEP1BaseOffset = DecompGEP1.StructOffset + DecompGEP1.OtherOffset;
+  APInt GEP2BaseOffset = DecompGEP2.StructOffset + DecompGEP2.OtherOffset;

   assert(DecompGEP1.Base == UnderlyingV1 && DecompGEP2.Base == UnderlyingV2 &&
          "DecomposeGEPExpression returned a result different from "
@@ -1284,9 +1331,9 @@
   // that the objects are partially overlapping. If the difference is
   // greater, we know they do not overlap.
   if (GEP1BaseOffset != 0 && DecompGEP1.VarIndices.empty()) {
-    if (GEP1BaseOffset >= 0) {
+    if (GEP1BaseOffset.sge(0)) {
       if (V2Size != MemoryLocation::UnknownSize) {
-        if ((uint64_t)GEP1BaseOffset < V2Size)
+        if (GEP1BaseOffset.ult(V2Size))
           return PartialAlias;
         return NoAlias;
       }
@@ -1301,7 +1348,7 @@
       // stripped a gep with negative index ('gep <ptr>, -1, ...).
       if (V1Size != MemoryLocation::UnknownSize &&
          V2Size != MemoryLocation::UnknownSize) {
-        if (-(uint64_t)GEP1BaseOffset < V1Size)
+        if ((-GEP1BaseOffset).ult(V1Size))
          return PartialAlias;
        return NoAlias;
      }
    }
  }
@@ -1309,7 +1356,7 @@
   }

   if (!DecompGEP1.VarIndices.empty()) {
-    uint64_t Modulo = 0;
+    APInt Modulo(MaxPointerSize, 0);
     bool AllPositive = true;

     for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
@@ -1317,7 +1364,7 @@
       // Grab the least significant bit set in any of the scales. We
       // don't need std::abs here (even if the scale's negative) as we'll
       // be ^'ing Modulo with itself later.
-      Modulo |= (uint64_t)DecompGEP1.VarIndices[i].Scale;
+      Modulo |= DecompGEP1.VarIndices[i].Scale;

       if (AllPositive) {
         // If the Value could change between cycles, then any reasoning about
@@ -1338,9 +1385,9 @@
         // If the variable begins with a zero then we know it's
         // positive, regardless of whether the value is signed or
         // unsigned.
-        int64_t Scale = DecompGEP1.VarIndices[i].Scale;
+        APInt Scale = DecompGEP1.VarIndices[i].Scale;
         AllPositive =
-            (SignKnownZero && Scale >= 0) || (SignKnownOne && Scale < 0);
+            (SignKnownZero && Scale.sge(0)) || (SignKnownOne && Scale.slt(0));
       }
     }

@@ -1349,16 +1396,16 @@
     // We can compute the difference between the two addresses
     // mod Modulo. Check whether that difference guarantees that the
     // two locations do not alias.
-    uint64_t ModOffset = (uint64_t)GEP1BaseOffset & (Modulo - 1);
+    APInt ModOffset = GEP1BaseOffset & (Modulo - 1);
     if (V1Size != MemoryLocation::UnknownSize &&
-        V2Size != MemoryLocation::UnknownSize && ModOffset >= V2Size &&
-        V1Size <= Modulo - ModOffset)
+        V2Size != MemoryLocation::UnknownSize && ModOffset.uge(V2Size) &&
+        (Modulo - ModOffset).uge(V1Size))
       return NoAlias;

     // If we know all the variables are positive, then GEP1 >= GEP1BasePtr.
     // If GEP1BasePtr > V2 (GEP1BaseOffset > 0) then we know the pointers
     // don't alias if V2Size can fit in the gap between V2 and GEP1BasePtr.
-    if (AllPositive && GEP1BaseOffset > 0 && V2Size <= (uint64_t)GEP1BaseOffset)
+    if (AllPositive && GEP1BaseOffset.sgt(0) && GEP1BaseOffset.uge(V2Size))
       return NoAlias;

     if (constantOffsetHeuristic(DecompGEP1.VarIndices, V1Size, V2Size,
@@ -1726,7 +1773,7 @@
   for (unsigned i = 0, e = Src.size(); i != e; ++i) {
     const Value *V = Src[i].V;
     unsigned ZExtBits = Src[i].ZExtBits, SExtBits = Src[i].SExtBits;
-    int64_t Scale = Src[i].Scale;
+    APInt Scale = Src[i].Scale;

     // Find V in Dest. This is N^2, but pointer indices almost never have more
     // than a few variable indexes.
@@ -1746,7 +1793,7 @@
     }

     // If we didn't consume this entry, add it to the end of the Dest list.
-    if (Scale) {
+    if (!!Scale) {
       VariableGEPIndex Entry = {V, ZExtBits, SExtBits, -Scale};
       Dest.push_back(Entry);
     }
@@ -1755,7 +1802,7 @@
 bool BasicAAResult::constantOffsetHeuristic(
     const SmallVectorImpl<VariableGEPIndex> &VarIndices, uint64_t V1Size,
-    uint64_t V2Size, int64_t BaseOffset, AssumptionCache *AC,
+    uint64_t V2Size, APInt BaseOffset, AssumptionCache *AC,
     DominatorTree *DT) {
   if (VarIndices.size() != 2 || V1Size == MemoryLocation::UnknownSize ||
       V2Size == MemoryLocation::UnknownSize)
     return false;
@@ -1797,14 +1844,15 @@
   // the minimum distance between %i and %i + 5 is 3.
   APInt MinDiff = V0Offset - V1Offset, Wrapped = -MinDiff;
   MinDiff = APIntOps::umin(MinDiff, Wrapped);
-  uint64_t MinDiffBytes = MinDiff.getZExtValue() * std::abs(Var0.Scale);
+  APInt MinDiffBytes =
+    MinDiff.zextOrTrunc(Var0.Scale.getBitWidth()) * Var0.Scale.abs();

   // We can't definitely say whether GEP1 is before or after V2 due to wrapping
   // arithmetic (i.e. for some values of GEP1 and V2 GEP1 < V2, and for other
   // values GEP1 > V2). We'll therefore only declare NoAlias if both V1Size and
   // V2Size can fit in the MinDiffBytes gap.
-  return V1Size + std::abs(BaseOffset) <= MinDiffBytes &&
-         V2Size + std::abs(BaseOffset) <= MinDiffBytes;
+  return MinDiffBytes.uge(V1Size + BaseOffset.abs()) &&
+         MinDiffBytes.uge(V2Size + BaseOffset.abs());
 }

 //===----------------------------------------------------------------------===//
Index: lib/IR/DataLayout.cpp
===================================================================
--- lib/IR/DataLayout.cpp
+++ lib/IR/DataLayout.cpp
@@ -611,6 +611,14 @@
   return I->TypeByteWidth;
 }

+unsigned DataLayout::getMaxPointerSize() const {
+  unsigned MaxPointerSize = 0;
+  for (auto &P : Pointers)
+    MaxPointerSize = std::max(MaxPointerSize, P.TypeByteWidth);
+
+  return MaxPointerSize;
+}
+
 unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const {
   assert(Ty->isPtrOrPtrVectorTy() &&
          "This should only be called with a pointer or pointer vector type");
Index: test/Analysis/BasicAA/gep-and-alias-64.ll
===================================================================
--- /dev/null
+++ test/Analysis/BasicAA/gep-and-alias-64.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -basicaa -gvn < %s | FileCheck %s
+
+target datalayout = "e-m:o-p:64:64-f64:32:64-f80:128-n8:16:32-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+; The load and store address in the loop body could alias so the load
+; can't be hoisted above the store and out of the loop.
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
+
+define i64 @foo(i64 %x, i64 %z, i64 %n) {
+entry:
+  %pool = alloca [59 x i64], align 4
+  %tmp = bitcast [59 x i64]* %pool to i8*
+  call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
+  %cmp3 = icmp eq i64 %n, 0
+  br i1 %cmp3, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  %add = add i64 %z, %x
+  %and = and i64 %add, 9223372036854775807
+  %sub = add nsw i64 %and, -9223372036844814062
+  %arrayidx = getelementptr inbounds [59 x i64], [59 x i64]* %pool, i64 0, i64 %sub
+  %arrayidx1 = getelementptr inbounds [59 x i64], [59 x i64]* %pool, i64 0, i64 42
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  store i64 %i.04, i64* %arrayidx, align 4
+  %tmp1 = load i64, i64* %arrayidx1, align 4
+  %inc = add nuw i64 %i.04, 1
+  %exitcond = icmp ne i64 %inc, %n
+  br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  %lcssa = phi i64 [ %tmp1, %for.body ]
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  %s = phi i64 [ 0, %entry ], [ %lcssa, %for.end.loopexit ]
+; CHECK: ret i64 %s
+  ret i64 %s
+}
Index: test/Analysis/BasicAA/gep-and-alias.ll
===================================================================
--- test/Analysis/BasicAA/gep-and-alias.ll
+++ test/Analysis/BasicAA/gep-and-alias.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -S -basicaa -gvn < %s | FileCheck %s
+; RUN: opt -S -basicaa -gvn -basicaa-force-at-least-64b=0 < %s | FileCheck %s

 target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
 target triple = "i386-apple-macosx10.6.0"
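
Note for readers (not part of the patch): below is a minimal standalone sketch of the wrap-to-pointer-width trick that the APInt-based adjustToPointerSize above relies on. It assumes only llvm::APInt from LLVM's ADT headers; the sample value, the main driver, and the file name are hypothetical and exist purely for illustration.

// sketch.cpp -- illustrative only; mirrors the shape of the patched helper.
#include "llvm/ADT/APInt.h"
#include <cassert>

using llvm::APInt;

// Wrap an offset held at a wider bit width down to PointerSize bits,
// preserving its sign: shift the interesting bits up to the top, then
// arithmetic-shift them back down.
static APInt adjustToPointerSize(APInt Offset, unsigned PointerSize) {
  assert(PointerSize <= Offset.getBitWidth() && "Invalid PointerSize!");
  unsigned ShiftBits = Offset.getBitWidth() - PointerSize;
  return (Offset << ShiftBits).ashr(ShiftBits);
}

int main() {
  // Hypothetical example: a 64-bit offset of 0xFFFFFFFC (say, a negative i32
  // index that was masked to 32 bits) becomes -4 once adjusted to a 32-bit
  // pointer, which is the two's complement wrap-around the comment describes.
  APInt Offset(64, 0xFFFFFFFCULL);
  APInt Adjusted = adjustToPointerSize(Offset, 32);
  assert(Adjusted.getSExtValue() == -4);
  return 0;
}

The sketch links against LLVM's Support library, e.g. clang++ sketch.cpp $(llvm-config --cxxflags --ldflags --libs support).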