Index: llvm/trunk/include/llvm/Analysis/BasicAliasAnalysis.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/BasicAliasAnalysis.h
+++ llvm/trunk/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -115,7 +115,7 @@
     unsigned ZExtBits;
     unsigned SExtBits;
 
-    int64_t Scale;
+    APInt Scale;
 
     bool operator==(const VariableGEPIndex &Other) const {
       return V == Other.V && ZExtBits == Other.ZExtBits &&
@@ -133,10 +133,10 @@
     // Base pointer of the GEP
     const Value *Base;
     // Total constant offset w.r.t the base from indexing into structs
-    int64_t StructOffset;
+    APInt StructOffset;
    // Total constant offset w.r.t the base from indexing through
    // pointers/arrays/vectors
-    int64_t OtherOffset;
+    APInt OtherOffset;
    // Scaled variable (non-constant) indices.
    SmallVector<VariableGEPIndex, 4> VarIndices;
  };
@@ -189,7 +189,7 @@
   bool
   constantOffsetHeuristic(const SmallVectorImpl<VariableGEPIndex> &VarIndices,
                           LocationSize V1Size, LocationSize V2Size,
-                          int64_t BaseOffset, AssumptionCache *AC,
+                          APInt BaseOffset, AssumptionCache *AC,
                           DominatorTree *DT);
 
   bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2);
Index: llvm/trunk/include/llvm/IR/DataLayout.h
===================================================================
--- llvm/trunk/include/llvm/IR/DataLayout.h
+++ llvm/trunk/include/llvm/IR/DataLayout.h
@@ -334,6 +334,9 @@
   /// the backends/clients are updated.
   unsigned getPointerSize(unsigned AS = 0) const;
 
+  /// Returns the maximum pointer size over all address spaces.
+  unsigned getMaxPointerSize() const;
+
   // Index size used for address calculation.
   unsigned getIndexSize(unsigned AS) const;
 
@@ -361,6 +364,11 @@
     return getPointerSize(AS) * 8;
   }
 
+  /// Returns the maximum pointer size over all address spaces.
+  unsigned getMaxPointerSizeInBits() const {
+    return getMaxPointerSize() * 8;
+  }
+
   /// Size in bits of index used for address calculation in getelementptr.
   unsigned getIndexSizeInBits(unsigned AS) const {
     return getIndexSize(AS) * 8;
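The new DataLayout query is what lets BasicAA size its offset arithmetic for the widest pointer type a module declares. As a minimal standalone sketch of how a client might use it (illustrative only, not part of the patch; it assumes a build of LLVM containing these declarations):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      // Address space 0 uses 32-bit pointers; address space 100 uses
      // 128-bit pointers, as in the new 128-bit-ptr.ll test below.
      llvm::DataLayout DL("e-p:32:32-p100:128:128");
      // The maximum over all declared pointer specs: 128 bits here.
      llvm::outs() << DL.getMaxPointerSizeInBits() << "\n"; // prints 128
      return 0;
    }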
Index: llvm/trunk/lib/Analysis/BasicAliasAnalysis.cpp
===================================================================
--- llvm/trunk/lib/Analysis/BasicAliasAnalysis.cpp
+++ llvm/trunk/lib/Analysis/BasicAliasAnalysis.cpp
@@ -68,6 +68,16 @@
 /// Enable analysis of recursive PHI nodes.
 static cl::opt<bool> EnableRecPhiAnalysis("basicaa-recphi", cl::Hidden,
                                           cl::init(false));
+
+/// By default, even on 32-bit architectures we use 64-bit integers for
+/// calculations. This will allow us to more-aggressively decompose indexing
+/// expressions calculated using i64 values (e.g., long long in C), which is
+/// common enough to worry about.
+static cl::opt<bool> ForceAtLeast64Bits("basicaa-force-at-least-64b",
+                                        cl::Hidden, cl::init(true));
+static cl::opt<bool> DoubleCalcBits("basicaa-double-calc-bits",
+                                    cl::Hidden, cl::init(false));
+
 /// SearchLimitReached / SearchTimes shows how often the limit to decompose
 /// GEPs is reached. It will affect the precision of basic alias analysis.
@@ -381,13 +391,22 @@
 }
 
 /// To ensure a pointer offset fits in an integer of size PointerSize
-/// (in bits) when that size is smaller than 64. This is an issue in
-/// particular for 32b programs with negative indices that rely on two's
-/// complement wrap-arounds for precise alias information.
-static int64_t adjustToPointerSize(int64_t Offset, unsigned PointerSize) {
-  assert(PointerSize <= 64 && "Invalid PointerSize!");
-  unsigned ShiftBits = 64 - PointerSize;
-  return (int64_t)((uint64_t)Offset << ShiftBits) >> ShiftBits;
+/// (in bits) when that size is smaller than the maximum pointer size. This is
+/// an issue in particular for 32b pointers with negative indices that rely on
+/// two's complement wrap-arounds for precise alias information, where the
+/// maximum pointer size is 64b.
+static APInt adjustToPointerSize(APInt Offset, unsigned PointerSize) {
+  assert(PointerSize <= Offset.getBitWidth() && "Invalid PointerSize!");
+  unsigned ShiftBits = Offset.getBitWidth() - PointerSize;
+  return (Offset << ShiftBits).ashr(ShiftBits);
+}
+
+static unsigned getMaxPointerSize(const DataLayout &DL) {
+  unsigned MaxPointerSize = DL.getMaxPointerSizeInBits();
+  if (MaxPointerSize < 64 && ForceAtLeast64Bits)
+    MaxPointerSize = 64;
+  if (DoubleCalcBits)
+    MaxPointerSize *= 2;
+
+  return MaxPointerSize;
 }
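The shift pair in adjustToPointerSize keeps only the low PointerSize bits of the offset and re-sign-extends them, so an offset that wrapped in pointer-width arithmetic keeps its two's complement meaning at the wider calculation width. A small sketch of the idiom (illustrative only):

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/raw_ostream.h"
    using llvm::APInt;

    int main() {
      // A 64-bit offset of 2^32 - 8, e.g. what 64-bit arithmetic produces
      // for a "-8" offset on a pointer that is actually 32 bits wide.
      APInt Offset(64, 0xFFFFFFF8ULL);
      unsigned PointerSize = 32;
      unsigned ShiftBits = Offset.getBitWidth() - PointerSize; // 32
      // Shift left, then arithmetic-shift right: sign-extends bit 31.
      APInt Adjusted = (Offset << ShiftBits).ashr(ShiftBits);
      llvm::outs() << Adjusted.getSExtValue() << "\n"; // prints -8
      return 0;
    }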
 /// If V is a symbolic pointer expression, decompose it into a base pointer
@@ -410,8 +429,7 @@
   unsigned MaxLookup = MaxLookupSearchDepth;
   SearchTimes++;
 
-  Decomposed.StructOffset = 0;
-  Decomposed.OtherOffset = 0;
+  unsigned MaxPointerSize = getMaxPointerSize(DL);
   Decomposed.VarIndices.clear();
   do {
     // See if this is a bitcast or GEP.
@@ -501,13 +519,15 @@
         if (CIdx->isZero())
           continue;
         Decomposed.OtherOffset +=
-          DL.getTypeAllocSize(GTI.getIndexedType()) * CIdx->getSExtValue();
+          (DL.getTypeAllocSize(GTI.getIndexedType()) *
+           CIdx->getValue().sextOrSelf(MaxPointerSize))
+            .sextOrTrunc(MaxPointerSize);
         continue;
       }
 
       GepHasConstantOffset = false;
 
-      uint64_t Scale = DL.getTypeAllocSize(GTI.getIndexedType());
+      APInt Scale(MaxPointerSize, DL.getTypeAllocSize(GTI.getIndexedType()));
       unsigned ZExtBits = 0, SExtBits = 0;
 
       // If the integer type is smaller than the pointer size, it is implicitly
@@ -519,20 +539,34 @@
       // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
       APInt IndexScale(Width, 0), IndexOffset(Width, 0);
       bool NSW = true, NUW = true;
+      const Value *OrigIndex = Index;
       Index = GetLinearExpression(Index, IndexScale, IndexOffset, ZExtBits,
                                   SExtBits, DL, 0, AC, DT, NSW, NUW);
 
-      // All GEP math happens in the width of the pointer type,
-      // so we can truncate the value to 64-bits as we don't handle
-      // currently pointers larger than 64 bits and we would crash
-      // later. TODO: Make `Scale` an APInt to avoid this problem.
-      if (IndexScale.getBitWidth() > 64)
-        IndexScale = IndexScale.sextOrTrunc(64);
-
       // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
       // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
-      Decomposed.OtherOffset += IndexOffset.getSExtValue() * Scale;
-      Scale *= IndexScale.getSExtValue();
+
+      // It can be the case that, even though C1*V+C2 does not overflow for
+      // relevant values of V, (C2*Scale) can overflow. In that case, we cannot
+      // decompose the expression in this way.
+      //
+      // FIXME: C1*Scale and the other operations in the decomposed
+      // (C1*Scale)*V+C2*Scale can also overflow. We should check for this
+      // possibility.
+      APInt WideScaledOffset = IndexOffset.sextOrTrunc(MaxPointerSize*2) *
+                               Scale.sext(MaxPointerSize*2);
+      if (WideScaledOffset.getMinSignedBits() > MaxPointerSize) {
+        Index = OrigIndex;
+        IndexScale = 1;
+        IndexOffset = 0;
+
+        ZExtBits = SExtBits = 0;
+        if (PointerSize > Width)
+          SExtBits += PointerSize - Width;
+      } else {
+        Decomposed.OtherOffset += IndexOffset.sextOrTrunc(MaxPointerSize) * Scale;
+        Scale *= IndexScale.sextOrTrunc(MaxPointerSize);
+      }
 
       // If we already had an occurrence of this index variable, merge this
       // scale into it.  For example, we want to handle:
@@ -552,9 +586,8 @@
       // pointer size.
       Scale = adjustToPointerSize(Scale, PointerSize);
 
-      if (Scale) {
-        VariableGEPIndex Entry = {Index, ZExtBits, SExtBits,
-                                  static_cast<int64_t>(Scale)};
+      if (!!Scale) {
+        VariableGEPIndex Entry = {Index, ZExtBits, SExtBits, Scale};
         Decomposed.VarIndices.push_back(Entry);
       }
     }
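The bail-out added above widens IndexOffset and Scale to twice the calculation width before multiplying; if the product needs more signed bits than MaxPointerSize, the decomposition is abandoned and the index is kept opaque instead of being folded into a wrapped constant offset. Roughly, as an illustrative sketch of the same check:

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/raw_ostream.h"
    using llvm::APInt;

    int main() {
      unsigned MaxPointerSize = 64;
      // IndexOffset = 2^62 with an element size of 8: the scaled constant
      // term 2^65 cannot be represented in a signed 64-bit offset.
      APInt IndexOffset = APInt(64, 1).shl(62);
      APInt Scale(64, 8);
      // Multiply at double width so the product itself cannot wrap.
      APInt Wide = IndexOffset.sextOrTrunc(MaxPointerSize * 2) *
                   Scale.sext(MaxPointerSize * 2);
      if (Wide.getMinSignedBits() > MaxPointerSize) // 67 > 64
        llvm::outs() << "would overflow; keep the index opaque\n";
      return 0;
    }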
@@ -1039,8 +1072,12 @@
   // If the last (struct) indices are constants and are equal, the other indices
   // might also be dynamically equal, so the GEPs can alias.
-  if (C1 && C2 && C1->getSExtValue() == C2->getSExtValue())
-    return MayAlias;
+  if (C1 && C2) {
+    unsigned BitWidth = std::max(C1->getBitWidth(), C2->getBitWidth());
+    if (C1->getValue().sextOrSelf(BitWidth) ==
+        C2->getValue().sextOrSelf(BitWidth))
+      return MayAlias;
+  }
 
   // Find the last-indexed type of the GEP, i.e., the type you'd get if
   // you stripped the last index.
@@ -1123,6 +1160,10 @@
     return MayAlias;
   }
 
+  if (C1->getValue().getActiveBits() > 64 ||
+      C2->getValue().getActiveBits() > 64)
+    return MayAlias;
+
   // We know that:
   // - both GEPs begin indexing from the exact same pointer;
   // - the last indices in both GEPs are constants, indexing into a struct;
@@ -1203,8 +1244,8 @@
       !DecompObject.VarIndices.empty())
     return false;
 
-  int64_t ObjectBaseOffset = DecompObject.StructOffset +
-                             DecompObject.OtherOffset;
+  APInt ObjectBaseOffset = DecompObject.StructOffset +
+                           DecompObject.OtherOffset;
 
   // If the GEP has no variable indices, we know the precise offset
   // from the base, then use it. If the GEP has variable indices,
   // we can't get exact GEP offset to identify pointer alias. So return
@@ -1212,10 +1253,11 @@
   // false in that case.
   if (!DecompGEP.VarIndices.empty())
     return false;
-  int64_t GEPBaseOffset = DecompGEP.StructOffset;
+
+  APInt GEPBaseOffset = DecompGEP.StructOffset;
   GEPBaseOffset += DecompGEP.OtherOffset;
 
-  return (GEPBaseOffset >= ObjectBaseOffset + (int64_t)ObjectAccessSize);
+  return GEPBaseOffset.sge(ObjectBaseOffset + (int64_t)ObjectAccessSize);
 }
 
 /// Provides a bunch of ad-hoc rules to disambiguate a GEP instruction against
@@ -1230,13 +1272,17 @@
                                     LocationSize V2Size, const AAMDNodes &V2AAInfo,
                                     const Value *UnderlyingV1, const Value *UnderlyingV2) {
   DecomposedGEP DecompGEP1, DecompGEP2;
+  unsigned MaxPointerSize = getMaxPointerSize(DL);
+  DecompGEP1.StructOffset = DecompGEP1.OtherOffset = APInt(MaxPointerSize, 0);
+  DecompGEP2.StructOffset = DecompGEP2.OtherOffset = APInt(MaxPointerSize, 0);
+
   bool GEP1MaxLookupReached =
     DecomposeGEPExpression(GEP1, DecompGEP1, DL, &AC, DT);
   bool GEP2MaxLookupReached =
     DecomposeGEPExpression(V2, DecompGEP2, DL, &AC, DT);
 
-  int64_t GEP1BaseOffset = DecompGEP1.StructOffset + DecompGEP1.OtherOffset;
-  int64_t GEP2BaseOffset = DecompGEP2.StructOffset + DecompGEP2.OtherOffset;
+  APInt GEP1BaseOffset = DecompGEP1.StructOffset + DecompGEP1.OtherOffset;
+  APInt GEP2BaseOffset = DecompGEP2.StructOffset + DecompGEP2.OtherOffset;
 
   assert(DecompGEP1.Base == UnderlyingV1 && DecompGEP2.Base == UnderlyingV2 &&
          "DecomposeGEPExpression returned a result different from "
@@ -1354,9 +1400,9 @@
   // that the objects are partially overlapping.  If the difference is
   // greater, we know they do not overlap.
   if (GEP1BaseOffset != 0 && DecompGEP1.VarIndices.empty()) {
-    if (GEP1BaseOffset >= 0) {
+    if (GEP1BaseOffset.sge(0)) {
       if (V2Size != LocationSize::unknown()) {
-        if ((uint64_t)GEP1BaseOffset < V2Size.getValue())
+        if (GEP1BaseOffset.ult(V2Size.getValue()))
           return PartialAlias;
         return NoAlias;
       }
@@ -1371,7 +1417,7 @@
       // stripped a gep with negative index ('gep <ptr>, -1, ...).
       if (V1Size != LocationSize::unknown() &&
           V2Size != LocationSize::unknown()) {
-        if (-(uint64_t)GEP1BaseOffset < V1Size.getValue())
+        if ((-GEP1BaseOffset).ult(V1Size.getValue()))
          return PartialAlias;
        return NoAlias;
      }
@@ -1379,7 +1425,7 @@
   }
 
   if (!DecompGEP1.VarIndices.empty()) {
-    uint64_t Modulo = 0;
+    APInt Modulo(MaxPointerSize, 0);
     bool AllPositive = true;
     for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
 
@@ -1387,7 +1433,7 @@
       // Grab the least significant bit set in any of the scales. We
      // don't need std::abs here (even if the scale's negative) as we'll
      // be ^'ing Modulo with itself later.
-      Modulo |= (uint64_t)DecompGEP1.VarIndices[i].Scale;
+      Modulo |= DecompGEP1.VarIndices[i].Scale;
 
       if (AllPositive) {
         // If the Value could change between cycles, then any reasoning about
@@ -1408,9 +1454,9 @@
         // If the variable begins with a zero then we know it's
        // positive, regardless of whether the value is signed or
        // unsigned.
-        int64_t Scale = DecompGEP1.VarIndices[i].Scale;
+        APInt Scale = DecompGEP1.VarIndices[i].Scale;
         AllPositive =
-          (SignKnownZero && Scale >= 0) || (SignKnownOne && Scale < 0);
+          (SignKnownZero && Scale.sge(0)) || (SignKnownOne && Scale.slt(0));
       }
     }
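The loop above ORs every variable index's scale into Modulo; the "^'ing Modulo with itself" comment refers to a line outside this hunk's context. Assuming the usual lowest-set-bit reduction (an assumption about the elided context, not shown in this patch), the idiom works like this illustrative sketch:

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/raw_ostream.h"
    using llvm::APInt;

    int main() {
      APInt Modulo(64, 0);
      // OR all scales together; std::abs is unnecessary because only the
      // low bits matter for the reduction below.
      for (uint64_t ScaleVal : {24ULL, 8ULL, 48ULL})
        Modulo |= APInt(64, ScaleVal);           // Modulo = 0b111000
      // Clear all but the lowest set bit: a power of two dividing every
      // scale, so every variable contribution is a multiple of it.
      Modulo = Modulo ^ (Modulo & (Modulo - 1)); // Modulo = 8
      llvm::outs() << Modulo << "\n"; // prints 8
      return 0;
    }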
@@ -1419,18 +1465,18 @@
     // We can compute the difference between the two addresses
     // mod Modulo. Check whether that difference guarantees that the
     // two locations do not alias.
-    uint64_t ModOffset = (uint64_t)GEP1BaseOffset & (Modulo - 1);
+    APInt ModOffset = GEP1BaseOffset & (Modulo - 1);
     if (V1Size != LocationSize::unknown() &&
-        V2Size != LocationSize::unknown() && ModOffset >= V2Size.getValue() &&
-        V1Size.getValue() <= Modulo - ModOffset)
+        V2Size != LocationSize::unknown() && ModOffset.uge(V2Size.getValue()) &&
+        (Modulo - ModOffset).uge(V1Size.getValue()))
       return NoAlias;
 
     // If we know all the variables are positive, then GEP1 >= GEP1BasePtr.
     // If GEP1BasePtr > V2 (GEP1BaseOffset > 0) then we know the pointers
     // don't alias if V2Size can fit in the gap between V2 and GEP1BasePtr.
-    if (AllPositive && GEP1BaseOffset > 0 &&
+    if (AllPositive && GEP1BaseOffset.sgt(0) &&
         V2Size != LocationSize::unknown() &&
-        V2Size.getValue() <= (uint64_t)GEP1BaseOffset)
+        GEP1BaseOffset.uge(V2Size.getValue()))
       return NoAlias;
 
     if (constantOffsetHeuristic(DecompGEP1.VarIndices, V1Size, V2Size,
@@ -1836,7 +1882,7 @@
   for (unsigned i = 0, e = Src.size(); i != e; ++i) {
     const Value *V = Src[i].V;
     unsigned ZExtBits = Src[i].ZExtBits, SExtBits = Src[i].SExtBits;
-    int64_t Scale = Src[i].Scale;
+    APInt Scale = Src[i].Scale;
 
     // Find V in Dest.  This is N^2, but pointer indices almost never have more
     // than a few variable indexes.
@@ -1856,7 +1902,7 @@
     }
 
     // If we didn't consume this entry, add it to the end of the Dest list.
-    if (Scale) {
+    if (!!Scale) {
       VariableGEPIndex Entry = {V, ZExtBits, SExtBits, -Scale};
       Dest.push_back(Entry);
     }
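GetIndexDifference subtracts the variable indices of one decomposed GEP from the other: a variable common to both has its scales subtracted in place, and anything left unconsumed in Src is appended with a negated scale. A small illustrative sketch of the cancellation (it assumes the tree this patch applies to, where APInt still provides operator!, hence the !!Scale idiom):

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/raw_ostream.h"
    using llvm::APInt;

    int main() {
      // Variable x occurs in both GEPs with scale 12: the difference is
      // zero, so the entry is consumed (its scale is zeroed out).
      APInt XScale(64, 12);
      XScale -= APInt(64, 12);
      if (!!XScale)                   // zero test; a consumed entry is dropped
        llvm::outs() << "x kept\n";   // not printed
      // A variable occurring only in the subtracted GEP is appended with a
      // negated scale, matching the {V, ZExtBits, SExtBits, -Scale} entry.
      APInt YScale(64, 4);
      llvm::outs() << (-YScale).getSExtValue() << "\n"; // prints -4
      return 0;
    }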
@@ -1865,7 +1911,7 @@
 
 bool BasicAAResult::constantOffsetHeuristic(
     const SmallVectorImpl<VariableGEPIndex> &VarIndices,
-    LocationSize MaybeV1Size, LocationSize MaybeV2Size, int64_t BaseOffset,
+    LocationSize MaybeV1Size, LocationSize MaybeV2Size, APInt BaseOffset,
     AssumptionCache *AC, DominatorTree *DT) {
   if (VarIndices.size() != 2 || MaybeV1Size == LocationSize::unknown() ||
       MaybeV2Size == LocationSize::unknown())
@@ -1910,14 +1956,15 @@
   // the minimum distance between %i and %i + 5 is 3.
   APInt MinDiff = V0Offset - V1Offset, Wrapped = -MinDiff;
   MinDiff = APIntOps::umin(MinDiff, Wrapped);
-  uint64_t MinDiffBytes = MinDiff.getZExtValue() * std::abs(Var0.Scale);
+  APInt MinDiffBytes =
+    MinDiff.zextOrTrunc(Var0.Scale.getBitWidth()) * Var0.Scale.abs();
 
   // We can't definitely say whether GEP1 is before or after V2 due to wrapping
   // arithmetic (i.e. for some values of GEP1 and V2 GEP1 < V2, and for other
   // values GEP1 > V2).  We'll therefore only declare NoAlias if both V1Size and
   // V2Size can fit in the MinDiffBytes gap.
-  return V1Size + std::abs(BaseOffset) <= MinDiffBytes &&
-         V2Size + std::abs(BaseOffset) <= MinDiffBytes;
+  return MinDiffBytes.uge(V1Size + BaseOffset.abs()) &&
+         MinDiffBytes.uge(V2Size + BaseOffset.abs());
 }
 
 //===----------------------------------------------------------------------===//
Index: llvm/trunk/lib/IR/DataLayout.cpp
===================================================================
--- llvm/trunk/lib/IR/DataLayout.cpp
+++ llvm/trunk/lib/IR/DataLayout.cpp
@@ -635,6 +635,14 @@
   return I->TypeByteWidth;
 }
 
+unsigned DataLayout::getMaxPointerSize() const {
+  unsigned MaxPointerSize = 0;
+  for (auto &P : Pointers)
+    MaxPointerSize = std::max(MaxPointerSize, P.TypeByteWidth);
+
+  return MaxPointerSize;
+}
+
 unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const {
   assert(Ty->isPtrOrPtrVectorTy() &&
          "This should only be called with a pointer or pointer vector type");
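Back in constantOffsetHeuristic above, the minimum-distance computation now stays in APInt end to end. For the 3-bit example in its comment (%i versus %i + 5), the wrapped distance is what bounds the gap; a sketch of that arithmetic (illustrative only, with an assumed element size of 8):

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/raw_ostream.h"
    using llvm::APInt;
    using llvm::APIntOps::umin;

    int main() {
      // Var0 = %i + 5, Var1 = %i, at an index width of 3 bits.
      APInt V0Offset(3, 5), V1Offset(3, 0);
      // The distance wraps mod 2^3, so take the smaller direction.
      APInt MinDiff = V0Offset - V1Offset, Wrapped = -MinDiff;
      MinDiff = umin(MinDiff, Wrapped); // min(5, 8 - 5) = 3
      // Scale by the element size, widened to the calculation width.
      APInt Scale(64, 8);
      APInt MinDiffBytes =
          MinDiff.zextOrTrunc(Scale.getBitWidth()) * Scale.abs();
      llvm::outs() << MinDiffBytes << "\n"; // prints 24
      return 0;
    }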
Index: llvm/trunk/test/Analysis/BasicAA/128-bit-ptr.ll
===================================================================
--- llvm/trunk/test/Analysis/BasicAA/128-bit-ptr.ll
+++ llvm/trunk/test/Analysis/BasicAA/128-bit-ptr.ll
@@ -0,0 +1,60 @@
+; This testcase consists of alias relations on 128-bit pointers that
+; should be completely resolvable by basicaa.
+
+; RUN: opt < %s -basicaa -aa-eval -print-no-aliases -print-may-aliases -print-must-aliases -disable-output 2>&1 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128-p100:128:64:64-p101:128:64:64"
+
+
+; test0 is similar to SimpleCases.ll
+
+%T = type { i32, [10 x i8] }
+
+; CHECK: Function: test0
+; CHECK-NOT: MayAlias:
+define void @test0(%T addrspace(100)* %P) {
+  %A = getelementptr %T, %T addrspace(100)* %P, i64 0
+  %B = getelementptr %T, %T addrspace(100)* %P, i64 0, i32 0
+  %C = getelementptr %T, %T addrspace(100)* %P, i64 0, i32 1
+  %D = getelementptr %T, %T addrspace(100)* %P, i64 0, i32 1, i64 0
+  %E = getelementptr %T, %T addrspace(100)* %P, i64 0, i32 1, i64 5
+  ret void
+}
+
+; test1 checks that >64 bits of index can be considered.
+; If BasicAA is truncating the arithmetic, it will conclude
+; that %A and %B must alias when in fact they must not.
+
+; CHECK: Function: test1
+; CHECK-NOT: MustAlias:
+; CHECK: NoAlias:
+; CHECK-SAME: %A
+; CHECK-SAME: %B
+define void @test1(double addrspace(100)* %P, i128 %i) {
+  ; 1180591620717411303424 is 2**70
+  ; 590295810358705651712 is 2**69
+  %i70 = add i128 %i, 1180591620717411303424
+  %i69 = add i128 %i, 590295810358705651712
+  %A = getelementptr double, double addrspace(100)* %P, i128 %i70
+  %B = getelementptr double, double addrspace(100)* %P, i128 %i69
+  ret void
+}
+
+; test2 checks that >64 bits of index can be considered
+; and computes the same address in two ways to ensure that
+; they are considered equivalent.
+
+; CHECK: Function: test2
+; CHECK: MustAlias:
+; CHECK-SAME: %A
+; CHECK-SAME: %C
+define void @test2(double addrspace(100)* %P, i128 %i) {
+  ; 1180591620717411303424 is 2**70
+  ; 590295810358705651712 is 2**69
+  %i70 = add i128 %i, 1180591620717411303424
+  %i69 = add i128 %i, 590295810358705651712
+  %j70 = add i128 %i69, 590295810358705651712
+  %A = getelementptr double, double addrspace(100)* %P, i128 %i70
+  %C = getelementptr double, double addrspace(100)* %P, i128 %j70
+  ret void
+}
Index: llvm/trunk/test/Analysis/BasicAA/gep-and-alias-64.ll
===================================================================
--- llvm/trunk/test/Analysis/BasicAA/gep-and-alias-64.ll
+++ llvm/trunk/test/Analysis/BasicAA/gep-and-alias-64.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -basicaa -gvn < %s | FileCheck %s
+
+target datalayout = "e-m:o-p:64:64-f64:32:64-f80:128-n8:16:32-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+; The load and store address in the loop body could alias so the load
+; can't be hoisted above the store and out of the loop.
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
+
+define i64 @foo(i64 %x, i64 %z, i64 %n) {
+entry:
+  %pool = alloca [59 x i64], align 4
+  %tmp = bitcast [59 x i64]* %pool to i8*
+  call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
+  %cmp3 = icmp eq i64 %n, 0
+  br i1 %cmp3, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  %add = add i64 %z, %x
+  %and = and i64 %add, 9223372036854775807
+  %sub = add nsw i64 %and, -9223372036844814062
+  %arrayidx = getelementptr inbounds [59 x i64], [59 x i64]* %pool, i64 0, i64 %sub
+  %arrayidx1 = getelementptr inbounds [59 x i64], [59 x i64]* %pool, i64 0, i64 42
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+  store i64 %i.04, i64* %arrayidx, align 4
+  %tmp1 = load i64, i64* %arrayidx1, align 4
+  %inc = add nuw i64 %i.04, 1
+  %exitcond = icmp ne i64 %inc, %n
+  br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  %lcssa = phi i64 [ %tmp1, %for.body ]
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  %s = phi i64 [ 0, %entry ], [ %lcssa, %for.end.loopexit ]
+; CHECK: ret i64 %s
+  ret i64 %s
+}
Index: llvm/trunk/test/Analysis/BasicAA/gep-and-alias.ll
===================================================================
--- llvm/trunk/test/Analysis/BasicAA/gep-and-alias.ll
+++ llvm/trunk/test/Analysis/BasicAA/gep-and-alias.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -S -basicaa -gvn < %s | FileCheck %s
+; RUN: opt -S -basicaa -gvn -basicaa-force-at-least-64b=0 < %s | FileCheck %s
 
 target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
 target triple = "i386-apple-macosx10.6.0"
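The gep-and-alias-64.ll test exercises the new overflow bail-out: the constant term that GetLinearExpression extracts from %sub, multiplied by the 8-byte element size, does not fit in 64 signed bits, so folding it into the offset would wrap and could let GVN hoist the load on a bogus NoAlias. A sketch of the arithmetic (illustrative only):

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/raw_ostream.h"
    using llvm::APInt;

    int main() {
      unsigned MaxPointerSize = 64;
      // The constant addend of %sub in the test, as a 64-bit APInt.
      APInt IndexOffset(64, static_cast<uint64_t>(-9223372036844814062LL));
      APInt Scale(64, 8); // sizeof(i64), the indexed element type
      APInt Wide = IndexOffset.sextOrTrunc(MaxPointerSize * 2) *
                   Scale.sext(MaxPointerSize * 2);
      // More signed bits than fit in a 64-bit offset, so the index must
      // stay opaque rather than be decomposed.
      llvm::outs() << Wide.getMinSignedBits() << "\n"; // prints 67
      return 0;
    }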