Index: llvm/include/llvm/Analysis/DependenceAnalysis.h
===================================================================
--- llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -282,12 +282,11 @@
     /// depends - Tests for a dependence between the Src and Dst instructions.
     /// Returns NULL if no dependence; otherwise, returns a Dependence (or a
     /// FullDependence) with as much information as can be gleaned.
-    /// The flag PossiblyLoopIndependent should be set by the caller
-    /// if it appears that control flow can reach from Src to Dst
-    /// without traversing a loop back edge.
+    /// If PossiblyIntraIteration is true, intra-iteration (loop-independent)
+    /// dependencies are also tested for.
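+    /// Such dependencies can arise when control flow may reach Dst from
+    /// Src without crossing a loop back edge, for example a store followed
+    /// by a load of the same address within one iteration.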
     std::unique_ptr<Dependence> depends(Instruction *Src,
                                         Instruction *Dst,
-                                        bool PossiblyLoopIndependent);
+                                        bool PossiblyIntraIteration);
 
     /// getSplitIteration - Give a dependence that's splittable at some
     /// particular level, return the iteration that should be used to split
Index: llvm/lib/Analysis/DependenceAnalysis.cpp
===================================================================
--- llvm/lib/Analysis/DependenceAnalysis.cpp
+++ llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -259,10 +259,10 @@
 // FullDependence methods
 
 FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
-                               bool PossiblyLoopIndependent,
+                               bool PossiblyIntraIteration,
                                unsigned CommonLevels)
     : Dependence(Source, Destination), Levels(CommonLevels),
-      LoopIndependent(PossiblyLoopIndependent) {
+      LoopIndependent(PossiblyIntraIteration) {
   Consistent = true;
   if (CommonLevels)
     DV = std::make_unique<DVEntry[]>(CommonLevels);
@@ -3512,6 +3512,8 @@
 // depends -
 // Returns NULL if there is no dependence.
 // Otherwise, return a Dependence with as many details as possible.
+// If PossiblyIntraIteration is true, intra-iteration dependencies are also
+// tested for.
 // Corresponds to Section 3.1 in the paper
 //
 //            Practical Dependence Testing
@@ -3522,9 +3524,9 @@
 // up to date with respect to this routine.
 std::unique_ptr<Dependence>
 DependenceInfo::depends(Instruction *Src, Instruction *Dst,
-                        bool PossiblyLoopIndependent) {
+                        bool PossiblyIntraIteration) {
   if (Src == Dst)
-    PossiblyLoopIndependent = false;
+    PossiblyIntraIteration = false;
 
   if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory()))
     // if both instructions don't reference memory, there's no dependence
@@ -3562,9 +3564,14 @@
   LLVM_DEBUG(dbgs() << "    common nesting levels = " << CommonLevels << "\n");
   LLVM_DEBUG(dbgs() << "    maximum nesting levels = " << MaxLevels << "\n");
 
-  FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
+  FullDependence Result(Src, Dst, PossiblyIntraIteration, CommonLevels);
   ++TotalArrayPairs;
 
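+  // With no common loops, any dependence would have to be intra-iteration;
+  // if the caller did not ask for those, there is nothing to report.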
+  if (!PossiblyIntraIteration && CommonLevels == 0) {
+    LLVM_DEBUG(dbgs() << "no common loops and no intra-iteration analysis\n");
+    return nullptr;
+  }
+
   unsigned Pairs = 1;
   SmallVector<Subscript, 2> Pair(Pairs);
   const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
@@ -3583,6 +3590,40 @@
   }
   Pair[0].Src = SrcSCEV;
   Pair[0].Dst = DstSCEV;
+  const DataLayout &DL = F->getParent()->getDataLayout();
+
+  // DependenceAnalysis isn't able to deal with different
+  // access widths. If access widths are the same, but alignment is
+  // smaller than the store size, accesses could overlap.
+  // Reject these cases.
+  auto IsUnalignedLoadStore = [&](const Instruction *I) {
+    const LoadInst *LI = dyn_cast<LoadInst>(I);
+    const StoreInst *SI = dyn_cast<StoreInst>(I);
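+    // depends() has already rejected anything that is not a simple load or
+    // store, so exactly one of LI and SI is non-null here.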
+    Type *IType = (LI) ? LI->getType() : SI->getValueOperand()->getType();
+    uint64_t Width = DL.getTypeStoreSize(IType);
+    uint64_t Alignment = (LI) ? LI->getAlign().value() : SI->getAlign().value();
+
+    return ((Alignment % Width) != 0);
+  };
+
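+  // Only be conservative when at least one of the pointers is something
+  // other than a plain GEP (e.g. a bitcast of a GEP); such accesses can
+  // overlap even when the raw pointer values differ, as in
+  // OverlappingAddresses.ll.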
+  bool ModifiedGEPOp = !isa<GEPOperator>(SrcPtr) || !isa<GEPOperator>(DstPtr);
+  Type *SrcOpType =
+      cast<PointerType>(MemoryLocation::get(Src).Ptr->getType())
+          ->getElementType();
+  Type *DstOpType =
+      cast<PointerType>(MemoryLocation::get(Dst).Ptr->getType())
+          ->getElementType();
+  TypeSize SrcWriteSize = DL.getTypeStoreSize(SrcOpType);
+  TypeSize DstWriteSize = DL.getTypeStoreSize(DstOpType);
+
+  if ((IsUnalignedLoadStore(Src) || IsUnalignedLoadStore(Dst) ||
+       (SrcWriteSize != DstWriteSize)) &&
+      ModifiedGEPOp) {
+    LLVM_DEBUG(dbgs() << "Memory accesses are unaligned or of different "
+                         "widths, and not both through plain GEPs\n");
+    return std::make_unique<Dependence>(Src, Dst);
+  }
 
   if (Delinearize) {
     if (tryDelinearize(Src, Dst, Pair)) {
@@ -3880,7 +3921,7 @@
     if (CompleteLoops[II])
       Result.DV[II - 1].Scalar = false;
 
-  if (PossiblyLoopIndependent) {
+  if (PossiblyIntraIteration) {
     // Make sure the LoopIndependent flag is set correctly.
     // All directions must include equal, otherwise no
     // loop-independent dependence is possible.
Index: llvm/test/Analysis/DependenceAnalysis/OverlappingAddresses.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/DependenceAnalysis/OverlappingAddresses.ll
@@ -0,0 +1,35 @@
+; Verify that we do not consider overlapping accesses to be independent,
+; even when the raw pointer values are different.
+; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \
+; RUN: | FileCheck %s
+; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+define void @overlapping0(i32* %A, i64 %n) nounwind uwtable ssp {
+entry:
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %n
+  %arrayidx_cast = bitcast i32* %arrayidx to i64*
+  store i64 0, i64* %arrayidx_cast, align 4
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
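+  ; The i64 store above writes 8 bytes starting at &A[%n], covering both
+  ; A[%n] and A[%n + 1], so it overlaps the 4-byte load of A[%n + 1] below
+  ; even though the pointer values differ.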
+  %add1 = add i64 %n, 1
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %add1
+  %0 = load i32, i32* %arrayidx2, align 4
+
+  ; Even if the load and store types are the same, if their alignments are
+  ; smaller than the store size and they are accessed through something
+  ; other than a straightforward GEP, they can still overlap, as this
+  ; access shows.
+  %arrayidx2_cast = bitcast i32* %arrayidx2 to i64*
+  %1 = load i64, i64* %arrayidx2_cast, align 4
+  ret void
+}
+