# Patch summary (two files).
#
# 1. lib/Analysis/LoopAccessAnalysis.cpp
#    Adds `return 0;` to the `if (Lp != AR->getLoop())` branch. Per the hunk's
#    context lines, that branch previously held only the DEBUG message
#    "Bad stride - Not striding over innermost loop", so control continued
#    into the code under the comment "The address calculation must not wrap".
#    The enclosing function lies outside the hunk; 0 is presumably its
#    "no usable stride" result -- TODO confirm against the function's contract.
#
# 2. test/Analysis/LoopAccessAnalysis/interleave_innermost.ll (new, 54 lines)
#    Runs `opt -loop-vectorize -force-vector-interleave=1 -instcombine -S` on
#    the module and pipes the output to FileCheck. The CHECK line binds ENTRY
#    to a getelementptr into %StructA with indices (i64 -1, i32 1); the
#    CHECK-NOT line rejects a bitcast of that i32* to a pointer to a vector of
#    i32 after that match. In the input IR the GEP of that shape is %tmp3: it
#    is computed in %.lr.ph, outside the self-looping block %bb4 that loads
#    from it, so the load address does not change across %bb4 iterations.
#    NOTE(review): the module pins a powerpc64le triple and no REQUIRES: line
#    is present -- confirm how builds without that target should treat it.
Index: lib/Analysis/LoopAccessAnalysis.cpp
===================================================================
--- lib/Analysis/LoopAccessAnalysis.cpp
+++ lib/Analysis/LoopAccessAnalysis.cpp
@@ -845,6 +845,7 @@
   if (Lp != AR->getLoop()) {
     DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " <<
           *Ptr << " SCEV: " << *PtrScev << "\n");
+    return 0;
   }
 
   // The address calculation must not wrap. Otherwise, a dependence could be
Index: test/Analysis/LoopAccessAnalysis/interleave_innermost.ll
===================================================================
--- /dev/null
+++ test/Analysis/LoopAccessAnalysis/interleave_innermost.ll
@@ -0,0 +1,54 @@
+; RUN: opt -loop-vectorize -force-vector-interleave=1 -instcombine -S < %s | FileCheck %s
+; CHECK: %[[ENTRY:[^ ]+]] = getelementptr inbounds %StructA, %StructA* %{{[^ ]+}}, i64 -1, i32 1
+; CHECK-NOT: bitcast i32* %[[ENTRY]] to <{{[0-9]+}} x i32>*
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+%StructA = type { i8, i32 }
+
+@GlobalB = external global [1 x %StructA*], align 8
+
+define void @TestFoo() {
+bb:
+  br label %.loopexit5.outer
+
+.loopexit5.outer.loopexit: ; preds = %.loopexit
+  br label %.loopexit5.outer
+
+.loopexit5.outer: ; preds = %.loopexit5.outer.loopexit, %bb
+  %.pre = load %StructA*, %StructA** getelementptr ([1 x %StructA*], [1 x %StructA*]* @GlobalB, i64 0, i64 undef), align 8
+  br label %.lr.ph12
+
+.loopexit.loopexit: ; preds = %bb4
+  %scevgep19 = getelementptr i32, i32* %r.19, i64 undef
+  br label %.loopexit
+
+.loopexit: ; preds = %.lr.ph12, %.loopexit.loopexit
+  %j.2.lcssa = phi i32 [ %j.111, %.lr.ph12 ], [ 0, %.loopexit.loopexit ]
+  %r.2.lcssa = phi i32* [ %r.19, %.lr.ph12 ], [ %scevgep19, %.loopexit.loopexit ]
+  %tmp = icmp eq %StructA* %tmp1, %.pre
+  br i1 %tmp, label %.loopexit5.outer.loopexit, label %.lr.ph12
+
+.lr.ph12: ; preds = %.loopexit, %.loopexit5.outer
+  %j.111 = phi i32 [ %j.2.lcssa, %.loopexit ], [ undef, %.loopexit5.outer ]
+  %f.110 = phi %StructA* [ %tmp1, %.loopexit ], [ undef, %.loopexit5.outer ]
+  %r.19 = phi i32* [ %r.2.lcssa, %.loopexit ], [ undef, %.loopexit5.outer ]
+  %tmp1 = getelementptr inbounds %StructA, %StructA* %f.110, i64 -1
+  %tmp2 = icmp slt i32 %j.111, 0
+  br i1 %tmp2, label %.lr.ph, label %.loopexit
+
+.lr.ph: ; preds = %.lr.ph12
+  %tmp3 = getelementptr inbounds %StructA, %StructA* %f.110, i64 -1, i32 1
+  br label %bb4
+
+bb4: ; preds = %bb4, %.lr.ph
+  %j.27 = phi i32 [ %j.111, %.lr.ph ], [ %tmp7, %bb4 ]
+  %r.26 = phi i32* [ %r.19, %.lr.ph ], [ %tmp6, %bb4 ]
+  %tmp5 = load i32, i32* %tmp3, align 4
+  %tmp6 = getelementptr inbounds i32, i32* %r.26, i64 1
+  store i32 %tmp5, i32* %r.26, align 4
+  %tmp7 = add nsw i32 %j.27, 1
+  %exitcond = icmp eq i32 %tmp7, 0
+  br i1 %exitcond, label %.loopexit.loopexit, label %bb4
+}