Index: llvm/trunk/include/llvm/Analysis/DependenceAnalysis.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/DependenceAnalysis.h
+++ llvm/trunk/include/llvm/Analysis/DependenceAnalysis.h
@@ -557,6 +557,12 @@
                           const SCEV *X,
                           const SCEV *Y) const;
 
+    /// isKnownLessThan - Compare to see if S is less than Size.
+    /// Another wrapper for isKnownNegative(S - max(Size, 1)) with some extra
+    /// checking if S is an AddRec and we can prove less-than using the loop
+    /// bounds.
+    bool isKnownLessThan(const SCEV *S, const SCEV *Size) const;
+
     /// collectUpperBound - All subscripts are the same type (on my machine,
     /// an i64). The loop bound may be a smaller type. collectUpperBound
     /// find the bound, if available, and zero extends it to the Type T.
Index: llvm/trunk/lib/Analysis/DependenceAnalysis.cpp
===================================================================
--- llvm/trunk/lib/Analysis/DependenceAnalysis.cpp
+++ llvm/trunk/lib/Analysis/DependenceAnalysis.cpp
@@ -108,8 +108,8 @@
 STATISTIC(BanerjeeSuccesses, "Banerjee successes");
 
 static cl::opt<bool>
-Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore,
-            cl::desc("Try to delinearize array references."));
+    Delinearize("da-delinearize", cl::init(true), cl::Hidden, cl::ZeroOrMore,
+                cl::desc("Try to delinearize array references."));
 
 //===----------------------------------------------------------------------===//
 // basics
@@ -994,6 +994,38 @@
   }
 }
 
+/// Compare to see if S is less than Size, using isKnownNegative(S - max(Size, 1))
+/// with some extra checking if S is an AddRec and we can prove less-than using
+/// the loop bounds.
+bool DependenceInfo::isKnownLessThan(const SCEV *S, const SCEV *Size) const {
+  // First unify to the same type
+  auto *SType = dyn_cast<IntegerType>(S->getType());
+  auto *SizeType = dyn_cast<IntegerType>(Size->getType());
+  if (!SType || !SizeType)
+    return false;
+  Type *MaxType =
+      (SType->getBitWidth() >= SizeType->getBitWidth()) ? SType : SizeType;
+  S = SE->getTruncateOrZeroExtend(S, MaxType);
+  Size = SE->getTruncateOrZeroExtend(Size, MaxType);
+
+  // Special check for addrecs using BE taken count
+  const SCEV *Bound = SE->getMinusSCEV(S, Size);
+  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Bound)) {
+    if (AddRec->isAffine()) {
+      const SCEV *BECount = SE->getBackedgeTakenCount(AddRec->getLoop());
+      if (!isa<SCEVCouldNotCompute>(BECount)) {
+        const SCEV *Limit = AddRec->evaluateAtIteration(BECount, *SE);
+        if (SE->isKnownNegative(Limit))
+          return true;
+      }
+    }
+  }
+
+  // Check using normal isKnownNegative
+  const SCEV *LimitedBound =
+      SE->getMinusSCEV(S, SE->getSMaxExpr(Size, SE->getOne(Size->getType())));
+  return SE->isKnownNegative(LimitedBound);
+}
 
 // All subscripts are all the same type.
 // Loop bound may be smaller (e.g., a char).
@@ -3253,6 +3285,26 @@
 
   int size = SrcSubscripts.size();
 
+  // Statically check that the array bounds are in-range. The first subscript we
+  // don't have a size for and it cannot overflow into another subscript, so is
+  // always safe. The others need to be 0 <= subscript[i] < bound, for both src
+  // and dst.
+  // FIXME: It may be better to record these sizes and add them as constraints
+  // to the dependency checks.
+  for (int i = 1; i < size; ++i) {
+    if (!SE->isKnownNonNegative(SrcSubscripts[i]))
+      return false;
+
+    if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1]))
+      return false;
+
+    if (!SE->isKnownNonNegative(DstSubscripts[i]))
+      return false;
+
+    if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1]))
+      return false;
+  }
+
   LLVM_DEBUG({
     dbgs() << "\nSrcSubscripts: ";
     for (int i = 0; i < size; i++)
@@ -3271,13 +3323,6 @@
     Pair[i].Src = SrcSubscripts[i];
     Pair[i].Dst = DstSubscripts[i];
     unifySubscriptType(&Pair[i]);
-
-    // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the
-    // delinearization has found, and add these constraints to the dependence
-    // check to avoid memory accesses overflow from one dimension into another.
- // This is related to the problem of determining the existence of data - // dependences in array accesses using a different number of subscripts: in - // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc. } return true; Index: llvm/trunk/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll =================================================================== --- llvm/trunk/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll +++ llvm/trunk/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll @@ -1,4 +1,4 @@ -; RUN: opt -basicaa -da -analyze -da-delinearize < %s +; RUN: opt -basicaa -da -analyze < %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: llvm/trunk/test/Analysis/DependenceAnalysis/Banerjee.ll =================================================================== --- llvm/trunk/test/Analysis/DependenceAnalysis/Banerjee.ll +++ llvm/trunk/test/Analysis/DependenceAnalysis/Banerjee.ll @@ -1,7 +1,6 @@ ; RUN: opt < %s -analyze -basicaa -da -da-delinearize=false | FileCheck %s -; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s -check-prefix=DELIN +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s -check-prefix=DELIN -; ModuleID = 'Banerjee.bc' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.6.0" Index: llvm/trunk/test/Analysis/DependenceAnalysis/Coupled.ll =================================================================== --- llvm/trunk/test/Analysis/DependenceAnalysis/Coupled.ll +++ llvm/trunk/test/Analysis/DependenceAnalysis/Coupled.ll @@ -1,6 +1,5 @@ -; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s 
+; RUN: opt < %s -analyze -basicaa -da | FileCheck %s -; ModuleID = 'Coupled.bc' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.6.0" Index: llvm/trunk/test/Analysis/DependenceAnalysis/DADelin.ll =================================================================== --- llvm/trunk/test/Analysis/DependenceAnalysis/DADelin.ll +++ llvm/trunk/test/Analysis/DependenceAnalysis/DADelin.ll @@ -0,0 +1,556 @@ +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8m.main-arm-none-eabi" + +; CHECK-LABEL: t1 +;; for (int i = 0; i < n; i++) +;; for (int j = 0; j < m; j++) +;; for (int k = 0; k < o; k++) +;; = A[i*m*o + j*o + k] +;; A[i*m*o + j*o + k] = +define void @t1(i32 %n, i32 %m, i32 %o, i32* nocapture %A) { +; CHECK: da analyze - none! +; CHECK: da analyze - consistent anti [0 0 0|<]! +; CHECK: da analyze - none! 
+entry: + %cmp49 = icmp sgt i32 %n, 0 + br i1 %cmp49, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup + +for.cond1.preheader.lr.ph: ; preds = %entry + %cmp247 = icmp sgt i32 %m, 0 + %cmp645 = icmp sgt i32 %o, 0 + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond.cleanup3, %for.cond1.preheader.lr.ph + %i.050 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc23, %for.cond.cleanup3 ] + br i1 %cmp247, label %for.cond5.preheader.lr.ph, label %for.cond.cleanup3 + +for.cond5.preheader.lr.ph: ; preds = %for.cond1.preheader + %mul = mul nsw i32 %i.050, %m + br label %for.cond5.preheader + +for.cond5.preheader: ; preds = %for.cond.cleanup7, %for.cond5.preheader.lr.ph + %j.048 = phi i32 [ 0, %for.cond5.preheader.lr.ph ], [ %inc20, %for.cond.cleanup7 ] + br i1 %cmp645, label %for.body8.lr.ph, label %for.cond.cleanup7 + +for.body8.lr.ph: ; preds = %for.cond5.preheader + %mul944 = add i32 %j.048, %mul + %add = mul i32 %mul944, %o + br label %for.body8 + +for.body8: ; preds = %for.body8, %for.body8.lr.ph + %k.046 = phi i32 [ 0, %for.body8.lr.ph ], [ %inc, %for.body8 ] + %add11 = add nsw i32 %k.046, %add + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add11 + %0 = load i32, i32* %arrayidx, align 4 + %add12 = add nsw i32 %0, 1 + store i32 %add12, i32* %arrayidx, align 4 + %inc = add nuw nsw i32 %k.046, 1 + %exitcond = icmp eq i32 %inc, %o + br i1 %exitcond, label %for.cond.cleanup7, label %for.body8 + +for.cond.cleanup7: ; preds = %for.body8, %for.cond5.preheader + %inc20 = add nuw nsw i32 %j.048, 1 + %exitcond51 = icmp eq i32 %inc20, %m + br i1 %exitcond51, label %for.cond.cleanup3, label %for.cond5.preheader + +for.cond.cleanup3: ; preds = %for.cond.cleanup7, %for.cond1.preheader + %inc23 = add nuw nsw i32 %i.050, 1 + %exitcond52 = icmp eq i32 %inc23, %n + br i1 %exitcond52, label %for.cond.cleanup, label %for.cond1.preheader + +for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry + ret void +} + +; CHECK-LABEL: t2 +;; for (int i = 0; 
i < n; i++) +;; for (int j = 0; j < m; j++) +;; for (int k = 0; k < o; k++) +;; = A[i*m*o + j*o + k] +;; A[i*m*o + j*o + k + 1] = +define void @t2(i32 %n, i32 %m, i32 %o, i32* nocapture %A) { +; CHECK: da analyze - none! +; CHECK: da analyze - anti [* * *|<]! +; CHECK: da analyze - output [* * *]! +entry: + %cmp49 = icmp sgt i32 %n, 0 + br i1 %cmp49, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup + +for.cond1.preheader.lr.ph: ; preds = %entry + %cmp247 = icmp sgt i32 %m, 0 + %cmp645 = icmp sgt i32 %o, 0 + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond.cleanup3, %for.cond1.preheader.lr.ph + %i.050 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc23, %for.cond.cleanup3 ] + br i1 %cmp247, label %for.cond5.preheader.lr.ph, label %for.cond.cleanup3 + +for.cond5.preheader.lr.ph: ; preds = %for.cond1.preheader + %mul = mul nsw i32 %i.050, %m + br label %for.cond5.preheader + +for.cond5.preheader: ; preds = %for.cond.cleanup7, %for.cond5.preheader.lr.ph + %j.048 = phi i32 [ 0, %for.cond5.preheader.lr.ph ], [ %inc20, %for.cond.cleanup7 ] + br i1 %cmp645, label %for.body8.lr.ph, label %for.cond.cleanup7 + +for.body8.lr.ph: ; preds = %for.cond5.preheader + %mul944 = add i32 %j.048, %mul + %add = mul i32 %mul944, %o + br label %for.body8 + +for.body8: ; preds = %for.body8, %for.body8.lr.ph + %k.046 = phi i32 [ 0, %for.body8.lr.ph ], [ %inc, %for.body8 ] + %add11 = add nsw i32 %k.046, %add + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add11 + %0 = load i32, i32* %arrayidx, align 4 + %add12 = add nsw i32 %0, 1 + %add111 = add nsw i32 %add11, 1 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %add111 + store i32 %add12, i32* %arrayidx2, align 4 + %inc = add nuw nsw i32 %k.046, 1 + %exitcond = icmp eq i32 %inc, %o + br i1 %exitcond, label %for.cond.cleanup7, label %for.body8 + +for.cond.cleanup7: ; preds = %for.body8, %for.cond5.preheader + %inc20 = add nuw nsw i32 %j.048, 1 + %exitcond51 = icmp eq i32 %inc20, %m + br i1 
%exitcond51, label %for.cond.cleanup3, label %for.cond5.preheader + +for.cond.cleanup3: ; preds = %for.cond.cleanup7, %for.cond1.preheader + %inc23 = add nuw nsw i32 %i.050, 1 + %exitcond52 = icmp eq i32 %inc23, %n + br i1 %exitcond52, label %for.cond.cleanup, label %for.cond1.preheader + +for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry + ret void +} + +; CHECK-LABEL: t3 +;; for (int i = 0; i < n; i++) +;; for (int j = 0; j < m; j++) +;; for (int k = 0; k < o; k++) +;; = A[i*m*o + j*o + k] +;; A[i*m*o + j*o + k - 1] = +define void @t3(i32 %n, i32 %m, i32 %o, i32* nocapture %A) { +; CHECK: da analyze - none! +; CHECK: da analyze - anti [* * *|<]! +; CHECK: da analyze - output [* * *]! +entry: + %cmp49 = icmp sgt i32 %n, 0 + br i1 %cmp49, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup + +for.cond1.preheader.lr.ph: ; preds = %entry + %cmp247 = icmp sgt i32 %m, 0 + %cmp645 = icmp sgt i32 %o, 0 + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond.cleanup3, %for.cond1.preheader.lr.ph + %i.050 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc23, %for.cond.cleanup3 ] + br i1 %cmp247, label %for.cond5.preheader.lr.ph, label %for.cond.cleanup3 + +for.cond5.preheader.lr.ph: ; preds = %for.cond1.preheader + %mul = mul nsw i32 %i.050, %m + br label %for.cond5.preheader + +for.cond5.preheader: ; preds = %for.cond.cleanup7, %for.cond5.preheader.lr.ph + %j.048 = phi i32 [ 0, %for.cond5.preheader.lr.ph ], [ %inc20, %for.cond.cleanup7 ] + br i1 %cmp645, label %for.body8.lr.ph, label %for.cond.cleanup7 + +for.body8.lr.ph: ; preds = %for.cond5.preheader + %mul944 = add i32 %j.048, %mul + %add = mul i32 %mul944, %o + br label %for.body8 + +for.body8: ; preds = %for.body8, %for.body8.lr.ph + %k.046 = phi i32 [ 0, %for.body8.lr.ph ], [ %inc, %for.body8 ] + %add11 = add nsw i32 %k.046, %add + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add11 + %0 = load i32, i32* %arrayidx, align 4 + %add12 = add nsw i32 %0, 1 + %add111 = sub nsw i32 
%add11, 1 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %add111 + store i32 %add12, i32* %arrayidx2, align 4 + %inc = add nuw nsw i32 %k.046, 1 + %exitcond = icmp eq i32 %inc, %o + br i1 %exitcond, label %for.cond.cleanup7, label %for.body8 + +for.cond.cleanup7: ; preds = %for.body8, %for.cond5.preheader + %inc20 = add nuw nsw i32 %j.048, 1 + %exitcond51 = icmp eq i32 %inc20, %m + br i1 %exitcond51, label %for.cond.cleanup3, label %for.cond5.preheader + +for.cond.cleanup3: ; preds = %for.cond.cleanup7, %for.cond1.preheader + %inc23 = add nuw nsw i32 %i.050, 1 + %exitcond52 = icmp eq i32 %inc23, %n + br i1 %exitcond52, label %for.cond.cleanup, label %for.cond1.preheader + +for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry + ret void +} + +; CHECK-LABEL: t4 +;; for (int i = 0; i < n; i++) +;; for (int j = 0; j < m; j++) +;; for (int k = 0; k < o; k++) +;; = A[i*m*o + j*o + k] +;; A[i*m*o + j*o + k + o] = +define void @t4(i32 %n, i32 %m, i32 %o, i32* nocapture %A) { +; CHECK: da analyze - none! +; CHECK: da analyze - anti [* * *|<]! +; CHECK: da analyze - output [* * *]! 
+entry: + %cmp49 = icmp sgt i32 %n, 0 + br i1 %cmp49, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup + +for.cond1.preheader.lr.ph: ; preds = %entry + %cmp247 = icmp sgt i32 %m, 0 + %cmp645 = icmp sgt i32 %o, 0 + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond.cleanup3, %for.cond1.preheader.lr.ph + %i.050 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc23, %for.cond.cleanup3 ] + br i1 %cmp247, label %for.cond5.preheader.lr.ph, label %for.cond.cleanup3 + +for.cond5.preheader.lr.ph: ; preds = %for.cond1.preheader + %mul = mul nsw i32 %i.050, %m + br label %for.cond5.preheader + +for.cond5.preheader: ; preds = %for.cond.cleanup7, %for.cond5.preheader.lr.ph + %j.048 = phi i32 [ 0, %for.cond5.preheader.lr.ph ], [ %inc20, %for.cond.cleanup7 ] + br i1 %cmp645, label %for.body8.lr.ph, label %for.cond.cleanup7 + +for.body8.lr.ph: ; preds = %for.cond5.preheader + %mul944 = add i32 %j.048, %mul + %add = mul i32 %mul944, %o + br label %for.body8 + +for.body8: ; preds = %for.body8, %for.body8.lr.ph + %k.046 = phi i32 [ 0, %for.body8.lr.ph ], [ %inc, %for.body8 ] + %add11 = add nsw i32 %k.046, %add + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add11 + %0 = load i32, i32* %arrayidx, align 4 + %add12 = add nsw i32 %0, 1 + %add111 = add nsw i32 %add11, %o + %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %add111 + store i32 %add12, i32* %arrayidx2, align 4 + %inc = add nuw nsw i32 %k.046, 1 + %exitcond = icmp eq i32 %inc, %o + br i1 %exitcond, label %for.cond.cleanup7, label %for.body8 + +for.cond.cleanup7: ; preds = %for.body8, %for.cond5.preheader + %inc20 = add nuw nsw i32 %j.048, 1 + %exitcond51 = icmp eq i32 %inc20, %m + br i1 %exitcond51, label %for.cond.cleanup3, label %for.cond5.preheader + +for.cond.cleanup3: ; preds = %for.cond.cleanup7, %for.cond1.preheader + %inc23 = add nuw nsw i32 %i.050, 1 + %exitcond52 = icmp eq i32 %inc23, %n + br i1 %exitcond52, label %for.cond.cleanup, label %for.cond1.preheader + 
+for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry + ret void +} + +; CHECK-LABEL: t5 +;; for (int i = 0; i < n; i++) +;; for (int j = 0; j < m; j++) +;; for (int k = 0; k < o; k++) +;; = A[i*m*o + j*o + k] +;; A[i*m*o + j*o + k - o] = +define void @t5(i32 %n, i32 %m, i32 %o, i32* nocapture %A) { +; CHECK: da analyze - none! +; CHECK: da analyze - anti [* * *|<]! +; CHECK: da analyze - output [* * *]! +entry: + %cmp49 = icmp sgt i32 %n, 0 + br i1 %cmp49, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup + +for.cond1.preheader.lr.ph: ; preds = %entry + %cmp247 = icmp sgt i32 %m, 0 + %cmp645 = icmp sgt i32 %o, 0 + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond.cleanup3, %for.cond1.preheader.lr.ph + %i.050 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc23, %for.cond.cleanup3 ] + br i1 %cmp247, label %for.cond5.preheader.lr.ph, label %for.cond.cleanup3 + +for.cond5.preheader.lr.ph: ; preds = %for.cond1.preheader + %mul = mul nsw i32 %i.050, %m + br label %for.cond5.preheader + +for.cond5.preheader: ; preds = %for.cond.cleanup7, %for.cond5.preheader.lr.ph + %j.048 = phi i32 [ 0, %for.cond5.preheader.lr.ph ], [ %inc20, %for.cond.cleanup7 ] + br i1 %cmp645, label %for.body8.lr.ph, label %for.cond.cleanup7 + +for.body8.lr.ph: ; preds = %for.cond5.preheader + %mul944 = add i32 %j.048, %mul + %add = mul i32 %mul944, %o + br label %for.body8 + +for.body8: ; preds = %for.body8, %for.body8.lr.ph + %k.046 = phi i32 [ 0, %for.body8.lr.ph ], [ %inc, %for.body8 ] + %add11 = add nsw i32 %k.046, %add + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add11 + %0 = load i32, i32* %arrayidx, align 4 + %add12 = add nsw i32 %0, 1 + %add111 = sub nsw i32 %add11, %o + %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %add111 + store i32 %add12, i32* %arrayidx2, align 4 + %inc = add nuw nsw i32 %k.046, 1 + %exitcond = icmp eq i32 %inc, %o + br i1 %exitcond, label %for.cond.cleanup7, label %for.body8 + +for.cond.cleanup7: ; preds = 
%for.body8, %for.cond5.preheader + %inc20 = add nuw nsw i32 %j.048, 1 + %exitcond51 = icmp eq i32 %inc20, %m + br i1 %exitcond51, label %for.cond.cleanup3, label %for.cond5.preheader + +for.cond.cleanup3: ; preds = %for.cond.cleanup7, %for.cond1.preheader + %inc23 = add nuw nsw i32 %i.050, 1 + %exitcond52 = icmp eq i32 %inc23, %n + br i1 %exitcond52, label %for.cond.cleanup, label %for.cond1.preheader + +for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry + ret void +} + +; CHECK-LABEL: t6 +;; for (int i = 0; i < n; i++) +;; for (int j = 0; j < m; j++) +;; for (int k = 0; k < o; k++) +;; = A[i*m*o + j*o + k] +;; A[i*m*o + j*o + k + m*o] = +define void @t6(i32 %n, i32 %m, i32 %o, i32* nocapture %A) { +; CHECK: da analyze - none! +; CHECK: da analyze - consistent anti [-1 0 0]! +; CHECK: da analyze - none! +entry: + %cmp49 = icmp sgt i32 %n, 0 + br i1 %cmp49, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup + +for.cond1.preheader.lr.ph: ; preds = %entry + %cmp247 = icmp sgt i32 %m, 0 + %cmp645 = icmp sgt i32 %o, 0 + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond.cleanup3, %for.cond1.preheader.lr.ph + %i.050 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc23, %for.cond.cleanup3 ] + br i1 %cmp247, label %for.cond5.preheader.lr.ph, label %for.cond.cleanup3 + +for.cond5.preheader.lr.ph: ; preds = %for.cond1.preheader + %mul = mul nsw i32 %i.050, %m + br label %for.cond5.preheader + +for.cond5.preheader: ; preds = %for.cond.cleanup7, %for.cond5.preheader.lr.ph + %j.048 = phi i32 [ 0, %for.cond5.preheader.lr.ph ], [ %inc20, %for.cond.cleanup7 ] + br i1 %cmp645, label %for.body8.lr.ph, label %for.cond.cleanup7 + +for.body8.lr.ph: ; preds = %for.cond5.preheader + %mul944 = add i32 %j.048, %mul + %add = mul i32 %mul944, %o + br label %for.body8 + +for.body8: ; preds = %for.body8, %for.body8.lr.ph + %k.046 = phi i32 [ 0, %for.body8.lr.ph ], [ %inc, %for.body8 ] + %add11 = add nsw i32 %k.046, %add + %arrayidx = getelementptr inbounds 
i32, i32* %A, i32 %add11 + %0 = load i32, i32* %arrayidx, align 4 + %add12 = add nsw i32 %0, 1 + %mo = mul i32 %m, %o + %add111 = add nsw i32 %add11, %mo + %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %add111 + store i32 %add12, i32* %arrayidx2, align 4 + %inc = add nuw nsw i32 %k.046, 1 + %exitcond = icmp eq i32 %inc, %o + br i1 %exitcond, label %for.cond.cleanup7, label %for.body8 + +for.cond.cleanup7: ; preds = %for.body8, %for.cond5.preheader + %inc20 = add nuw nsw i32 %j.048, 1 + %exitcond51 = icmp eq i32 %inc20, %m + br i1 %exitcond51, label %for.cond.cleanup3, label %for.cond5.preheader + +for.cond.cleanup3: ; preds = %for.cond.cleanup7, %for.cond1.preheader + %inc23 = add nuw nsw i32 %i.050, 1 + %exitcond52 = icmp eq i32 %inc23, %n + br i1 %exitcond52, label %for.cond.cleanup, label %for.cond1.preheader + +for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry + ret void +} + +; CHECK-LABEL: t7 +;; for (int i = 0; i < n; i++) +;; for (int j = 0; j < m; j++) +;; for (int k = 0; k < o; k++) +;; = A[i*m*o + j*o + k] +;; A[i*m*o + j*o + k - m*o] = +define void @t7(i32 %n, i32 %m, i32 %o, i32* nocapture %A) { +; CHECK: da analyze - none! +; CHECK: da analyze - consistent anti [1 0 0]! +; CHECK: da analyze - none! 
+entry: + %cmp49 = icmp sgt i32 %n, 0 + br i1 %cmp49, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup + +for.cond1.preheader.lr.ph: ; preds = %entry + %cmp247 = icmp sgt i32 %m, 0 + %cmp645 = icmp sgt i32 %o, 0 + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond.cleanup3, %for.cond1.preheader.lr.ph + %i.050 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc23, %for.cond.cleanup3 ] + br i1 %cmp247, label %for.cond5.preheader.lr.ph, label %for.cond.cleanup3 + +for.cond5.preheader.lr.ph: ; preds = %for.cond1.preheader + %mul = mul nsw i32 %i.050, %m + br label %for.cond5.preheader + +for.cond5.preheader: ; preds = %for.cond.cleanup7, %for.cond5.preheader.lr.ph + %j.048 = phi i32 [ 0, %for.cond5.preheader.lr.ph ], [ %inc20, %for.cond.cleanup7 ] + br i1 %cmp645, label %for.body8.lr.ph, label %for.cond.cleanup7 + +for.body8.lr.ph: ; preds = %for.cond5.preheader + %mul944 = add i32 %j.048, %mul + %add = mul i32 %mul944, %o + br label %for.body8 + +for.body8: ; preds = %for.body8, %for.body8.lr.ph + %k.046 = phi i32 [ 0, %for.body8.lr.ph ], [ %inc, %for.body8 ] + %add11 = add nsw i32 %k.046, %add + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add11 + %0 = load i32, i32* %arrayidx, align 4 + %add12 = add nsw i32 %0, 1 + %mo = mul i32 %m, %o + %add111 = sub nsw i32 %add11, %mo + %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %add111 + store i32 %add12, i32* %arrayidx2, align 4 + %inc = add nuw nsw i32 %k.046, 1 + %exitcond = icmp eq i32 %inc, %o + br i1 %exitcond, label %for.cond.cleanup7, label %for.body8 + +for.cond.cleanup7: ; preds = %for.body8, %for.cond5.preheader + %inc20 = add nuw nsw i32 %j.048, 1 + %exitcond51 = icmp eq i32 %inc20, %m + br i1 %exitcond51, label %for.cond.cleanup3, label %for.cond5.preheader + +for.cond.cleanup3: ; preds = %for.cond.cleanup7, %for.cond1.preheader + %inc23 = add nuw nsw i32 %i.050, 1 + %exitcond52 = icmp eq i32 %inc23, %n + br i1 %exitcond52, label %for.cond.cleanup, label 
%for.cond1.preheader + +for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry + ret void +} + +; CHECK-LABEL: t8 +;; for (int i = 0; i < n; i++) +;; for (int j = 0; j < m; j++) +;; for (int k = 1; k < o; k++) +;; = A[i*m*o + j*o + k] +;; A[i*m*o + j*o + k - 1] = +define void @t8(i32 %n, i32 %m, i32 %o, i32* nocapture %A) { +; CHECK: da analyze - none! +; CHECK: da analyze - consistent anti [0 0 1]! +; CHECK: da analyze - none! +entry: + %cmp49 = icmp sgt i32 %n, 0 + br i1 %cmp49, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup + +for.cond1.preheader.lr.ph: ; preds = %entry + %cmp247 = icmp sgt i32 %m, 0 + %cmp645 = icmp sgt i32 %o, 0 + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond.cleanup3, %for.cond1.preheader.lr.ph + %i.050 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc23, %for.cond.cleanup3 ] + br i1 %cmp247, label %for.cond5.preheader.lr.ph, label %for.cond.cleanup3 + +for.cond5.preheader.lr.ph: ; preds = %for.cond1.preheader + %mul = mul nsw i32 %i.050, %m + br label %for.cond5.preheader + +for.cond5.preheader: ; preds = %for.cond.cleanup7, %for.cond5.preheader.lr.ph + %j.048 = phi i32 [ 0, %for.cond5.preheader.lr.ph ], [ %inc20, %for.cond.cleanup7 ] + br i1 %cmp645, label %for.body8.lr.ph, label %for.cond.cleanup7 + +for.body8.lr.ph: ; preds = %for.cond5.preheader + %mul944 = add i32 %j.048, %mul + %add = mul i32 %mul944, %o + br label %for.body8 + +for.body8: ; preds = %for.body8, %for.body8.lr.ph + %k.046 = phi i32 [ 1, %for.body8.lr.ph ], [ %inc, %for.body8 ] + %add11 = add nsw i32 %k.046, %add + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %add11 + %0 = load i32, i32* %arrayidx, align 4 + %add12 = add nsw i32 %0, 1 + %add111 = sub nsw i32 %add11, 1 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %add111 + store i32 %add12, i32* %arrayidx2, align 4 + %inc = add nuw nsw i32 %k.046, 1 + %exitcond = icmp eq i32 %inc, %o + br i1 %exitcond, label %for.cond.cleanup7, label %for.body8 + 
+for.cond.cleanup7: ; preds = %for.body8, %for.cond5.preheader + %inc20 = add nuw nsw i32 %j.048, 1 + %exitcond51 = icmp eq i32 %inc20, %m + br i1 %exitcond51, label %for.cond.cleanup3, label %for.cond5.preheader + +for.cond.cleanup3: ; preds = %for.cond.cleanup7, %for.cond1.preheader + %inc23 = add nuw nsw i32 %i.050, 1 + %exitcond52 = icmp eq i32 %inc23, %n + br i1 %exitcond52, label %for.cond.cleanup, label %for.cond1.preheader + +for.cond.cleanup: ; preds = %for.cond.cleanup3, %entry + ret void +} + + +; CHECK-LABEL: test_sizes +define double @test_sizes(i16 %h, i16 %N, i16* nocapture %array) { +; CHECK: da analyze - consistent input [0 S]! +; CHECK: da analyze - anti [* *|<]! +; CHECK: da analyze - output [* *]! +entry: + %cmp28 = icmp sgt i16 %N, 1 + br i1 %cmp28, label %for.body.lr.ph, label %for.end12 + +for.body.lr.ph: ; preds = %entry + %cmp425 = icmp slt i16 %h, 0 + %0 = add i16 %h, 1 + %wide.trip.count = zext i16 %N to i32 + br label %for.body + +for.body: ; preds = %for.inc10, %for.body.lr.ph + %indvars.iv32 = phi i32 [ 1, %for.body.lr.ph ], [ %indvars.iv.next33, %for.inc10 ] + %indvars.iv = phi i16 [ 2, %for.body.lr.ph ], [ %indvars.iv.next, %for.inc10 ] + br i1 %cmp425, label %for.inc10, label %for.body5.lr.ph + +for.body5.lr.ph: ; preds = %for.body + %1 = sext i16 %indvars.iv to i32 + %arrayidx = getelementptr inbounds i16, i16* %array, i32 %indvars.iv32 + br label %for.body5 + +for.body5: ; preds = %for.body5, %for.body5.lr.ph + %indvars.iv30 = phi i32 [ %indvars.iv.next31, %for.body5 ], [ %1, %for.body5.lr.ph ] + %j.027 = phi i16 [ %inc, %for.body5 ], [ 0, %for.body5.lr.ph ] + %2 = load i16, i16* %arrayidx, align 4 + %add6 = add nsw i16 %2, %j.027 + %arrayidx8 = getelementptr inbounds i16, i16* %array, i32 %indvars.iv30 + store i16 %add6, i16* %arrayidx8, align 4 + %inc = add nuw nsw i16 %j.027, 1 + %indvars.iv.next31 = add nsw i32 %indvars.iv30, 1 + %exitcond = icmp eq i16 %inc, %0 + br i1 %exitcond, label %for.inc10, label %for.body5 + 
+for.inc10: ; preds = %for.body5, %for.body + %indvars.iv.next33 = add nuw nsw i32 %indvars.iv32, 1 + %indvars.iv.next = add i16 %indvars.iv, %0 + %exitcond34 = icmp eq i32 %indvars.iv.next33, %wide.trip.count + br i1 %exitcond34, label %for.end12, label %for.body + +for.end12: ; preds = %for.inc10, %entry + ret double undef +} \ No newline at end of file Index: llvm/trunk/test/Analysis/DependenceAnalysis/ExactSIV.ll =================================================================== --- llvm/trunk/test/Analysis/DependenceAnalysis/ExactSIV.ll +++ llvm/trunk/test/Analysis/DependenceAnalysis/ExactSIV.ll @@ -1,6 +1,5 @@ -; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s -; ModuleID = 'ExactSIV.bc' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.6.0" Index: llvm/trunk/test/Analysis/DependenceAnalysis/GCD.ll =================================================================== --- llvm/trunk/test/Analysis/DependenceAnalysis/GCD.ll +++ llvm/trunk/test/Analysis/DependenceAnalysis/GCD.ll @@ -1,6 +1,5 @@ -; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s -check-prefix=DELIN +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s -check-prefix=DELIN -; ModuleID = 'GCD.bc' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.6.0" @@ -228,10 +227,10 @@ br label %for.cond1.preheader ; DELIN-LABEL: gcd4 -; DELIN: da analyze - none! +; DELIN: da analyze - output [* *]! ; DELIN: da analyze - none! ; DELIN: da analyze - confused! -; DELIN: da analyze - none! +; DELIN: da analyze - input [* *]! ; DELIN: da analyze - confused! ; DELIN: da analyze - none! 
@@ -290,10 +289,10 @@ br label %for.cond1.preheader ; DELIN-LABEL: gcd5 -; DELIN: da analyze - none! -; DELIN: da analyze - flow [> *]! +; DELIN: da analyze - output [* *]! +; DELIN: da analyze - flow [<> *]! ; DELIN: da analyze - confused! -; DELIN: da analyze - none! +; DELIN: da analyze - input [* *]! ; DELIN: da analyze - confused! ; DELIN: da analyze - none! @@ -353,10 +352,10 @@ br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end12 ; DELIN-LABEL: gcd6 -; DELIN: da analyze - none! +; DELIN: da analyze - output [* *]! ; DELIN: da analyze - none! ; DELIN: da analyze - confused! -; DELIN: da analyze - none! +; DELIN: da analyze - input [* *]! ; DELIN: da analyze - confused! ; DELIN: da analyze - output [* *]! Index: llvm/trunk/test/Analysis/DependenceAnalysis/Invariant.ll =================================================================== --- llvm/trunk/test/Analysis/DependenceAnalysis/Invariant.ll +++ llvm/trunk/test/Analysis/DependenceAnalysis/Invariant.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s ; Test for a bug, which caused an assert when an invalid ; SCEVAddRecExpr is created in addToCoefficient. Index: llvm/trunk/test/Analysis/DependenceAnalysis/NonAffineExpr.ll =================================================================== --- llvm/trunk/test/Analysis/DependenceAnalysis/NonAffineExpr.ll +++ llvm/trunk/test/Analysis/DependenceAnalysis/NonAffineExpr.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -analyze -basicaa -da-delinearize -da +; RUN: opt < %s -analyze -basicaa -da ; ; CHECK: da analyze - consistent input [S S]! ; CHECK: da analyze - confused! 
Index: llvm/trunk/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll =================================================================== --- llvm/trunk/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll +++ llvm/trunk/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s -check-prefix=DELIN +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s -check-prefix=DELIN target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.6.0" Index: llvm/trunk/test/Analysis/DependenceAnalysis/Preliminary.ll =================================================================== --- llvm/trunk/test/Analysis/DependenceAnalysis/Preliminary.ll +++ llvm/trunk/test/Analysis/DependenceAnalysis/Preliminary.ll @@ -1,6 +1,5 @@ -; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s -; ModuleID = 'Preliminary.bc' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.6.0" Index: llvm/trunk/test/Analysis/DependenceAnalysis/Propagating.ll =================================================================== --- llvm/trunk/test/Analysis/DependenceAnalysis/Propagating.ll +++ llvm/trunk/test/Analysis/DependenceAnalysis/Propagating.ll @@ -1,6 +1,5 @@ -; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s -; ModuleID = 'Propagating.bc' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.6.0" Index: 
llvm/trunk/test/Analysis/DependenceAnalysis/Separability.ll =================================================================== --- llvm/trunk/test/Analysis/DependenceAnalysis/Separability.ll +++ llvm/trunk/test/Analysis/DependenceAnalysis/Separability.ll @@ -1,6 +1,5 @@ -; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s -; ModuleID = 'Separability.bc' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.6.0" Index: llvm/trunk/test/Analysis/DependenceAnalysis/StrongSIV.ll =================================================================== --- llvm/trunk/test/Analysis/DependenceAnalysis/StrongSIV.ll +++ llvm/trunk/test/Analysis/DependenceAnalysis/StrongSIV.ll @@ -1,6 +1,5 @@ -; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s -; ModuleID = 'StrongSIV.bc' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.6.0" Index: llvm/trunk/test/Analysis/DependenceAnalysis/SymbolicSIV.ll =================================================================== --- llvm/trunk/test/Analysis/DependenceAnalysis/SymbolicSIV.ll +++ llvm/trunk/test/Analysis/DependenceAnalysis/SymbolicSIV.ll @@ -1,6 +1,5 @@ -; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s -; ModuleID = 'SymbolicSIV.bc' target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.6.0" @@ -336,7 +335,7 @@ ; CHECK-LABEL: symbolicsiv6 ; CHECK: da analyze - none! 
-; CHECK: da analyze - flow [0|<]! +; CHECK: da analyze - none! ; CHECK: da analyze - confused! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! @@ -385,7 +384,7 @@ br i1 %cmp1, label %for.end, label %for.body.preheader ; CHECK-LABEL: symbolicsiv7 ; CHECK: da analyze - none! -; CHECK: da analyze - flow [0|<]! +; CHECK: da analyze - flow [<>]! ; CHECK: da analyze - confused! ; CHECK: da analyze - none! ; CHECK: da analyze - confused! Index: llvm/trunk/test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll =================================================================== --- llvm/trunk/test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll +++ llvm/trunk/test/Transforms/LoopInterchange/interchange-insts-between-indvar.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -da-delinearize -loop-interchange -verify-dom-info -verify-loop-info \ +; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info \ ; RUN: -S -pass-remarks=loop-interchange 2>&1 | FileCheck %s @A10 = local_unnamed_addr global [3 x [3 x i32]] zeroinitializer, align 16