Index: polly/trunk/lib/Analysis/ScopBuilder.cpp =================================================================== --- polly/trunk/lib/Analysis/ScopBuilder.cpp +++ polly/trunk/lib/Analysis/ScopBuilder.cpp @@ -29,6 +29,17 @@ STATISTIC(ScopFound, "Number of valid Scops"); STATISTIC(RichScopFound, "Number of Scops containing a loop"); +// If the loop is nonaffine/boxed, return the first non-boxed surrounding loop +// for Polly. If the loop is affine, return the loop itself. Do not call +// `getSCEVAtScope()` on the result of `getFirstNonBoxedLoopFor()`, as we need +// to analyze the memory accesses of the nonaffine/boxed loops. +static Loop *getFirstNonBoxedLoopFor(Loop *L, LoopInfo &LI, + const BoxedLoopsSetTy &BoxedLoops) { + while (BoxedLoops.count(L)) + L = L->getParentLoop(); + return L; +} + static cl::opt ModelReadOnlyScalars( "polly-analyze-read-only-scalars", cl::desc("Model read-only scalar values in the scop description"), @@ -150,9 +161,12 @@ std::vector SizesSCEV; const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads(); + + Loop *SurroundingLoop = getFirstNonBoxedLoopFor(L, LI, scop->getBoxedLoops()); for (auto *Subscript : Subscripts) { InvariantLoadsSetTy AccessILS; - if (!isAffineExpr(&scop->getRegion(), L, Subscript, SE, &AccessILS)) + if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE, + &AccessILS)) return false; for (LoadInst *LInst : AccessILS) @@ -226,8 +240,10 @@ // Check if the length val is actually affine or if we overapproximate it InvariantLoadsSetTy AccessILS; const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads(); - bool LengthIsAffine = - isAffineExpr(&scop->getRegion(), L, LengthVal, SE, &AccessILS); + + Loop *SurroundingLoop = getFirstNonBoxedLoopFor(L, LI, scop->getBoxedLoops()); + bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop, + LengthVal, SE, &AccessILS); for (LoadInst *LInst : AccessILS) if (!ScopRIL.count(LInst)) LengthIsAffine = false; @@ -345,9 +361,11 @@ isVariantInNonAffineLoop = true; InvariantLoadsSetTy AccessILS; - bool IsAffine = - !isVariantInNonAffineLoop && - isAffineExpr(&scop->getRegion(), L, AccessFunction, SE, &AccessILS); + + Loop *SurroundingLoop = getFirstNonBoxedLoopFor(L, LI, BoxedLoops); + bool IsAffine = !isVariantInNonAffineLoop && + isAffineExpr(&scop->getRegion(), SurroundingLoop, + AccessFunction, SE, &AccessILS); const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads(); for (LoadInst *LInst : AccessILS) Index: polly/trunk/lib/Analysis/ScopInfo.cpp =================================================================== --- polly/trunk/lib/Analysis/ScopInfo.cpp +++ polly/trunk/lib/Analysis/ScopInfo.cpp @@ -2246,6 +2246,10 @@ return true; } +// If the loop is nonaffine/boxed, return the first non-boxed surrounding loop +// for Polly. If the loop is affine, return the loop itself. Do not call +// `getSCEVAtScope()` on the result of `getFirstNonBoxedLoopFor()`, as we need +// to analyze the memory accesses of the nonaffine/boxed loops. static Loop *getFirstNonBoxedLoopFor(BasicBlock *BB, LoopInfo &LI, const BoxedLoopsSetTy &BoxedLoops) { auto *L = LI.getLoopFor(BB); Index: polly/trunk/lib/Support/SCEVValidator.cpp =================================================================== --- polly/trunk/lib/Support/SCEVValidator.cpp +++ polly/trunk/lib/Support/SCEVValidator.cpp @@ -214,8 +214,9 @@ auto *L = Expr->getLoop(); if (R->contains(L) && (!Scope || !L->contains(Scope))) { - DEBUG(dbgs() << "INVALID: AddRec out of a loop whose exit value is not " - "synthesizable"); + DEBUG(dbgs() << "INVALID: Loop of AddRec expression boxed in an a " + "non-affine subregion or has a non-synthesizable exit " + "value."); return ValidatorResult(SCEVType::INVALID); } Index: polly/trunk/test/DependenceInfo/nonaffine-condition-buildMemoryAccess.ll =================================================================== --- polly/trunk/test/DependenceInfo/nonaffine-condition-buildMemoryAccess.ll +++ polly/trunk/test/DependenceInfo/nonaffine-condition-buildMemoryAccess.ll @@ -0,0 +1,74 @@ +; RUN: opt %loadPolly -polly-codegen -polly-allow-nonaffine-loops -polly-allow-nonaffine -debug-only=polly-dependence < %s 2>&1 | FileCheck %s +; REQUIRES: asserts + +; CHECK: MayWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body__TO__for_inc11[i0] -> MemRef_A[o0] : 0 <= o0 <= 699 }; +; CHECK-NEXT: MayWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body__TO__for_inc11[i0] -> MemRef_B[700] }; + +; The if condition C[i] is a non-affine condition, which make the nested loop boxed. The memory access for A should be a range A[0...699]. The memory access for B should be simplified to B[700]. +; +; int A[1000], B[1000], C[1000]; +; +; void foo(int n, int m, int N) { +; for (int i = 0; i < 500; i+=1) { /* affine loop */ +; C[i] += i; +; if (C[i]) { /* non-affine subregion */ +; int j; +; for (j = 0; j < 700; j+=1) { /* boxed loop */ +; A[j] = 1; +; } +; B[j] = 2; +; } +; } +; } + + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" + +@C = common global [1000 x i32] zeroinitializer, align 4 +@A = common global [1000 x i32] zeroinitializer, align 4 +@B = common global [1000 x i32] zeroinitializer, align 4 + +; Function Attrs: norecurse nounwind +define void @foo(i32 %n, i32 %m, i32 %N) #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.body + +for.cond.cleanup: ; preds = %for.inc11 + ret void + +for.body: ; preds = %for.inc11, %entry.split + %indvars.iv25 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next26, %for.inc11 ] + %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* @C, i64 0, i64 %indvars.iv25 + %0 = load i32, i32* %arrayidx, align 4 + %1 = trunc i64 %indvars.iv25 to i32 + %add = add nsw i32 %0, %1 + store i32 %add, i32* %arrayidx, align 4 + %tobool = icmp eq i32 %add, 0 + br i1 %tobool, label %for.inc11, label %for.body5.preheader + +for.body5.preheader: ; preds = %for.body + br label %for.body5 + +for.body5: ; preds = %for.body5.preheader, %for.body5 + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body5 ], [ 0, %for.body5.preheader ] + %arrayidx7 = getelementptr inbounds [1000 x i32], [1000 x i32]* @A, i64 0, i64 %indvars.iv + store i32 1, i32* %arrayidx7, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv, 699 + br i1 %exitcond, label %for.end, label %for.body5 + +for.end: ; preds = %for.body5 + store i32 2, i32* getelementptr inbounds ([1000 x i32], [1000 x i32]* @B, i64 0, i64 700), align 4 + br label %for.inc11 + +for.inc11: ; preds = %for.body, %for.end + %indvars.iv.next26 = add nuw nsw i64 %indvars.iv25, 1 + %exitcond27 = icmp eq i64 %indvars.iv25, 499 + br i1 %exitcond27, label %for.cond.cleanup, label %for.body +} + Index: polly/trunk/test/ScopInfo/nonaffine-buildMemoryAccess.ll =================================================================== --- polly/trunk/test/ScopInfo/nonaffine-buildMemoryAccess.ll +++ polly/trunk/test/ScopInfo/nonaffine-buildMemoryAccess.ll @@ -0,0 +1,45 @@ +; RUN: opt %loadPolly -polly-allow-nonaffine-loops -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: Domain := +; CHECK-NEXT: { Stmt_while_cond_i__TO__while_end_i[] }; +; +define i32 @func(i32 %param0, i32 %param1, i64* %param2) #3 { + +entry: + %var0 = alloca i32 + %var1 = alloca i32 + br label %while.cond.i + +while.cond.i: ; preds = %while.cond.i.backedge, %entry + %var2 = phi i32 [ %param0, %entry ], [ %var3, %while.cond.i.backedge ] + %var3 = add nsw i32 %var2, 1 + %var4 = icmp slt i32 %var2, -1 + br i1 %var4, label %while.cond.i.backedge, label %if.end.i1.i + +if.end.i1.i: ; preds = %while.cond.i + %var5 = sdiv i32 %var3, 64 + %var6 = icmp sgt i32 %param1, %var5 + br i1 %var6, label %exit1.i, label %while.cond.i.backedge + +exit1.i: ; preds = %if.end.i1.i + %var7 = srem i32 %var3, 64 + %var8 = sext i32 %var5 to i64 + %var9 = getelementptr inbounds i64, i64* %param2, i64 %var8 + %var10 = load i64, i64* %var9, align 8 + %var11 = zext i32 %var7 to i64 + %var12 = shl i64 1, %var11 + %var13 = and i64 %var10, %var12 + %var14 = icmp eq i64 %var13, 0 + store i32 %var2, i32* %var1 + store i32 %var3, i32* %var0 + br i1 %var14, label %while.cond.i.backedge, label %while.end.i + +while.cond.i.backedge: ; preds = %exit1.i, %while.cond.i, %if.end.i1.i + br label %while.cond.i + +while.end.i: + %var15 = load i32, i32* %var0 + %var16 = load i32, i32* %var1 + %var17 = add i32 %var15, %var16 + ret i32 %var17 +}