Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -2320,6 +2320,13 @@ return L; } +static Loop *getFirstNonBoxedLoopFor(Loop *L, LoopInfo &LI, + const BoxedLoopsSetTy &BoxedLoops) { + while (BoxedLoops.count(L)) + L = L->getParentLoop(); + return L; +} + /// @brief Adjust the dimensions of @p Dom that was constructed for @p OldL /// to be compatible to domains constructed for loop @p NewL. /// @@ -4362,6 +4369,11 @@ std::vector SizesSCEV; const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads(); + // If the loop is nonaffine/boxed, return the first non-boxed surrounding loop + // for polly. If the loop is affine, return the loop itself. Do not call + // `getSCEVAtScope()` on the result of `getFirstNonBoxedLoopFor()`, as we need + // to analyze memory access of the nonaffine/boxed loops. + L = getFirstNonBoxedLoopFor(L, LI, scop->getBoxedLoops()); for (auto *Subscript : Subscripts) { InvariantLoadsSetTy AccessILS; if (!isAffineExpr(&scop->getRegion(), L, Subscript, SE, &AccessILS)) @@ -4438,8 +4450,13 @@ // Check if the length val is actually affine or if we overapproximate it InvariantLoadsSetTy AccessILS; const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads(); - bool LengthIsAffine = - isAffineExpr(&scop->getRegion(), L, LengthVal, SE, &AccessILS); + // If the loop is nonaffine/boxed, return the first non-boxed surrounding loop + // for polly. If the loop is affine, return the loop itself. Do not call + // `getSCEVAtScope()` on the result of `getFirstNonBoxedLoopFor()`, as we need + // to analyze memory access of the nonaffine/boxed loops. + bool LengthIsAffine = isAffineExpr( + &scop->getRegion(), getFirstNonBoxedLoopFor(L, LI, scop->getBoxedLoops()), + LengthVal, SE, &AccessILS); for (LoadInst *LInst : AccessILS) if (!ScopRIL.count(LInst)) LengthIsAffine = false; @@ -4557,6 +4574,11 @@ isVariantInNonAffineLoop = true; InvariantLoadsSetTy AccessILS; + // If the loop is nonaffine/boxed, return the first non-boxed surrounding loop + // for polly. If the loop is affine, return the loop itself. Do not call + // `getSCEVAtScope()` on the result of `getFirstNonBoxedLoopFor()`, as we need + // to analyze memory access of the nonaffine/boxed loops. + L = getFirstNonBoxedLoopFor(L, LI, BoxedLoops); bool IsAffine = !isVariantInNonAffineLoop && isAffineExpr(&scop->getRegion(), L, AccessFunction, SE, &AccessILS); Index: lib/Support/SCEVValidator.cpp =================================================================== --- lib/Support/SCEVValidator.cpp +++ lib/Support/SCEVValidator.cpp @@ -214,8 +214,9 @@ auto *L = Expr->getLoop(); if (R->contains(L) && (!Scope || !L->contains(Scope))) { - DEBUG(dbgs() << "INVALID: AddRec out of a loop whose exit value is not " - "synthesizable"); + DEBUG(dbgs() << "INVALID: Loop of AddRec expression boxed in an a " + "non-affine subregion or has a non-synthesizable exit " + "value."); return ValidatorResult(SCEVType::INVALID); } Index: test/DependenceInfo/nonaffine-condition-buildMemoryAccess.ll =================================================================== --- /dev/null +++ test/DependenceInfo/nonaffine-condition-buildMemoryAccess.ll @@ -0,0 +1,74 @@ +; RUN: opt %loadPolly -polly-codegen -polly-allow-nonaffine-loops -polly-allow-nonaffine -debug-only=polly-dependence < %s 2>&1 | FileCheck %s + +; CHECK: MayWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body__TO__for_inc11[i0] -> MemRef_A[o0] : 0 <= o0 <= 699 }; +; CHECK-NEXT: MayWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body__TO__for_inc11[i0] -> MemRef_B[700] }; + + +; The if condition C[i] is a non-affine condition, which make the nested loop boxed. The memory access for A should be a range A[0...699]. The memory access for B should be simplified to B[700]. +; +; int A[1000], B[1000], C[1000]; +; +; void foo(int n, int m, int N) { +; for (int i = 0; i < 500; i+=1) { /* affine loop */ +; C[i] += i; +; if (C[i]) { /* non-affine subregion */ +; int j; +; for (j = 0; j < 700; j+=1) { /* boxed loop */ +; A[j] = 1; +; } +; B[j] = 2; +; } +; } +; } + + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" + +@C = common global [1000 x i32] zeroinitializer, align 4 +@A = common global [1000 x i32] zeroinitializer, align 4 +@B = common global [1000 x i32] zeroinitializer, align 4 + +; Function Attrs: norecurse nounwind +define void @foo(i32 %n, i32 %m, i32 %N) #0 { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.body + +for.cond.cleanup: ; preds = %for.inc11 + ret void + +for.body: ; preds = %for.inc11, %entry.split + %indvars.iv25 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next26, %for.inc11 ] + %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* @C, i64 0, i64 %indvars.iv25 + %0 = load i32, i32* %arrayidx, align 4 + %1 = trunc i64 %indvars.iv25 to i32 + %add = add nsw i32 %0, %1 + store i32 %add, i32* %arrayidx, align 4 + %tobool = icmp eq i32 %add, 0 + br i1 %tobool, label %for.inc11, label %for.body5.preheader + +for.body5.preheader: ; preds = %for.body + br label %for.body5 + +for.body5: ; preds = %for.body5.preheader, %for.body5 + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body5 ], [ 0, %for.body5.preheader ] + %arrayidx7 = getelementptr inbounds [1000 x i32], [1000 x i32]* @A, i64 0, i64 %indvars.iv + store i32 1, i32* %arrayidx7, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv, 699 + br i1 %exitcond, label %for.end, label %for.body5 + +for.end: ; preds = %for.body5 + store i32 2, i32* getelementptr inbounds ([1000 x i32], [1000 x i32]* @B, i64 0, i64 700), align 4 + br label %for.inc11 + +for.inc11: ; preds = %for.body, %for.end + %indvars.iv.next26 = add nuw nsw i64 %indvars.iv25, 1 + %exitcond27 = icmp eq i64 %indvars.iv25, 499 + br i1 %exitcond27, label %for.cond.cleanup, label %for.body +} + Index: test/ScopInfo/nonaffine-buildMemoryAccess.ll =================================================================== --- /dev/null +++ test/ScopInfo/nonaffine-buildMemoryAccess.ll @@ -0,0 +1,45 @@ +; RUN: opt %loadPolly -polly-allow-nonaffine-loops -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: Domain := +; CHECK-NEXT: { Stmt_while_cond_i__TO__while_end_i[] }; +; +define i32 @func(i32 %param0, i32 %param1, i64* %param2) #3 { + +entry: + %var0 = alloca i32 + %var1 = alloca i32 + br label %while.cond.i + +while.cond.i: ; preds = %while.cond.i.backedge, %entry + %var2 = phi i32 [ %param0, %entry ], [ %var3, %while.cond.i.backedge ] + %var3 = add nsw i32 %var2, 1 + %var4 = icmp slt i32 %var2, -1 + br i1 %var4, label %while.cond.i.backedge, label %if.end.i1.i + +if.end.i1.i: ; preds = %while.cond.i + %var5 = sdiv i32 %var3, 64 + %var6 = icmp sgt i32 %param1, %var5 + br i1 %var6, label %exit1.i, label %while.cond.i.backedge + +exit1.i: ; preds = %if.end.i1.i + %var7 = srem i32 %var3, 64 + %var8 = sext i32 %var5 to i64 + %var9 = getelementptr inbounds i64, i64* %param2, i64 %var8 + %var10 = load i64, i64* %var9, align 8 + %var11 = zext i32 %var7 to i64 + %var12 = shl i64 1, %var11 + %var13 = and i64 %var10, %var12 + %var14 = icmp eq i64 %var13, 0 + store i32 %var2, i32* %var1 + store i32 %var3, i32* %var0 + br i1 %var14, label %while.cond.i.backedge, label %while.end.i + +while.cond.i.backedge: ; preds = %exit1.i, %while.cond.i, %if.end.i1.i + br label %while.cond.i + +while.end.i: + %var15 = load i32, i32* %var0 + %var16 = load i32, i32* %var1 + %var17 = add i32 %var15, %var16 + ret i32 %var17 +}