Index: include/polly/TempScopInfo.h =================================================================== --- include/polly/TempScopInfo.h +++ include/polly/TempScopInfo.h @@ -258,13 +258,15 @@ /// @brief Build an instance of IRAccess from the Load/Store instruction. /// - /// @param Inst The Load/Store instruction that access the memory - /// @param L The parent loop of the instruction - /// @param R The region on which we are going to build a TempScop + /// @param Inst The Load/Store instruction that access the memory + /// @param L The parent loop of the instruction + /// @param R The region on which we are going to build a TempScop + /// @param BoxedLoops The set of loops that are overapproximated in @p R. /// /// @return The IRAccess to describe the access function of the /// instruction. - IRAccess buildIRAccess(Instruction *Inst, Loop *L, Region *R); + IRAccess buildIRAccess(Instruction *Inst, Loop *L, Region *R, + const ScopDetection::BoxedLoopsSetTy *BoxedLoops); /// @brief Analyze and extract the cross-BB scalar dependences (or, /// dataflow dependencies) of an instruction. Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -1563,12 +1563,18 @@ return Valid; } -static unsigned getMaxLoopDepthInRegion(const Region &R, LoopInfo &LI) { +static unsigned getMaxLoopDepthInRegion(const Region &R, LoopInfo &LI, + ScopDetection &SD) { + + const ScopDetection::BoxedLoopsSetTy *BoxedLoops = SD.getBoxedLoops(&R); + unsigned MinLD = INT_MAX, MaxLD = 0; for (BasicBlock *BB : R.blocks()) { if (Loop *L = LI.getLoopFor(BB)) { if (!R.contains(L)) continue; + if (BoxedLoops && BoxedLoops->count(L)) + continue; unsigned LD = L->getLoopDepth(); MinLD = std::min(MinLD, LD); MaxLD = std::max(MaxLD, LD); @@ -1621,7 +1627,7 @@ Scop::Scop(TempScop &tempScop, LoopInfo &LI, ScalarEvolution &ScalarEvolution, ScopDetection &SD, isl_ctx *Context) : SE(&ScalarEvolution), R(tempScop.getMaxRegion()), IsOptimized(false), - MaxLoopDepth(getMaxLoopDepthInRegion(tempScop.getMaxRegion(), LI)) { + MaxLoopDepth(getMaxLoopDepthInRegion(tempScop.getMaxRegion(), LI, SD)) { IslCtx = Context; buildContext(); Index: lib/Analysis/TempScopInfo.cpp =================================================================== --- lib/Analysis/TempScopInfo.cpp +++ lib/Analysis/TempScopInfo.cpp @@ -212,7 +212,9 @@ extern MapInsnToMemAcc InsnToMemAcc; -IRAccess TempScopInfo::buildIRAccess(Instruction *Inst, Loop *L, Region *R) { +IRAccess +TempScopInfo::buildIRAccess(Instruction *Inst, Loop *L, Region *R, + const ScopDetection::BoxedLoopsSetTy *BoxedLoops) { unsigned Size; Type *SizeType; enum IRAccess::TypeKind Type; @@ -240,7 +242,18 @@ return IRAccess(Type, BasePointer->getValue(), AccessFunction, Size, true, Acc->DelinearizedSubscripts, Acc->Shape->DelinearizedSizes); - bool IsAffine = isAffineExpr(R, AccessFunction, *SE, BasePointer->getValue()); + // Check if the access depends on a loop contained in a non-affine subregion. + bool isVariantInNonAffineLoop = false; + if (BoxedLoops) { + SetVector Loops; + findLoops(AccessFunction, Loops); + for (const Loop *L : Loops) + if (BoxedLoops->count(L)) + isVariantInNonAffineLoop = true; + } + + bool IsAffine = !isVariantInNonAffineLoop && + isAffineExpr(R, AccessFunction, *SE, BasePointer->getValue()); SmallVector Subscripts, Sizes; Subscripts.push_back(AccessFunction); @@ -273,10 +286,14 @@ AccFuncSetType Functions; Loop *L = LI->getLoopFor(&BB); + // The set of loops contained in non-affine subregions that are part of R. + const ScopDetection::BoxedLoopsSetTy *BoxedLoops = SD->getBoxedLoops(&R); + for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) { Instruction *Inst = I; if (isa(Inst) || isa(Inst)) - Functions.push_back(std::make_pair(buildIRAccess(Inst, L, &R), Inst)); + Functions.push_back( + std::make_pair(buildIRAccess(Inst, L, &R, BoxedLoops), Inst)); if (PHINode *PHI = dyn_cast(Inst)) buildPHIAccesses(PHI, R, Functions, NonAffineSubRegion); Index: test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_1.ll =================================================================== --- /dev/null +++ test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_1.ll @@ -0,0 +1,75 @@ +; RUN: opt %loadPolly -basicaa -polly-scops -polly-allow-nonaffine -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=true -analyze < %s | FileCheck %s +; +; CHECK: Function: f +; CHECK: Region: %bb1---%bb13 +; CHECK: Max Loop Depth: 1 +; CHECK: Context: +; CHECK: { : } +; CHECK: Assumed Context: +; CHECK: { : } +; CHECK: Alias Groups (0): +; CHECK: n/a +; CHECK: Statements { +; CHECK: Stmt_(bb3 => bb11) +; CHECK: Domain := +; CHECK: { Stmt_(bb3 => bb11)[i0] : i0 >= 0 and i0 <= 1023 }; +; CHECK: Scattering := +; CHECK: { Stmt_(bb3 => bb11)[i0] -> [i0] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_(bb3 => bb11)[i0] -> MemRef_C[i0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_(bb3 => bb11)[i0] -> MemRef_tmp4_s2a[0] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_(bb3 => bb11)[i0] -> MemRef_tmp4_s2a[0] }; +; CHECK: ReadAccess := [Reduction Type: +] [Scalar: 0] +; CHECK: { Stmt_(bb3 => bb11)[i0] -> MemRef_A[o0] : o0 <= 2147483645 and o0 >= -2147483648 }; +; CHECK: MayWriteAccess := [Reduction Type: +] [Scalar: 0] +; CHECK: { Stmt_(bb3 => bb11)[i0] -> MemRef_A[o0] : o0 <= 2147483645 and o0 >= -2147483648 }; +; CHECK: } +; +; void f(int * restrict A, int * restrict C) { +; int j; +; for (int i = 0; i < 1024; i++) { +; while ((j = C[i])) +; A[j]++; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* noalias %A, i32* noalias %C) { +bb: + br label %bb1 + +bb1: ; preds = %bb12, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb12 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb13 + +bb2: ; preds = %bb1 + br label %bb3 + +bb3: ; preds = %bb6, %bb2 + %tmp = getelementptr inbounds i32, i32* %C, i64 %indvars.iv + %tmp4 = load i32, i32* %tmp, align 4 + %tmp5 = icmp eq i32 %tmp4, 0 + br i1 %tmp5, label %bb11, label %bb6 + +bb6: ; preds = %bb3 + %tmp7 = sext i32 %tmp4 to i64 + %tmp8 = getelementptr inbounds i32, i32* %A, i64 %tmp7 + %tmp9 = load i32, i32* %tmp8, align 4 + %tmp10 = add nsw i32 %tmp9, 1 + store i32 %tmp10, i32* %tmp8, align 4 + br label %bb3 + +bb11: ; preds = %bb3 + br label %bb12 + +bb12: ; preds = %bb11 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb13: ; preds = %bb1 + ret void +} Index: test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_2.ll =================================================================== --- /dev/null +++ test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_2.ll @@ -0,0 +1,132 @@ +; RUN: opt %loadPolly -basicaa -polly-scops -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=false -analyze < %s | FileCheck %s --check-prefix=INNERMOST +; RUN: opt %loadPolly -basicaa -polly-scops -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=true -analyze < %s | FileCheck %s --check-prefix=INNERMOST +; RUN: opt %loadPolly -basicaa -polly-scops -polly-allow-nonaffine -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=true -analyze < %s | FileCheck %s --check-prefix=ALL +; +; Here we have a non-affine loop (in the context of the loop nest) +; and also a non-affine access (A[k]). While we can always model the +; innermost loop as a SCoP of depth 1, we can overapproximate the +; innermost loop in the whole loop nest and model A[k] as a non-affine +; access. +; +; INNERMOST: Function: f +; INNERMOST: Region: %bb15---%bb26 +; INNERMOST: Max Loop Depth: 1 +; INNERMOST: p0: {0,+,{0,+,-1}<%bb11>}<%bb13> +; INNERMOST: p1: {0,+,{0,+,1}<%bb11>}<%bb13> +; INNERMOST: p2: {0,+,4}<%bb11> +; INNERMOST: p3: {0,+,4}<%bb13> +; INNERMOST: p4: {0,+,{0,+,4}<%bb11>}<%bb13> +; INNERMOST: Alias Groups (0): +; INNERMOST: n/a +; INNERMOST: Statements { +; INNERMOST: Stmt_bb16 +; INNERMOST: Domain := +; INNERMOST: [p_0, p_1, p_2, p_3, p_4] -> { Stmt_bb16[i0] : (i0 <= 1023 - p_1 and i0 >= 0 and i0 <= 1024 + p_0) or (i0 >= 0 and i0 >= 1025 - p_1 and i0 <= 1024 + p_0) }; +; INNERMOST: Scattering := +; INNERMOST: [p_0, p_1, p_2, p_3, p_4] -> { Stmt_bb16[i0] -> [i0] }; +; INNERMOST: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; INNERMOST: [p_0, p_1, p_2, p_3, p_4] -> { Stmt_bb16[i0] -> MemRef_A[o0] : 4o0 = p_2 }; +; INNERMOST: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; INNERMOST: [p_0, p_1, p_2, p_3, p_4] -> { Stmt_bb16[i0] -> MemRef_A[o0] : 4o0 = p_3 }; +; INNERMOST: ReadAccess := [Reduction Type: +] [Scalar: 0] +; INNERMOST: [p_0, p_1, p_2, p_3, p_4] -> { Stmt_bb16[i0] -> MemRef_A[o0] : 4o0 = p_4 + 4i0 }; +; INNERMOST: MustWriteAccess := [Reduction Type: +] [Scalar: 0] +; INNERMOST: [p_0, p_1, p_2, p_3, p_4] -> { Stmt_bb16[i0] -> MemRef_A[o0] : 4o0 = p_4 + 4i0 }; +; INNERMOST: } +; +; ALL: Function: f +; ALL: Region: %bb11---%bb29 +; ALL: Max Loop Depth: 2 +; ALL: Context: +; ALL: { : } +; ALL: Assumed Context: +; ALL: { : } +; ALL: Alias Groups (0): +; ALL: n/a +; ALL: Statements { +; ALL: Stmt_(bb15 => bb25) +; ALL: Domain := +; ALL: { Stmt_(bb15 => bb25)[i0, i1] : i0 >= 0 and i0 <= 1023 and i1 >= 0 and i1 <= 1023 }; +; ALL: Scattering := +; ALL: { Stmt_(bb15 => bb25)[i0, i1] -> [i0, i1] }; +; ALL: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; ALL: { Stmt_(bb15 => bb25)[i0, i1] -> MemRef_A[i0] }; +; ALL: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; ALL: { Stmt_(bb15 => bb25)[i0, i1] -> MemRef_A[i1] }; +; ALL: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; ALL: { Stmt_(bb15 => bb25)[i0, i1] -> MemRef_A[o0] : o0 <= 2305843009213693949 and o0 >= 0 }; +; ALL: MayWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; ALL: { Stmt_(bb15 => bb25)[i0, i1] -> MemRef_A[o0] : o0 <= 2305843009213693949 and o0 >= 0 }; +; ALL: } +; +; void f(int *A) { +; for (int i = 0; i < 1024; i++) +; for (int j = 0; j < 1024; j++) +; for (int k = i *j; k < 1024; k++) +; A[k] += A[i] + A[j]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +bb: + br label %bb11 + +bb11: ; preds = %bb28, %bb + %indvars.iv8 = phi i64 [ %indvars.iv.next9, %bb28 ], [ 0, %bb ] + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %bb28 ], [ 0, %bb ] + %exitcond10 = icmp ne i64 %indvars.iv8, 1024 + br i1 %exitcond10, label %bb12, label %bb29 + +bb12: ; preds = %bb11 + br label %bb13 + +bb13: ; preds = %bb26, %bb12 + %indvars.iv5 = phi i64 [ %indvars.iv.next6, %bb26 ], [ 0, %bb12 ] + %indvars.iv3 = phi i64 [ %indvars.iv.next4, %bb26 ], [ 0, %bb12 ] + %exitcond7 = icmp ne i64 %indvars.iv5, 1024 + br i1 %exitcond7, label %bb14, label %bb27 + +bb14: ; preds = %bb13 + br label %bb15 + +bb15: ; preds = %bb24, %bb14 + %indvars.iv = phi i64 [ %indvars.iv.next, %bb24 ], [ %indvars.iv3, %bb14 ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb16, label %bb25 + +bb16: ; preds = %bb15 + %tmp = getelementptr inbounds i32, i32* %A, i64 %indvars.iv8 + %tmp17 = load i32, i32* %tmp, align 4 + %tmp18 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv5 + %tmp19 = load i32, i32* %tmp18, align 4 + %tmp20 = add nsw i32 %tmp17, %tmp19 + %tmp21 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp22 = load i32, i32* %tmp21, align 4 + %tmp23 = add nsw i32 %tmp22, %tmp20 + store i32 %tmp23, i32* %tmp21, align 4 + br label %bb24 + +bb24: ; preds = %bb16 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb15 + +bb25: ; preds = %bb15 + br label %bb26 + +bb26: ; preds = %bb25 + %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1 + %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, %indvars.iv1 + br label %bb13 + +bb27: ; preds = %bb13 + br label %bb28 + +bb28: ; preds = %bb27 + %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1 + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + br label %bb11 + +bb29: ; preds = %bb11 + ret void +} Index: test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll =================================================================== --- /dev/null +++ test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll @@ -0,0 +1,135 @@ +; RUN: opt %loadPolly -basicaa -polly-scops -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=false -analyze < %s | FileCheck %s --check-prefix=INNERMOST +; RUN: opt %loadPolly -basicaa -polly-scops -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=true -analyze < %s | FileCheck %s --check-prefix=INNERMOST +; RUN: opt %loadPolly -basicaa -polly-scops -polly-allow-nonaffine -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=true -analyze < %s | FileCheck %s --check-prefix=ALL +; +; Here we have a non-affine loop (in the context of the loop nest) +; and also a non-affine access (A[k]). While we can always model the +; innermost loop as a SCoP of depth 1, we can overapproximate the +; innermost loop in the whole loop nest and model A[k] as a non-affine +; access. +; +; INNERMOST: Function: f +; INNERMOST: Region: %bb15---%bb26 +; INNERMOST: Max Loop Depth: 1 +; INNERMOST: Context: +; INNERMOST: [p_0, p_1, p_2] -> { : p_0 >= 0 and p_0 <= 2147483647 and p_1 >= 0 and p_1 <= 4096 and p_2 >= 0 and p_2 <= 4096 } +; INNERMOST: Assumed Context: +; INNERMOST: [p_0, p_1, p_2] -> { : } +; INNERMOST: p0: {0,+,{0,+,1}<%bb11>}<%bb13> +; INNERMOST: p1: {0,+,4}<%bb11> +; INNERMOST: p2: {0,+,4}<%bb13> +; INNERMOST: Alias Groups (0): +; INNERMOST: n/a +; INNERMOST: Statements { +; INNERMOST: Stmt_bb16 +; INNERMOST: Domain := +; INNERMOST: [p_0, p_1, p_2] -> { Stmt_bb16[i0] : i0 >= 0 and i0 <= -1 + p_0 }; +; INNERMOST: Scattering := +; INNERMOST: [p_0, p_1, p_2] -> { Stmt_bb16[i0] -> [i0] }; +; INNERMOST: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; INNERMOST: [p_0, p_1, p_2] -> { Stmt_bb16[i0] -> MemRef_A[o0] : 4o0 = p_1 }; +; INNERMOST: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; INNERMOST: [p_0, p_1, p_2] -> { Stmt_bb16[i0] -> MemRef_A[o0] : 4o0 = p_2 }; +; INNERMOST: ReadAccess := [Reduction Type: +] [Scalar: 0] +; INNERMOST: [p_0, p_1, p_2] -> { Stmt_bb16[i0] -> MemRef_A[i0] }; +; INNERMOST: MustWriteAccess := [Reduction Type: +] [Scalar: 0] +; INNERMOST: [p_0, p_1, p_2] -> { Stmt_bb16[i0] -> MemRef_A[i0] }; +; INNERMOST: } +; +; ALL: Function: f +; ALL: Region: %bb11---%bb29 +; ALL: Max Loop Depth: 2 +; ALL: Context: +; ALL: { : } +; ALL: Assumed Context: +; ALL: { : } +; ALL: Alias Groups (0): +; ALL: n/a +; ALL: Statements { +; ALL: Stmt_(bb15 => bb25) +; ALL: Domain := +; ALL: { Stmt_(bb15 => bb25)[i0, i1] : i0 >= 0 and i0 <= 1023 and i1 >= 0 and i1 <= 1023 }; +; ALL: Scattering := +; ALL: { Stmt_(bb15 => bb25)[i0, i1] -> [i0, i1] }; +; ALL: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; ALL: { Stmt_(bb15 => bb25)[i0, i1] -> MemRef_A[i0] }; +; ALL: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; ALL: { Stmt_(bb15 => bb25)[i0, i1] -> MemRef_A[i1] }; +; ALL: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; ALL: { Stmt_(bb15 => bb25)[i0, i1] -> MemRef_A[o0] : o0 <= 4294967293 and o0 >= 0 }; +; ALL: MayWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; ALL: { Stmt_(bb15 => bb25)[i0, i1] -> MemRef_A[o0] : o0 <= 4294967293 and o0 >= 0 }; +; ALL: } +; +; void f(int *A) { +; for (int i = 0; i < 1024; i++) +; for (int j = 0; j < 1024; j++) +; for (int k = 0; k < i * j; k++) +; A[k] += A[i] + A[j]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +bb: + br label %bb11 + +bb11: ; preds = %bb28, %bb + %indvars.iv8 = phi i64 [ %indvars.iv.next9, %bb28 ], [ 0, %bb ] + %indvars.iv1 = phi i32 [ %indvars.iv.next2, %bb28 ], [ 0, %bb ] + %exitcond10 = icmp ne i64 %indvars.iv8, 1024 + br i1 %exitcond10, label %bb12, label %bb29 + +bb12: ; preds = %bb11 + br label %bb13 + +bb13: ; preds = %bb26, %bb12 + %indvars.iv5 = phi i64 [ %indvars.iv.next6, %bb26 ], [ 0, %bb12 ] + %indvars.iv3 = phi i32 [ %indvars.iv.next4, %bb26 ], [ 0, %bb12 ] + %exitcond7 = icmp ne i64 %indvars.iv5, 1024 + br i1 %exitcond7, label %bb14, label %bb27 + +bb14: ; preds = %bb13 + br label %bb15 + +bb15: ; preds = %bb24, %bb14 + %indvars.iv = phi i64 [ %indvars.iv.next, %bb24 ], [ 0, %bb14 ] + %lftr.wideiv = trunc i64 %indvars.iv to i32 + %exitcond = icmp ne i32 %lftr.wideiv, %indvars.iv3 + br i1 %exitcond, label %bb16, label %bb25 + +bb16: ; preds = %bb15 + %tmp = getelementptr inbounds i32, i32* %A, i64 %indvars.iv8 + %tmp17 = load i32, i32* %tmp, align 4 + %tmp18 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv5 + %tmp19 = load i32, i32* %tmp18, align 4 + %tmp20 = add nsw i32 %tmp17, %tmp19 + %tmp21 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp22 = load i32, i32* %tmp21, align 4 + %tmp23 = add nsw i32 %tmp22, %tmp20 + store i32 %tmp23, i32* %tmp21, align 4 + br label %bb24 + +bb24: ; preds = %bb16 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb15 + +bb25: ; preds = %bb15 + br label %bb26 + +bb26: ; preds = %bb25 + %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1 + %indvars.iv.next4 = add nuw nsw i32 %indvars.iv3, %indvars.iv1 + br label %bb13 + +bb27: ; preds = %bb13 + br label %bb28 + +bb28: ; preds = %bb27 + %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1 + %indvars.iv.next2 = add nuw nsw i32 %indvars.iv1, 1 + br label %bb11 + +bb29: ; preds = %bb11 + ret void +} Index: test/ScopInfo/NonAffine/non_affine_conditional_surrounding_affine_loop.ll =================================================================== --- /dev/null +++ test/ScopInfo/NonAffine/non_affine_conditional_surrounding_affine_loop.ll @@ -0,0 +1,105 @@ +; RUN: opt %loadPolly -polly-scops -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=true -analyze < %s | FileCheck %s --check-prefix=INNERMOST +; RUN: opt %loadPolly -polly-scops -polly-allow-nonaffine -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=true -analyze < %s | FileCheck %s --check-prefix=ALL +; +; INNERMOST: Function: f +; INNERMOST: Region: %bb9---%bb17 +; INNERMOST: Max Loop Depth: 1 +; INNERMOST: Context: +; INNERMOST: [N] -> { : } +; INNERMOST: Assumed Context: +; INNERMOST: [N] -> { : } +; INNERMOST: p0: %N +; INNERMOST: Alias Groups (0): +; INNERMOST: n/a +; INNERMOST: Statements { +; INNERMOST: Stmt_bb11 +; INNERMOST: Domain := +; INNERMOST: [N] -> { Stmt_bb11[i0] : i0 >= 0 and N >= 1 and i0 <= -1 + N }; +; INNERMOST: Scattering := +; INNERMOST: [N] -> { Stmt_bb11[i0] -> [i0] }; +; INNERMOST: ReadAccess := [Reduction Type: +] [Scalar: 0] +; INNERMOST: [N] -> { Stmt_bb11[i0] -> MemRef_A[i0] }; +; INNERMOST: MustWriteAccess := [Reduction Type: +] [Scalar: 0] +; INNERMOST: [N] -> { Stmt_bb11[i0] -> MemRef_A[i0] }; +; INNERMOST: } +; +; ALL: Function: f +; ALL: Region: %bb3---%bb19 +; ALL: Max Loop Depth: 1 +; ALL: Context: +; ALL: { : } +; ALL: Assumed Context: +; ALL: { : } +; ALL: Alias Groups (0): +; ALL: n/a +; ALL: Statements { +; ALL: Stmt_(bb4 => bb17) +; ALL: Domain := +; ALL: { Stmt_(bb4 => bb17)[i0] : i0 >= 0 and i0 <= 1023 }; +; ALL: Scattering := +; ALL: { Stmt_(bb4 => bb17)[i0] -> [i0] }; +; ALL: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; ALL: { Stmt_(bb4 => bb17)[i0] -> MemRef_A[i0] }; +; ALL: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; ALL: { Stmt_(bb4 => bb17)[i0] -> MemRef_A[o0] : o0 <= 2147483645 and o0 >= 0 }; +; ALL: MayWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; ALL: { Stmt_(bb4 => bb17)[i0] -> MemRef_A[o0] : o0 <= 2147483645 and o0 >= 0 }; +; ALL: } +; +; void f(int *A, int N) { +; for (int i = 0; i < 1024; i++) +; if (A[i]) +; for (int j = 0; j < N; j++) +; A[j]++; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32 %N) { +bb: + %tmp = sext i32 %N to i64 + br label %bb3 + +bb3: ; preds = %bb18, %bb + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %bb18 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv1, 1024 + br i1 %exitcond, label %bb4, label %bb19 + +bb4: ; preds = %bb3 + %tmp5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1 + %tmp6 = load i32, i32* %tmp5, align 4 + %tmp7 = icmp eq i32 %tmp6, 0 + br i1 %tmp7, label %bb17, label %bb8 + +bb8: ; preds = %bb4 + br label %bb9 + +bb9: ; preds = %bb15, %bb8 + %indvars.iv = phi i64 [ %indvars.iv.next, %bb15 ], [ 0, %bb8 ] + %tmp10 = icmp slt i64 %indvars.iv, %tmp + br i1 %tmp10, label %bb11, label %bb16 + +bb11: ; preds = %bb9 + %tmp12 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp13 = load i32, i32* %tmp12, align 4 + %tmp14 = add nsw i32 %tmp13, 1 + store i32 %tmp14, i32* %tmp12, align 4 + br label %bb15 + +bb15: ; preds = %bb11 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb9 + +bb16: ; preds = %bb9 + br label %bb17 + +bb17: ; preds = %bb4, %bb16 + br label %bb18 + +bb18: ; preds = %bb17 + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + br label %bb3 + +bb19: ; preds = %bb3 + ret void +} Index: test/ScopInfo/NonAffine/non_affine_conditional_surrounding_non_affine_loop.ll =================================================================== --- /dev/null +++ test/ScopInfo/NonAffine/non_affine_conditional_surrounding_non_affine_loop.ll @@ -0,0 +1,106 @@ +; RUN: opt %loadPolly -polly-scops -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=true -analyze < %s | FileCheck %s --check-prefix=INNERMOST +; RUN: opt %loadPolly -polly-scops -polly-allow-nonaffine -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=true -analyze < %s | FileCheck %s --check-prefix=ALL +; +; INNERMOST: Function: f +; INNERMOST: Region: %bb9---%bb18 +; INNERMOST: Max Loop Depth: 1 +; INNERMOST: Context: +; INNERMOST: [p_0] -> { : p_0 >= -2199023255552 and p_0 <= 2199023254528 } +; INNERMOST: Assumed Context: +; INNERMOST: [p_0] -> { : } +; INNERMOST: p0: {0,+,(sext i32 %N to i64)}<%bb3> +; INNERMOST: Alias Groups (0): +; INNERMOST: n/a +; INNERMOST: Statements { +; INNERMOST: Stmt_bb12 +; INNERMOST: Domain := +; INNERMOST: [p_0] -> { Stmt_bb12[i0] : i0 >= 0 and p_0 >= 1 and i0 <= -1 + p_0 }; +; INNERMOST: Scattering := +; INNERMOST: [p_0] -> { Stmt_bb12[i0] -> [i0] }; +; INNERMOST: ReadAccess := [Reduction Type: +] [Scalar: 0] +; INNERMOST: [p_0] -> { Stmt_bb12[i0] -> MemRef_A[i0] }; +; INNERMOST: MustWriteAccess := [Reduction Type: +] [Scalar: 0] +; INNERMOST: [p_0] -> { Stmt_bb12[i0] -> MemRef_A[i0] }; +; INNERMOST: } +; +; ALL: Function: f +; ALL: Region: %bb3---%bb20 +; ALL: Max Loop Depth: 1 +; ALL: Context: +; ALL: { : } +; ALL: Assumed Context: +; ALL: { : } +; ALL: Alias Groups (0): +; ALL: n/a +; ALL: Statements { +; ALL: Stmt_(bb4 => bb18) +; ALL: Domain := +; ALL: { Stmt_(bb4 => bb18)[i0] : i0 >= 0 and i0 <= 1023 }; +; ALL: Scattering := +; ALL: { Stmt_(bb4 => bb18)[i0] -> [i0] }; +; ALL: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; ALL: { Stmt_(bb4 => bb18)[i0] -> MemRef_A[i0] }; +; ALL: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; ALL: { Stmt_(bb4 => bb18)[i0] -> MemRef_A[o0] : o0 <= 2199023254526 and o0 >= 0 }; +; ALL: MayWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; ALL: { Stmt_(bb4 => bb18)[i0] -> MemRef_A[o0] : o0 <= 2199023254526 and o0 >= 0 }; +; ALL: } +; +; void f(int *A, int N) { +; for (int i = 0; i < 1024; i++) +; if (A[i]) +; for (int j = 0; j < N * i; j++) +; A[j]++; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32 %N) { +bb: + %tmp = sext i32 %N to i64 + br label %bb3 + +bb3: ; preds = %bb19, %bb + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %bb19 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv1, 1024 + br i1 %exitcond, label %bb4, label %bb20 + +bb4: ; preds = %bb3 + %tmp5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1 + %tmp6 = load i32, i32* %tmp5, align 4 + %tmp7 = icmp eq i32 %tmp6, 0 + br i1 %tmp7, label %bb18, label %bb8 + +bb8: ; preds = %bb4 + br label %bb9 + +bb9: ; preds = %bb16, %bb8 + %indvars.iv = phi i64 [ %indvars.iv.next, %bb16 ], [ 0, %bb8 ] + %tmp10 = mul nsw i64 %indvars.iv1, %tmp + %tmp11 = icmp slt i64 %indvars.iv, %tmp10 + br i1 %tmp11, label %bb12, label %bb17 + +bb12: ; preds = %bb9 + %tmp13 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp14 = load i32, i32* %tmp13, align 4 + %tmp15 = add nsw i32 %tmp14, 1 + store i32 %tmp15, i32* %tmp13, align 4 + br label %bb16 + +bb16: ; preds = %bb12 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb9 + +bb17: ; preds = %bb9 + br label %bb18 + +bb18: ; preds = %bb4, %bb17 + br label %bb19 + +bb19: ; preds = %bb18 + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + br label %bb3 + +bb20: ; preds = %bb3 + ret void +} Index: test/ScopInfo/NonAffine/non_affine_loop_condition.ll =================================================================== --- /dev/null +++ test/ScopInfo/NonAffine/non_affine_loop_condition.ll @@ -0,0 +1,63 @@ +; RUN: opt %loadPolly -polly-scops -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops -analyze < %s | FileCheck %s +; +; void f(int *A, int *C) { +; for (int i = 0; i < 1024; i++) { +; while (C[i]) +; A[i]++; +; } +; } +; +; CHECK: Function: f +; CHECK: Region: %bb1---%bb12 +; CHECK: Max Loop Depth: 1 +; CHECK: Statements { +; CHECK: Stmt_(bb3 => bb10) +; CHECK: Domain := +; CHECK: { Stmt_(bb3 => bb10)[i0] : i0 >= 0 and i0 <= 1023 }; +; CHECK: Scattering := +; CHECK: { Stmt_(bb3 => bb10)[i0] -> [i0] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_(bb3 => bb10)[i0] -> MemRef_C[i0] }; +; CHECK: ReadAccess := [Reduction Type: +] [Scalar: 0] +; CHECK: { Stmt_(bb3 => bb10)[i0] -> MemRef_A[i0] }; +; CHECK: MayWriteAccess := [Reduction Type: +] [Scalar: 0] +; CHECK: { Stmt_(bb3 => bb10)[i0] -> MemRef_A[i0] }; +; CHECK: } + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32* %C) { +bb: + br label %bb1 + +bb1: ; preds = %bb11, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb11 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb12 + +bb2: ; preds = %bb1 + br label %bb3 + +bb3: ; preds = %bb6, %bb2 + %tmp = getelementptr inbounds i32, i32* %C, i64 %indvars.iv + %tmp4 = load i32, i32* %tmp, align 4 + %tmp5 = icmp eq i32 %tmp4, 0 + br i1 %tmp5, label %bb10, label %bb6 + +bb6: ; preds = %bb3 + %tmp7 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp8 = load i32, i32* %tmp7, align 4 + %tmp9 = add nsw i32 %tmp8, 1 + store i32 %tmp9, i32* %tmp7, align 4 + br label %bb3 + +bb10: ; preds = %bb3 + br label %bb11 + +bb11: ; preds = %bb10 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb12: ; preds = %bb1 + ret void +} Index: test/ScopInfo/NonAffine/non_affine_loop_used_later.ll =================================================================== --- /dev/null +++ test/ScopInfo/NonAffine/non_affine_loop_used_later.ll @@ -0,0 +1,137 @@ +; RUN: opt %loadPolly -polly-scops -polly-model-phi-nodes -disable-polly-intra-scop-scalar-to-array -polly-allow-nonaffine -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops -analyze < %s | FileCheck %s +; +; Verify that we over approximate the read acces of A[j] in the last statement as j is +; computed in a non-affine loop we do not model. +; +; CHECK: Function: f +; CHECK: Region: %bb2---%bb24 +; CHECK: Max Loop Depth: 1 +; CHECK: Context: +; CHECK: [N] -> { : } +; CHECK: Assumed Context: +; CHECK: [N] -> { : } +; CHECK: p0: %N +; CHECK: Alias Groups (0): +; CHECK: n/a +; CHECK: Statements { +; CHECK: Stmt_bb2 +; CHECK: Domain := +; CHECK: [N] -> { Stmt_bb2[i0] : i0 >= 0 and N >= 1 and i0 <= N; Stmt_bb2[0] : N <= 0 }; +; CHECK: Scattering := +; CHECK: [N] -> { Stmt_bb2[i0] -> [i0, 0] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: [N] -> { Stmt_bb2[i0] -> MemRef_j_0[] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: [N] -> { Stmt_bb2[i0] -> MemRef_j_0[] }; +; CHECK: Stmt_(bb4 => bb18) +; CHECK: Domain := +; CHECK: [N] -> { Stmt_(bb4 => bb18)[i0] : i0 >= 0 and N >= 1 and i0 <= -1 + N }; +; CHECK: Scattering := +; CHECK: [N] -> { Stmt_(bb4 => bb18)[i0] -> [i0, 1] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [N] -> { Stmt_(bb4 => bb18)[i0] -> MemRef_A[i0] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: [N] -> { Stmt_(bb4 => bb18)[i0] -> MemRef_j_0[] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: [N] -> { Stmt_(bb4 => bb18)[i0] -> MemRef_j_2[] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [N] -> { Stmt_(bb4 => bb18)[i0] -> MemRef_A[i0] }; +; CHECK: MayWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [N] -> { Stmt_(bb4 => bb18)[i0] -> MemRef_A[i0] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: [N] -> { Stmt_(bb4 => bb18)[i0] -> MemRef_smax[] }; +; CHECK: MayWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: [N] -> { Stmt_(bb4 => bb18)[i0] -> MemRef_j_2[] }; +; CHECK: Stmt_bb18 +; CHECK: Domain := +; CHECK: [N] -> { Stmt_bb18[i0] : i0 >= 0 and N >= 1 and i0 <= -1 + N }; +; CHECK: Scattering := +; CHECK: [N] -> { Stmt_bb18[i0] -> [i0, 2] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: [N] -> { Stmt_bb18[i0] -> MemRef_j_2[] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: [N] -> { Stmt_bb18[i0] -> MemRef_j_2[] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [N] -> { Stmt_bb18[i0] -> MemRef_A[o0] : o0 >= -2147483648 and o0 <= 2147483645 }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [N] -> { Stmt_bb18[i0] -> MemRef_A[i0] }; +; CHECK: Stmt_bb23 +; CHECK: Domain := +; CHECK: [N] -> { Stmt_bb23[i0] : i0 >= 0 and N >= 1 and i0 <= -1 + N }; +; CHECK: Scattering := +; CHECK: [N] -> { Stmt_bb23[i0] -> [i0, 3] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: [N] -> { Stmt_bb23[i0] -> MemRef_j_2[] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: [N] -> { Stmt_bb23[i0] -> MemRef_j_0[] }; +; CHECK: } +; +; void f(int *A, int N, int M) { +; int i = 0, j = 0; +; for (i = 0; i < N; i++) { +; if (A[i]) +; for (j = 0; j < M; j++) +; A[i]++; +; A[i] = A[j]; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32 %N, i32 %M) { +bb: + %tmp = icmp sgt i32 %M, 0 + %smax = select i1 %tmp, i32 %M, i32 0 + %tmp1 = sext i32 %N to i64 + br label %bb2 + +bb2: ; preds = %bb23, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb23 ], [ 0, %bb ] + %j.0 = phi i32 [ 0, %bb ], [ %j.2, %bb23 ] + %tmp3 = icmp slt i64 %indvars.iv, %tmp1 + br i1 %tmp3, label %bb4, label %bb24 + +bb4: ; preds = %bb2 + %tmp5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp6 = load i32, i32* %tmp5, align 4 + %tmp7 = icmp eq i32 %tmp6, 0 + br i1 %tmp7, label %bb18, label %bb8 + +bb8: ; preds = %bb4 + br label %bb9 + +bb9: ; preds = %bb15, %bb8 + %j.1 = phi i32 [ 0, %bb8 ], [ %tmp16, %bb15 ] + %tmp10 = icmp slt i32 %j.1, %M + br i1 %tmp10, label %bb11, label %bb17 + +bb11: ; preds = %bb9 + %tmp12 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp13 = load i32, i32* %tmp12, align 4 + %tmp14 = add nsw i32 %tmp13, 1 + store i32 %tmp14, i32* %tmp12, align 4 + br label %bb15 + +bb15: ; preds = %bb11 + %tmp16 = add nuw nsw i32 %j.1, 1 + br label %bb9 + +bb17: ; preds = %bb9 + br label %bb18 + +bb18: ; preds = %bb4, %bb17 + %j.2 = phi i32 [ %smax, %bb17 ], [ %j.0, %bb4 ] + %tmp19 = sext i32 %j.2 to i64 + %tmp20 = getelementptr inbounds i32, i32* %A, i64 %tmp19 + %tmp21 = load i32, i32* %tmp20, align 4 + %tmp22 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %tmp21, i32* %tmp22, align 4 + br label %bb23 + +bb23: ; preds = %bb18 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb2 + +bb24: ; preds = %bb2 + ret void +} Index: test/ScopInfo/non_affine_parametric_loop.ll =================================================================== --- /dev/null +++ test/ScopInfo/non_affine_parametric_loop.ll @@ -1,37 +0,0 @@ -; RUN: opt %loadPolly -polly-detect-unprofitable -basicaa -polly-scops -analyze -polly-allow-nonaffine < %s | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; void foo(long n, double A[], int INDEX[]) { -; for (long i = 0; i < n; i++) -; A[INDEX[i]] = i; -; } - -define void @foo(i64 %n, double* noalias %A, i64* noalias %INDEX) { -entry: - br label %for.body - -for.body: - %i = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i64, i64* %INDEX, i64 %i - %val = load i64, i64* %arrayidx - %arrayidx1 = getelementptr inbounds double, double* %A, i64 %val - store double 1.0, double* %arrayidx1 - %inc = add nsw i64 %i, 1 - %exitcond = icmp eq i64 %inc, %n - br i1 %exitcond, label %for.end, label %for.body - -for.end: - ret void -} - -; CHECK: p0: %n - -; CHECK: Domain -; CHECK: [n] -> { Stmt_for_body[i0] : i0 >= 0 and i0 <= -1 + n }; -; CHECK: Scattering -; CHECK: [n] -> { Stmt_for_body[i0] -> [i0] }; -; CHECK: ReadAccess -; CHECK: [n] -> { Stmt_for_body[i0] -> MemRef_INDEX[i0] }; -; CHECK: WriteAccess -; CHECK: [n] -> { Stmt_for_body[i0] -> MemRef_A[o0] : o0 >= -1152921504606846976 and o0 <= 1152921504606846973 };