Index: include/polly/ScopDetection.h =================================================================== --- include/polly/ScopDetection.h +++ include/polly/ScopDetection.h @@ -123,6 +123,9 @@ public: typedef SetVector RegionSet; + /// @brief Set of loops (used to remember loops in non-affine subregions). + using BoxedLoopsSetTy = SetVector; + private: //===--------------------------------------------------------------------===// ScopDetection(const ScopDetection &) = delete; @@ -142,6 +145,10 @@ DenseMap; NonAffineSubRegionMapTy NonAffineSubRegionMap; + /// @brief Map to remember loops in non-affine regions. + using BoxedLoopsMapTy = DenseMap; + BoxedLoopsMapTy BoxedLoopsMap; + /// @brief Context variables for SCoP detection. struct DetectionContext { Region &CurRegion; // The region to check. @@ -168,13 +175,21 @@ /// @brief The region has at least one store instruction. bool hasStores; + /// @brief The region has at least one loop that is not overapproximated. + bool hasAffineLoops; + /// @brief The set of non-affine subregions in the region we analyze. NonAffineSubRegionSetTy &NonAffineSubRegionSet; + /// @brief The set of loops contained in non-affine regions. + BoxedLoopsSetTy &BoxedLoopsSet; + DetectionContext(Region &R, AliasAnalysis &AA, - NonAffineSubRegionSetTy &NABS, bool Verify) + NonAffineSubRegionSetTy &NASRS, BoxedLoopsSetTy &BLS, + bool Verify) : CurRegion(R), AST(AA), Verifying(Verify), Log(&R), hasLoads(false), - hasStores(false), NonAffineSubRegionSet(NABS) {} + hasStores(false), hasAffineLoops(false), NonAffineSubRegionSet(NASRS), + BoxedLoopsSet(BLS) {} }; // Remember the valid regions @@ -183,6 +198,14 @@ // Remember a list of errors for every region. mutable RejectLogsContainer RejectLogs; + /// @brief Add the region @p AR as over approximated sub-region in @p Context. + /// + /// @param AR The non-affine subregion. + /// @param Context The current detection context. 
+ /// + /// @returns True if the subregion can be over approximated, false otherwise. + bool addOverApproximatedRegion(Region *AR, DetectionContext &Context) const; + // Delinearize all non affine memory accesses and return false when there // exists a non affine memory access that cannot be delinearized. Return true // when all array accesses are affine after delinearization. @@ -310,6 +333,9 @@ /// @return Return true if R is the maximum Region in a Scop, false otherwise. bool isMaxRegionInScop(const Region &R, bool Verify = true) const; + /// @brief Return the set of loops in non-affine subregions for @p R. + const BoxedLoopsSetTy *getBoxedLoops(const Region *R) const; + /// @brief Return true if @p SubR is a non-affine subregion in @p ScopR. bool isNonAffineSubRegion(const Region *SubR, const Region *ScopR) const; Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -131,6 +131,12 @@ cl::desc("Allow non affine conditions for branches"), cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); +static cl::opt + AllowNonAffineSubLoops("polly-allow-nonaffine-loops", + cl::desc("Allow non affine conditions for loops"), + cl::Hidden, cl::init(false), cl::ZeroOrMore, + cl::cat(PollyCategory)); + static cl::opt AllowUnsigned("polly-allow-unsigned", cl::desc("Allow unsigned expressions"), cl::Hidden, cl::init(false), cl::ZeroOrMore, @@ -257,9 +263,11 @@ return false; if (Verify) { + BoxedLoopsSetTy DummyBoxedLoopsSet; NonAffineSubRegionSetTy DummyNonAffineSubRegionSet; DetectionContext Context(const_cast(R), *AA, - DummyNonAffineSubRegionSet, false /*verifying*/); + DummyNonAffineSubRegionSet, DummyBoxedLoopsSet, + false /*verifying*/); return isValidRegion(Context); } @@ -283,13 +291,22 @@ return RR->getMessage(); } -static bool containsLoop(Region *R, LoopInfo *LI) { - for (BasicBlock *BB : R->blocks()) { +bool 
ScopDetection::addOverApproximatedRegion(Region *AR, + DetectionContext &Context) const { + + // If we already know about AR we can exit. + if (!Context.NonAffineSubRegionSet.insert(AR)) + return true; + + // All loops in the region have to be overapproximated too if there + // are accesses that depend on the iteration count. + for (BasicBlock *BB : AR->blocks()) { Loop *L = LI->getLoopFor(BB); - if (R->contains(L)) - return true; + if (AR->contains(L)) + Context.BoxedLoopsSet.insert(L); } - return false; + + return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty()); } bool ScopDetection::isValidCFG(BasicBlock &BB, @@ -318,9 +335,8 @@ // Only Constant and ICmpInst are allowed as condition. if (!(isa(Condition) || isa(Condition))) { - if (AllowNonAffineSubRegions && !containsLoop(RI->getRegionFor(&BB), LI)) - Context.NonAffineSubRegionSet.insert(RI->getRegionFor(&BB)); - else + if (!AllowNonAffineSubRegions || + !addOverApproximatedRegion(RI->getRegionFor(&BB), Context)) return invalid(Context, /*Assert=*/true, Br, &BB); } @@ -347,9 +363,8 @@ if (!isAffineExpr(&CurRegion, LHS, *SE) || !isAffineExpr(&CurRegion, RHS, *SE)) { - if (AllowNonAffineSubRegions && !containsLoop(RI->getRegionFor(&BB), LI)) - Context.NonAffineSubRegionSet.insert(RI->getRegionFor(&BB)); - else + if (!AllowNonAffineSubRegions || + !addOverApproximatedRegion(RI->getRegionFor(&BB), Context)) return invalid(Context, /*Assert=*/true, &BB, LHS, RHS, ICmp); } @@ -579,13 +594,21 @@ Context.ElementSize[BasePointer] = Size; } - if (PollyDelinearize) { + bool isVariantInNonAffineLoop = false; + SetVector Loops; + findLoops(AccessFunction, Loops); + for (const Loop *L : Loops) + if (Context.BoxedLoopsSet.count(L)) + isVariantInNonAffineLoop = true; + + if (PollyDelinearize && !isVariantInNonAffineLoop) { Context.Accesses[BasePointer].push_back({&Inst, AccessFunction}); if (!isAffineExpr(&CurRegion, AccessFunction, *SE, BaseValue)) Context.NonAffineAccesses.insert(BasePointer); } else if 
(!AllowNonAffine) { - if (!isAffineExpr(&CurRegion, AccessFunction, *SE, BaseValue)) + if (isVariantInNonAffineLoop || + !isAffineExpr(&CurRegion, AccessFunction, *SE, BaseValue)) return invalid(Context, /*Assert=*/true, AccessFunction, &Inst, BaseValue); } @@ -672,8 +695,17 @@ bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const { // Is the loop count affine? const SCEV *LoopCount = SE->getBackedgeTakenCount(L); - if (isAffineExpr(&Context.CurRegion, LoopCount, *SE)) + if (isAffineExpr(&Context.CurRegion, LoopCount, *SE)) { + Context.hasAffineLoops = true; return true; + } + + if (AllowNonAffineSubRegions) { + Region *R = RI->getRegionFor(L->getHeader()); + if (R->contains(L)) + if (addOverApproximatedRegion(R, Context)) + return true; + } return invalid(Context, /*Assert=*/true, L, LoopCount); } @@ -686,9 +718,9 @@ DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n"); while (ExpandedRegion) { - DetectionContext Context(*ExpandedRegion, *AA, - NonAffineSubRegionMap[ExpandedRegion], - false /* verifying */); + DetectionContext Context( + *ExpandedRegion, *AA, NonAffineSubRegionMap[ExpandedRegion], + BoxedLoopsMap[ExpandedRegion], false /* verifying */); DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n"); // Only expand when we did not collect errors. @@ -761,7 +793,7 @@ } void ScopDetection::findScops(Region &R) { - DetectionContext Context(R, *AA, NonAffineSubRegionMap[&R], + DetectionContext Context(R, *AA, NonAffineSubRegionMap[&R], BoxedLoopsMap[&R], false /*verifying*/); bool RegionIsValid = false; @@ -910,6 +942,11 @@ if (!DetectUnprofitable && (!Context.hasStores || !Context.hasLoads)) invalid(Context, /*Assert=*/true, &CurRegion); + // Check if there was at least one non-overapproximated loop in the region or + // we allow regions without loops. 
+ if (!DetectRegionsWithoutLoops && !Context.hasAffineLoops) + invalid(Context, /*Assert=*/true, &CurRegion); + DEBUG(dbgs() << "OK\n"); return true; } @@ -1000,11 +1037,22 @@ return NonAffineSubRegionMap.lookup(ScopR).count(SubR); } +const ScopDetection::BoxedLoopsSetTy * +ScopDetection::getBoxedLoops(const Region *R) const { + auto BLMIt = BoxedLoopsMap.find(R); + if (BLMIt == BoxedLoopsMap.end()) + return nullptr; + return &BLMIt->second; +} + void polly::ScopDetection::verifyRegion(const Region &R) const { assert(isMaxRegionInScop(R) && "Expect R is a valid region."); + + BoxedLoopsSetTy DummyBoxedLoopsSet; NonAffineSubRegionSetTy DummyNonAffineSubRegionSet; DetectionContext Context(const_cast(R), *AA, - DummyNonAffineSubRegionSet, true /*verifying*/); + DummyNonAffineSubRegionSet, DummyBoxedLoopsSet, + true /*verifying*/); isValidRegion(Context); } Index: test/ScopDetect/non-affine-loop-condition-dependent-access.ll =================================================================== --- /dev/null +++ test/ScopDetect/non-affine-loop-condition-dependent-access.ll @@ -0,0 +1,57 @@ +; RUN: opt %loadPolly -basicaa -polly-detect -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=false -analyze < %s | FileCheck %s --check-prefix=REJECTNONAFFINELOOPS +; RUN: opt %loadPolly -basicaa -polly-detect -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=true -analyze < %s | FileCheck %s --check-prefix=ALLOWNONAFFINELOOPS +; RUN: opt %loadPolly -basicaa -polly-detect -polly-allow-nonaffine -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=true -analyze < %s | FileCheck %s --check-prefix=ALLOWNONAFFINELOOPSANDACCESSES +; +; Here we have a non-affine loop but also a non-affine access which should +; be rejected as long as -polly-allow-nonaffine isn't given. 
+; +; REJECTNONAFFINELOOPS-NOT: Valid +; ALLOWNONAFFINELOOPS-NOT: Valid +; ALLOWNONAFFINELOOPSANDACCESSES: Valid Region for Scop: bb1 => bb13 +; +; void f(int * restrict A, int * restrict C) { +; int j; +; for (int i = 0; i < 1024; i++) { +; while ((j = C[i])) +; A[j]++; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* noalias %A, i32* noalias %C) { +bb: + br label %bb1 + +bb1: ; preds = %bb12, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb12 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb13 + +bb2: ; preds = %bb1 + br label %bb3 + +bb3: ; preds = %bb6, %bb2 + %tmp = getelementptr inbounds i32, i32* %C, i64 %indvars.iv + %tmp4 = load i32, i32* %tmp, align 4 + %tmp5 = icmp eq i32 %tmp4, 0 + br i1 %tmp5, label %bb11, label %bb6 + +bb6: ; preds = %bb3 + %tmp7 = sext i32 %tmp4 to i64 + %tmp8 = getelementptr inbounds i32, i32* %A, i64 %tmp7 + %tmp9 = load i32, i32* %tmp8, align 4 + %tmp10 = add nsw i32 %tmp9, 1 + store i32 %tmp10, i32* %tmp8, align 4 + br label %bb3 + +bb11: ; preds = %bb3 + br label %bb12 + +bb12: ; preds = %bb11 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb13: ; preds = %bb1 + ret void +} Index: test/ScopDetect/non-affine-loop-condition-dependent-access_2.ll =================================================================== --- /dev/null +++ test/ScopDetect/non-affine-loop-condition-dependent-access_2.ll @@ -0,0 +1,86 @@ +; RUN: opt %loadPolly -basicaa -polly-detect -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=false -analyze < %s | FileCheck %s --check-prefix=REJECTNONAFFINELOOPS +; RUN: opt %loadPolly -basicaa -polly-detect -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=true -analyze < %s | FileCheck %s --check-prefix=ALLOWNONAFFINELOOPS +; RUN: opt %loadPolly -basicaa -polly-detect -polly-allow-nonaffine -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=true -analyze < %s | 
FileCheck %s --check-prefix=ALLOWNONAFFINELOOPSANDACCESSES +; +; Here we have a non-affine loop (in the context of the loop nest) +; and also a non-affine access (A[k]). While we can always detect the +; innermost loop as a SCoP of depth 1, we have to reject the loop nest if not +; both, non-affine loops as well as non-affine accesses are allowed. +; +; REJECTNONAFFINELOOPS: Valid Region for Scop: bb15 => bb26 +; REJECTNONAFFINELOOPS-NOT: Valid +; ALLOWNONAFFINELOOPS: Valid Region for Scop: bb15 => bb26 +; ALLOWNONAFFINELOOPS-NOT: Valid +; ALLOWNONAFFINELOOPSANDACCESSES: Valid Region for Scop: bb11 => bb29 +; +; void f(int *A) { +; for (int i = 0; i < 1024; i++) +; for (int j = 0; j < 1024; j++) +; for (int k = i *j; k < 1024; k++) +; A[k] += A[i] + A[j]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +bb: + br label %bb11 + +bb11: ; preds = %bb28, %bb + %indvars.iv8 = phi i64 [ %indvars.iv.next9, %bb28 ], [ 0, %bb ] + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %bb28 ], [ 0, %bb ] + %exitcond10 = icmp ne i64 %indvars.iv8, 1024 + br i1 %exitcond10, label %bb12, label %bb29 + +bb12: ; preds = %bb11 + br label %bb13 + +bb13: ; preds = %bb26, %bb12 + %indvars.iv5 = phi i64 [ %indvars.iv.next6, %bb26 ], [ 0, %bb12 ] + %indvars.iv3 = phi i64 [ %indvars.iv.next4, %bb26 ], [ 0, %bb12 ] + %exitcond7 = icmp ne i64 %indvars.iv5, 1024 + br i1 %exitcond7, label %bb14, label %bb27 + +bb14: ; preds = %bb13 + br label %bb15 + +bb15: ; preds = %bb24, %bb14 + %indvars.iv = phi i64 [ %indvars.iv.next, %bb24 ], [ %indvars.iv3, %bb14 ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb16, label %bb25 + +bb16: ; preds = %bb15 + %tmp = getelementptr inbounds i32, i32* %A, i64 %indvars.iv8 + %tmp17 = load i32, i32* %tmp, align 4 + %tmp18 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv5 + %tmp19 = load i32, i32* %tmp18, align 4 + %tmp20 = add nsw i32 %tmp17, %tmp19 + %tmp21 = getelementptr inbounds i32, i32* 
%A, i64 %indvars.iv + %tmp22 = load i32, i32* %tmp21, align 4 + %tmp23 = add nsw i32 %tmp22, %tmp20 + store i32 %tmp23, i32* %tmp21, align 4 + br label %bb24 + +bb24: ; preds = %bb16 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb15 + +bb25: ; preds = %bb15 + br label %bb26 + +bb26: ; preds = %bb25 + %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1 + %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, %indvars.iv1 + br label %bb13 + +bb27: ; preds = %bb13 + br label %bb28 + +bb28: ; preds = %bb27 + %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1 + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + br label %bb11 + +bb29: ; preds = %bb11 + ret void +} Index: test/ScopDetect/non-affine-loop-condition-dependent-access_3.ll =================================================================== --- /dev/null +++ test/ScopDetect/non-affine-loop-condition-dependent-access_3.ll @@ -0,0 +1,87 @@ +; RUN: opt %loadPolly -basicaa -polly-detect -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=false -analyze < %s | FileCheck %s --check-prefix=REJECTNONAFFINELOOPS +; RUN: opt %loadPolly -basicaa -polly-detect -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=true -analyze < %s | FileCheck %s --check-prefix=ALLOWNONAFFINELOOPS +; RUN: opt %loadPolly -basicaa -polly-detect -polly-allow-nonaffine -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=true -analyze < %s | FileCheck %s --check-prefix=ALLOWNONAFFINELOOPSANDACCESSES +; +; Here we have a non-affine loop (in the context of the loop nest) +; and also a non-affine access (A[k]). While we can always detect the +; innermost loop as a SCoP of depth 1, we have to reject the loop nest if not +; both, non-affine loops as well as non-affine accesses are allowed. 
+; +; REJECTNONAFFINELOOPS: Valid Region for Scop: bb15 => bb26 +; REJECTNONAFFINELOOPS-NOT: Valid +; ALLOWNONAFFINELOOPS: Valid Region for Scop: bb15 => bb26 +; ALLOWNONAFFINELOOPS-NOT: Valid +; ALLOWNONAFFINELOOPSANDACCESSES: Valid Region for Scop: bb11 => bb29 +; +; void f(int *A) { +; for (int i = 0; i < 1024; i++) +; for (int j = 0; j < 1024; j++) +; for (int k = 0; k < i * j; k++) +; A[k] += A[i] + A[j]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +bb: + br label %bb11 + +bb11: ; preds = %bb28, %bb + %indvars.iv8 = phi i64 [ %indvars.iv.next9, %bb28 ], [ 0, %bb ] + %indvars.iv1 = phi i32 [ %indvars.iv.next2, %bb28 ], [ 0, %bb ] + %exitcond10 = icmp ne i64 %indvars.iv8, 1024 + br i1 %exitcond10, label %bb12, label %bb29 + +bb12: ; preds = %bb11 + br label %bb13 + +bb13: ; preds = %bb26, %bb12 + %indvars.iv5 = phi i64 [ %indvars.iv.next6, %bb26 ], [ 0, %bb12 ] + %indvars.iv3 = phi i32 [ %indvars.iv.next4, %bb26 ], [ 0, %bb12 ] + %exitcond7 = icmp ne i64 %indvars.iv5, 1024 + br i1 %exitcond7, label %bb14, label %bb27 + +bb14: ; preds = %bb13 + br label %bb15 + +bb15: ; preds = %bb24, %bb14 + %indvars.iv = phi i64 [ %indvars.iv.next, %bb24 ], [ 0, %bb14 ] + %lftr.wideiv = trunc i64 %indvars.iv to i32 + %exitcond = icmp ne i32 %lftr.wideiv, %indvars.iv3 + br i1 %exitcond, label %bb16, label %bb25 + +bb16: ; preds = %bb15 + %tmp = getelementptr inbounds i32, i32* %A, i64 %indvars.iv8 + %tmp17 = load i32, i32* %tmp, align 4 + %tmp18 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv5 + %tmp19 = load i32, i32* %tmp18, align 4 + %tmp20 = add nsw i32 %tmp17, %tmp19 + %tmp21 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp22 = load i32, i32* %tmp21, align 4 + %tmp23 = add nsw i32 %tmp22, %tmp20 + store i32 %tmp23, i32* %tmp21, align 4 + br label %bb24 + +bb24: ; preds = %bb16 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb15 + +bb25: ; preds = %bb15 + br label %bb26 + +bb26: ; 
preds = %bb25 + %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1 + %indvars.iv.next4 = add nuw nsw i32 %indvars.iv3, %indvars.iv1 + br label %bb13 + +bb27: ; preds = %bb13 + br label %bb28 + +bb28: ; preds = %bb27 + %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1 + %indvars.iv.next2 = add nuw nsw i32 %indvars.iv1, 1 + br label %bb11 + +bb29: ; preds = %bb11 + ret void +} Index: test/ScopDetect/non-affine-loop.ll =================================================================== --- /dev/null +++ test/ScopDetect/non-affine-loop.ll @@ -0,0 +1,48 @@ +; RUN: opt %loadPolly -polly-detect -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=false -analyze < %s | FileCheck %s --check-prefix=REJECTNONAFFINELOOPS +; RUN: opt %loadPolly -polly-detect -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=true -analyze < %s | FileCheck %s --check-prefix=ALLOWNONAFFINELOOPS +; RUN: opt %loadPolly -polly-detect -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=true -polly-allow-nonaffine -analyze < %s | FileCheck %s --check-prefix=ALLOWNONAFFINELOOPSANDACCESSES +; RUN: opt %loadPolly -polly-detect -polly-allow-nonaffine-branches -polly-allow-nonaffine-loops=true -polly-allow-nonaffine -polly-detect-scops-in-regions-without-loops -analyze < %s | FileCheck %s --check-prefix=ALLOWNONAFFINELOOPSANDACCESSESANDNOLOOPS +; +; This function/region does contain a loop, however it is non-affine, hence the access +; A[i] is also. Furthermore, it is the only loop, thus when we over approximate +; non-affine loops __and__ accesses __and__ allow regions without a (affine) loop we will +; detect it, otherwise we won't. 
+; +; void f(int *A) { +; for (int i = 0; i < A[i]; i++) +; A[-1]++; +; } +; +; REJECTNONAFFINELOOPS-NOT: Valid +; ALLOWNONAFFINELOOPS-NOT: Valid +; ALLOWNONAFFINELOOPSANDACCESSES-NOT: Valid +; ALLOWNONAFFINELOOPSANDACCESSESANDNOLOOPS: Valid Region for Scop: bb1 => bb10 +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +bb: + br label %bb1 + +bb1: ; preds = %bb9, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb9 ], [ 0, %bb ] + %tmp = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp2 = load i32, i32* %tmp, align 4 + %tmp3 = sext i32 %tmp2 to i64 + %tmp4 = icmp slt i64 %indvars.iv, %tmp3 + br i1 %tmp4, label %bb5, label %bb10 + +bb5: ; preds = %bb1 + %tmp6 = getelementptr inbounds i32, i32* %A, i64 -1 + %tmp7 = load i32, i32* %tmp6, align 4 + %tmp8 = add nsw i32 %tmp7, 1 + store i32 %tmp8, i32* %tmp6, align 4 + br label %bb9 + +bb9: ; preds = %bb5 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb10: ; preds = %bb1 + ret void +} Index: test/ScopDetect/non_affine_loop_condition.ll =================================================================== --- /dev/null +++ test/ScopDetect/non_affine_loop_condition.ll @@ -0,0 +1,47 @@ +; RUN: opt %loadPolly -polly-detect -polly-allow-nonaffine-loops -analyze < %s | FileCheck %s +; +; void f(int *A) { +; for (int i = 0; i < 1024; i++) { +; while (A[i]) +; A[i]--; +; } +; } +; +; CHECK: Valid Region for Scop: bb1 => bb12 +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +bb: + br label %bb1 + +bb1: ; preds = %bb11, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb11 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb12 + +bb2: ; preds = %bb1 + br label %bb3 + +bb3: ; preds = %bb6, %bb2 + %tmp = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp4 = load i32, i32* %tmp, align 4 + %tmp5 = icmp eq i32 %tmp4, 0 + br i1 %tmp5, label %bb10, label %bb6 + +bb6: 
; preds = %bb3 + %tmp7 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp8 = load i32, i32* %tmp7, align 4 + %tmp9 = add nsw i32 %tmp8, -1 + store i32 %tmp9, i32* %tmp7, align 4 + br label %bb3 + +bb10: ; preds = %bb3 + br label %bb11 + +bb11: ; preds = %bb10 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb12: ; preds = %bb1 + ret void +} Index: test/ScopDetectionDiagnostics/ReportLoopBound-01.ll =================================================================== --- test/ScopDetectionDiagnostics/ReportLoopBound-01.ll +++ test/ScopDetectionDiagnostics/ReportLoopBound-01.ll @@ -1,13 +1,29 @@ -; RUN: opt %loadPolly -polly-detect-unprofitable -pass-remarks-missed="polly-detect" -polly-detect-track-failures -polly-detect -analyze < %s 2>&1| FileCheck %s +; RUN: opt %loadPolly -polly-detect-unprofitable -pass-remarks-missed="polly-detect" -polly-detect-track-failures -polly-allow-nonaffine-loops=false -polly-detect -analyze < %s 2>&1| FileCheck %s --check-prefix=REJECTNONAFFINELOOPS +; RUN: opt %loadPolly -polly-detect-unprofitable -pass-remarks-missed="polly-detect" -polly-detect-track-failures -polly-allow-nonaffine-loops=true -polly-detect -analyze < %s 2>&1| FileCheck %s --check-prefix=ALLOWNONAFFINELOOPS +; RUN: opt %loadPolly -polly-detect-unprofitable -pass-remarks-missed="polly-detect" -polly-detect-track-failures -polly-allow-nonaffine-loops=true -polly-allow-nonaffine -polly-detect -analyze < %s 2>&1| FileCheck %s --check-prefix=ALLOWNONAFFINEALL ; void f(int A[], int n) { ; for (int i = 0; i < A[n]; i++) ; A[i] = 0; ; } -; CHECK: remark: ReportLoopBound-01.c:2:8: The following errors keep this region from being a Scop. -; CHECK: remark: ReportLoopBound-01.c:2:8: Failed to derive an affine function from the loop bounds. -; CHECK: remark: ReportLoopBound-01.c:3:5: Invalid Scop candidate ends here. 
+; If we reject non-affine loops the non-affine loop bound will be reported: +; +; REJECTNONAFFINELOOPS: remark: ReportLoopBound-01.c:2:8: The following errors keep this region from being a Scop. +; REJECTNONAFFINELOOPS: remark: ReportLoopBound-01.c:2:8: Failed to derive an affine function from the loop bounds. +; REJECTNONAFFINELOOPS: remark: ReportLoopBound-01.c:3:5: Invalid Scop candidate ends here. + +; If we allow non-affine loops the non-affine access will be reported: +; +; ALLOWNONAFFINELOOPS: remark: ReportLoopBound-01.c:2:8: The following errors keep this region from being a Scop. +; ALLOWNONAFFINELOOPS: remark: ReportLoopBound-01.c:3:5: The array subscript of "A" is not affine +; ALLOWNONAFFINELOOPS: remark: ReportLoopBound-01.c:3:5: Invalid Scop candidate ends here. + +; If we allow non-affine loops and non-affine accesses the region will be reported as not profitable: +; +; ALLOWNONAFFINEALL: remark: ReportLoopBound-01.c:2:8: The following errors keep this region from being a Scop. +; ALLOWNONAFFINEALL: remark: ReportLoopBound-01.c:3:5: The regions does not seem to be amendable to profitable polyhedral optimization +; ALLOWNONAFFINEALL: remark: ReportLoopBound-01.c:3:5: Invalid Scop candidate ends here. target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu"