Index: include/polly/ScopDetection.h =================================================================== --- include/polly/ScopDetection.h +++ include/polly/ScopDetection.h @@ -136,6 +136,11 @@ AliasAnalysis *AA; //@} + /// @brief Set to remeber non-affine branches in regions. + using NonAffineBranchSetTy = + DenseSet>; + NonAffineBranchSetTy NonAffineBranchSet; + /// @brief Context variables for SCoP detection. struct DetectionContext { Region &CurRegion; // The region to check. @@ -162,9 +167,15 @@ /// @brief The region has at least one store instruction. bool hasStores; + /// @brief The region has at least one loop that is not overapproximated. + bool hasLoops; + + /// @brief The set of non-affine branches in the region. + NonAffineBranchSetTy NonAffineBranchSet; + DetectionContext(Region &R, AliasAnalysis &AA, bool Verify) : CurRegion(R), AST(AA), Verifying(Verify), Log(&R), hasLoads(false), - hasStores(false) {} + hasStores(false), hasLoops(false) {} }; // Remember the valid regions @@ -300,6 +311,9 @@ /// @return Return true if R is the maximum Region in a Scop, false otherwise. bool isMaxRegionInScop(const Region &R, bool Verify = true) const; + /// @brief Return true if @p BB has a non-affine branch in @p R. + bool hasNonAffineBranch(const Region *R, const BasicBlock *BB) const; + /// @brief Get a message why a region is invalid /// /// @param R The region for which we get the error message Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -126,6 +126,12 @@ cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); +static cl::opt + AllowNonAffineBranches("polly-allow-nonaffine-branches", + cl::desc("Allow non affine conditions for branches"), + cl::Hidden, cl::init(false), cl::ZeroOrMore, + cl::cat(PollyCategory)); + static cl::opt AllowUnsigned("polly-allow-unsigned", cl::desc("Allow unsigned expressions"), cl::Hidden, cl::init(false), cl::ZeroOrMore, @@ -301,8 +307,12 @@ return invalid(Context, /*Assert=*/true, Br, &BB); // Only Constant and ICmpInst are allowed as condition. - if (!(isa(Condition) || isa(Condition))) - return invalid(Context, /*Assert=*/true, Br, &BB); + if (!(isa(Condition) || isa(Condition))) { + if (AllowNonAffineBranches) + Context.NonAffineBranchSet.insert(std::make_pair(&CurRegion, &BB)); + else + return invalid(Context, /*Assert=*/true, Br, &BB); + } // Allow perfectly nested conditions. assert(Br->getNumSuccessors() == 2 && "Unexpected number of successors"); @@ -326,9 +336,13 @@ const SCEV *RHS = SE->getSCEVAtScope(ICmp->getOperand(1), L); if (!isAffineExpr(&CurRegion, LHS, *SE) || - !isAffineExpr(&CurRegion, RHS, *SE)) - return invalid(Context, /*Assert=*/true, &BB, LHS, - RHS, ICmp); + !isAffineExpr(&CurRegion, RHS, *SE)) { + if (AllowNonAffineBranches) + Context.NonAffineBranchSet.insert(std::make_pair(&CurRegion, &BB)); + else + return invalid(Context, /*Assert=*/true, &BB, LHS, + RHS, ICmp); + } } // Allow loop exit conditions. @@ -648,10 +662,23 @@ bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const { // Is the loop count affine? const SCEV *LoopCount = SE->getBackedgeTakenCount(L); - if (!isAffineExpr(&Context.CurRegion, LoopCount, *SE)) - return invalid(Context, /*Assert=*/true, L, LoopCount); + if (isAffineExpr(&Context.CurRegion, LoopCount, *SE)) { + Context.hasLoops = true; + return true; + } - return true; + if (AllowNonAffineBranches) { + // Check if we can build a proper region for this loop. + BasicBlock *Header = L->getHeader(); + Region *R = RI->getRegionFor(Header); + if (R->getEntry() == Header && R->contains(L)) { + Context.NonAffineBranchSet.insert( + std::make_pair(&Context.CurRegion, Header)); + return true; + } + } + + return invalid(Context, /*Assert=*/true, L, LoopCount); } Region *ScopDetection::expandRegion(Region &R) { @@ -685,6 +712,10 @@ // far). LastValidRegion = ExpandedRegion; + // Remember the non-affine branches in this region. + NonAffineBranchSet.insert(Context.NonAffineBranchSet.begin(), + Context.NonAffineBranchSet.end()); + // Create and test the next greater region (if any) ExpandedRegion = ExpandedRegion->getExpandedRegion(); @@ -748,6 +779,10 @@ if (!HasErrors) { ++ValidRegion; ValidRegions.insert(&R); + + // Remember the non-affine branches in this region. + NonAffineBranchSet.insert(Context.NonAffineBranchSet.begin(), + Context.NonAffineBranchSet.end()); return; } @@ -880,6 +915,11 @@ if (!DetectUnprofitable && (!Context.hasStores || !Context.hasLoads)) invalid(Context, /*Assert=*/true); + // Check if there was at least one non-overapproximated loop in the region or + // we allow regions without loops. + if (!DetectRegionsWithoutLoops && !Context.hasLoops) + return false; + DEBUG(dbgs() << "OK\n"); return true; } @@ -965,6 +1005,11 @@ return false; } +bool ScopDetection::hasNonAffineBranch(const Region *R, + const BasicBlock *BB) const { + return NonAffineBranchSet.count(std::make_pair(R, BB)); +} + void polly::ScopDetection::verifyRegion(const Region &R) const { assert(isMaxRegionInScop(R) && "Expect R is a valid region."); DetectionContext Context(const_cast(R), *AA, true /*verifying*/); @@ -1000,6 +1045,7 @@ void ScopDetection::releaseMemory() { ValidRegions.clear(); RejectLogs.clear(); + NonAffineBranchSet.clear(); // Do not clear the invalid function set. } Index: test/ScopDetect/no-affine-loop.ll =================================================================== --- /dev/null +++ test/ScopDetect/no-affine-loop.ll @@ -0,0 +1,44 @@ +; RUN: opt %loadPolly -polly-detect -polly-allow-nonaffine-branches -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-detect -polly-allow-nonaffine-branches -analyze -polly-detect-scops-in-regions-without-loops < %s | FileCheck %s --check-prefix=NoLoop +; +; This function/region does contain a loop, however it is non-affine and will +; be overapproximated. We will therefor only detect this region if we allow +; regions without loops. +; +; void f(int *A) { +; for (int i = 0; i < A[i]; i++) +; A[i]++; +; } +; +; CHECK-NOT: Valid +; NoLoop: Valid Region for Scop: bb1 => bb10 +; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +bb: + br label %bb1 + +bb1: ; preds = %bb9, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb9 ], [ 0, %bb ] + %tmp = getelementptr inbounds i32* %A, i64 %indvars.iv + %tmp2 = load i32* %tmp, align 4 + %tmp3 = sext i32 %tmp2 to i64 + %tmp4 = icmp slt i64 %indvars.iv, %tmp3 + br i1 %tmp4, label %bb5, label %bb10 + +bb5: ; preds = %bb1 + %tmp6 = getelementptr inbounds i32* %A, i64 %indvars.iv + %tmp7 = load i32* %tmp6, align 4 + %tmp8 = add nsw i32 %tmp7, 1 + store i32 %tmp8, i32* %tmp6, align 4 + br label %bb9 + +bb9: ; preds = %bb5 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb10: ; preds = %bb1 + ret void +} Index: test/ScopDetect/non-affine-conditional.ll =================================================================== --- /dev/null +++ test/ScopDetect/non-affine-conditional.ll @@ -0,0 +1,42 @@ +; RUN: opt %loadPolly -polly-allow-nonaffine-branches -polly-detect -analyze < %s | FileCheck %s +; +; void f(int *A) { +; for (int i = 0; i < 1024; i++) +; if (A[i]) +; A[i] = 0; +; } +; +; CHECK: Valid Region for Scop: bb1 => bb9 +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +bb: + br label %bb1 + +bb1: ; preds = %bb8, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb8 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb9 + +bb2: ; preds = %bb1 + %tmp = getelementptr inbounds i32* %A, i64 %indvars.iv + %tmp3 = load i32* %tmp, align 4 + %tmp4 = icmp eq i32 %tmp3, 0 + br i1 %tmp4, label %bb7, label %bb5 + +bb5: ; preds = %bb2 + %tmp6 = getelementptr inbounds i32* %A, i64 %indvars.iv + store i32 0, i32* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb2, %bb5 + br label %bb8 + +bb8: ; preds = %bb7 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb9: ; preds = %bb1 + ret void +} Index: test/ScopDetect/non-affine-loop-condition-dependent-access.ll =================================================================== --- /dev/null +++ test/ScopDetect/non-affine-loop-condition-dependent-access.ll @@ -0,0 +1,55 @@ +; RUN: opt %loadPolly -polly-detect -polly-allow-nonaffine-branches -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-detect -polly-allow-nonaffine -polly-allow-nonaffine-branches -analyze < %s | FileCheck %s --check-prefix=NonAffMem +; +; Here we have a non affine branch but also a non-affine access which should +; be rejected as long as -polly-allow-nonaffine isn't given. +; +; CHECK-NOT: Valid +; NonAffMem: Valid Region for Scop: bb1 => bb13 +; +; void f(int *A) { +; int j; +; for (int i = 0; i < 1024; i++) { +; while ((j = C[i])) +; A[j]++; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +bb: + br label %bb1 + +bb1: ; preds = %bb12, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb12 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb13 + +bb2: ; preds = %bb1 + br label %bb3 + +bb3: ; preds = %bb6, %bb2 + %tmp = getelementptr inbounds i32* %A, i64 %indvars.iv + %tmp4 = load i32* %tmp, align 4 + %tmp5 = icmp eq i32 %tmp4, 0 + br i1 %tmp5, label %bb11, label %bb6 + +bb6: ; preds = %bb3 + %tmp7 = sext i32 %tmp4 to i64 + %tmp8 = getelementptr inbounds i32* %A, i64 %tmp7 + %tmp9 = load i32* %tmp8, align 4 + %tmp10 = add nsw i32 %tmp9, 1 + store i32 %tmp10, i32* %tmp8, align 4 + br label %bb3 + +bb11: ; preds = %bb3 + br label %bb12 + +bb12: ; preds = %bb11 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb13: ; preds = %bb1 + ret void +}